I have created an Elasticsearch index with a user-defined type; the entire configuration and sample data follow. I am trying to query all properties nested under episodes with a search phrase, but an exception occurs.
Can the community help?
PUT test
Mapping
PUT test/_mapping/mytype
{
"properties": {
"age": {
"type": "string"
},
"birthdate": {
"type": "date",
"format": "yyyy-MM-dd"
},
"deathdate": {
"type": "date",
"format": "yyyy-MM-dd"
},
"deceased": {
"type": "boolean"
},
"documentsignatureid": {
"type": "integer"
},
"episodes": {
"type": "nested",
"properties": {
"activities": {
"type": "nested",
"properties": {
"description": {
"type": "string"
},
"executiondate": {
"type": "date",
"format": "date_optional_time"
},
"performerspecialty": {
"type": "string",
"index": "not_analyzed"
},
"type": {
"type": "string",
"index": "not_analyzed"
}
}
},
"administrations": {
"type": "nested",
"properties": {
"activeprincipal": {
"type": "string"
},
"administrationdate": {
"type": "date",
"format": "date_optional_time"
},
"comercialname": {
"type": "string"
},
"dose": {
"type": "double"
},
"frequency": {
"type": "integer"
},
"medicinecode": {
"type": "string",
"index": "not_analyzed"
}
}
},
"age": {
"type": "string"
},
"agedescription": {
"type": "string",
"index": "not_analyzed"
},
"ageindays": {
"type": "integer"
},
"backgrounds": {
"type": "nested",
"properties": {
"date": {
"type": "date",
"format": "date_optional_time"
},
"observation": {
"type": "string"
},
"subtype": {
"type": "string",
"index": "not_analyzed"
},
"type": {
"type": "string",
"index": "not_analyzed"
}
}
},
"biometrics": {
"type": "nested",
"properties": {
"date": {
"type": "date",
"format": "date_optional_time"
},
"description": {
"type": "string"
},
"value": {
"type": "string",
"index": "not_analyzed"
}
}
},
"birthdate": {
"type": "date",
"format": "yyyy-MM-dd"
},
"clinicalnotes": {
"type": "nested",
"properties": {
"date": {
"type": "date",
"format": "date_optional_time"
},
"observation": {
"type": "string"
},
"specialty": {
"type": "string"
},
"type": {
"type": "string",
"index": "not_analyzed"
}
}
},
"deathdate": {
"type": "date",
"format": "yyyy-MM-dd"
},
"deceased": {
"type": "boolean"
},
"diagnostics": {
"type": "nested",
"properties": {
"code": {
"type": "string",
"index": "not_analyzed"
},
"codification": {
"type": "string",
"index": "not_analyzed"
},
"description": {
"type": "string"
},
"enddate": {
"type": "date",
"format": "date_optional_time"
},
"startdate": {
"type": "date",
"format": "date_optional_time"
},
"state": {
"type": "string",
"index": "not_analyzed"
},
"type": {
"type": "string",
"index": "not_analyzed"
}
}
},
"dietetics": {
"type": "nested",
"properties": {
"date": {
"type": "date",
"format": "date_optional_time"
},
"type": {
"type": "string",
"index": "not_analyzed"
}
}
},
"episodeid": {
"type": "string",
"index": "not_analyzed"
},
"episodetype": {
"type": "string",
"index": "not_analyzed"
},
"examinationrequests": {
"type": "nested",
"properties": {
"anticonceptionmethod": {
"type": "string",
"index": "not_analyzed"
},
"cancellationreason": {
"type": "string"
},
"clinicalinformation": {
"type": "string"
},
"date": {
"type": "date",
"format": "yyyy-MM-dd"
},
"documentnumber": {
"type": "string"
},
"duration": {
"type": "string",
"index": "not_analyzed"
},
"examinformations": {
"type": "nested",
"properties": {
"admstate": {
"type": "string",
"index": "not_analyzed"
},
"anatomicalregion": {
"type": "string",
"index": "not_analyzed"
},
"arscode": {
"type": "string",
"index": "not_analyzed"
},
"blockexternal": {
"type": "integer"
},
"cancellationmotive": {
"type": "string",
"index": "not_analyzed"
},
"charge": {
"type": "string",
"index": "not_analyzed"
},
"code": {
"type": "string",
"index": "not_analyzed"
},
"documentnumber": {
"type": "string",
"index": "not_analyzed"
},
"executantmechanicalnumber": {
"type": "string",
"index": "not_analyzed"
},
"externalcode": {
"type": "string",
"index": "not_analyzed"
},
"externaldescription": {
"type": "string",
"index": "not_analyzed"
},
"externalexecutionmotive": {
"type": "string",
"index": "not_analyzed"
},
"face": {
"type": "string",
"index": "not_analyzed"
},
"id": {
"type": "integer"
},
"justification": {
"type": "string"
},
"laterality": {
"type": "string",
"index": "not_analyzed"
},
"name": {
"type": "integer"
},
"number": {
"type": "integer"
},
"observation": {
"type": "string"
},
"sextante": {
"type": "integer"
},
"state": {
"type": "string",
"index": "not_analyzed"
}
}
},
"executingservicecode": {
"type": "string"
},
"executingservicedescription": {
"type": "string"
},
"extrainformation": {
"type": "string"
},
"factor": {
"type": "string",
"index": "not_analyzed"
},
"frequency": {
"type": "string",
"index": "not_analyzed"
},
"harvestdate": {
"type": "date",
"format": "yyyy-MM-dd"
},
"lastmenstruationdate": {
"type": "date",
"format": "yyyy-MM-dd"
},
"menopause": {
"type": "boolean"
},
"nottransportable": {
"type": "string",
"index": "not_analyzed"
},
"number": {
"type": "string",
"index": "not_analyzed"
},
"observations": {
"type": "string"
},
"priority": {
"type": "string",
"index": "not_analyzed"
},
"requestdate": {
"type": "date",
"format": "yyyy-MM-dd"
},
"requesthour": {
"type": "date",
"format": "yyyy-MM-dd"
},
"requestingmedic": {
"type": "string"
},
"requestingservicecode": {
"type": "string",
"index": "not_analyzed"
},
"requestingservicedescription": {
"type": "string"
},
"sessions": {
"type": "integer"
},
"state": {
"type": "string",
"index": "not_analyzed"
}
}
},
"gender": {
"type": "string",
"index": "not_analyzed"
},
"internments": {
"type": "nested",
"properties": {
"internmentdate": {
"type": "date",
"format": "date_optional_time"
},
"number": {
"type": "string",
"index": "not_analyzed"
},
"realeasedate": {
"type": "date",
"format": "date_optional_time"
}
}
},
"maritalstatus": {
"type": "string",
"index": "not_analyzed"
},
"mothername": {
"type": "string"
},
"nacionality": {
"type": "string",
"index": "not_analyzed"
},
"name": {
"type": "string"
},
"patientid": {
"type": "string",
"index": "not_analyzed"
},
"patienttype": {
"type": "string",
"index": "not_analyzed"
},
"prescriptions": {
"type": "nested",
"properties": {
"activeprincipal": {
"type": "string"
},
"comercialname": {
"type": "string"
},
"dose": {
"type": "double"
},
"frequency": {
"type": "integer"
},
"medicinecode": {
"type": "string",
"index": "not_analyzed"
},
"prescriptiondate": {
"type": "date",
"format": "date_optional_time"
},
"scope": {
"type": "string",
"index": "not_analyzed"
}
}
},
"sns": {
"type": "string",
"index": "not_analyzed"
},
"title": {
"type": "string"
},
"uniqueid": {
"type": "string",
"index": "not_analyzed"
}
}
},
"gender": {
"type": "string",
"index": "not_analyzed"
},
"id": {
"type": "string",
"index": "not_analyzed"
},
"maritalstatus": {
"type": "string",
"index": "not_analyzed"
},
"mothername": {
"type": "string"
},
"nacionality": {
"type": "string",
"index": "not_analyzed"
},
"name": {
"type": "string"
},
"patientid": {
"type": "string",
"index": "not_analyzed"
},
"patienttype": {
"type": "string",
"index": "not_analyzed"
},
"sns": {
"type": "string",
"index": "not_analyzed"
},
"title": {
"type": "string"
},
"uniqueid": {
"type": "string",
"index": "not_analyzed"
}
}
}
Sample Documents :
PUT test/mytype/UNMDV%2F000000075
{
"documentsignatureid": 14706,
"episodes": [
{
"administrations": [],
"birthdate": "1956-06-07",
"deceased": false,
"gender": "F",
"patientid": "000000075",
"episodetype": "Consultas",
"clinicalnotes": [
{
"date": "2016-01-25T00:00:00",
"specialty": "Oncologia",
"observation": "Dores, etc",
"type": "NOTAS_PRIV"
},
{
"date": "2016-01-25T00:00:00",
"specialty": "Oncologia",
"observation": "Dores, etc",
"type": "EVOLUCAO"
}
],
"examinationrequests": [],
"episodeid": "242546",
"biometrics": [],
"agedescription": "59 Anos",
"diagnostics": [],
"dietetics": [],
"maritalstatus": "S",
"activities": [],
"backgrounds": [],
"name": "POLIANA PENHA DE JESUS",
"mothername": "SOFIA FIGUEIRA",
"internments": [],
"patienttype": "UNMDV",
"prescriptions": [],
"ageindays": 21835,
"uniqueid": "209445701",
"age": "59"
},
{
"administrations": [],
"birthdate": "1956-06-07",
"deceased": false,
"gender": "F",
"patientid": "000000075",
"episodetype": "Consultas",
"clinicalnotes": [],
"examinationrequests": [],
"episodeid": "242546",
"biometrics": [],
"agedescription": "59 Anos",
"diagnostics": [],
"dietetics": [],
"maritalstatus": "S",
"activities": [],
"backgrounds": [],
"name": "POLIANA PENHA DE JESUS",
"mothername": "SOFIA FIGUEIRA",
"internments": [],
"patienttype": "UNMDV",
"prescriptions": [],
"ageindays": 21835,
"uniqueid": "209445701",
"age": "59"
},
{
"administrations": [],
"birthdate": "1956-06-07",
"deceased": false,
"gender": "F",
"patientid": "000000075",
"episodetype": "Consultas",
"clinicalnotes": [],
"examinationrequests": [],
"episodeid": "242546",
"biometrics": [],
"agedescription": "59 Anos",
"diagnostics": [
{
"code": "A689",
"codification": "CID 10",
"description": "A68.9 - Febre recorrente NE",
"startdate": "2016-01-25T00:00:00",
"type": "DEF"
}
],
"dietetics": [],
"maritalstatus": "S",
"activities": [],
"backgrounds": [],
"name": "POLIANA PENHA DE JESUS",
"mothername": "SOFIA FIGUEIRA",
"internments": [],
"patienttype": "UNMDV",
"prescriptions": [],
"ageindays": 21835,
"uniqueid": "209445701",
"age": "59"
},
{
"administrations": [],
"birthdate": "1956-06-07",
"deceased": false,
"gender": "F",
"patientid": "000000075",
"episodetype": "Consultas",
"clinicalnotes": [
{
"date": "2016-01-25T00:00:00",
"specialty": "Oncologia",
"observation": "Dores, etc",
"type": "NOTAS_PRIV"
},
{
"date": "2016-01-25T00:00:00",
"specialty": "Oncologia",
"observation": "Dores, etc",
"type": "EVOLUCAO"
}
],
"examinationrequests": [],
"episodeid": "242546",
"biometrics": [],
"agedescription": "59 Anos",
"diagnostics": [],
"dietetics": [],
"maritalstatus": "S",
"activities": [],
"backgrounds": [],
"name": "POLIANA PENHA DE JESUS",
"mothername": "SOFIA FIGUEIRA",
"internments": [],
"patienttype": "UNMDV",
"prescriptions": [],
"ageindays": 21835,
"uniqueid": "209445701",
"age": "59"
}
],
"uniqueid": "209445701",
"patientid": "000000075",
"patienttype": "UNMDV",
"name": "POLIANA PENHA DE JESUS",
"gender": "F",
"birthdate": "1956-06-07",
"mothername": "SOFIA FIGUEIRA",
"maritalstatus": "S",
"deceased": false,
"age": "59"
}
PUT test/mytype/UNMDV%2F000000046
{
"documentsignatureid": 14711,
"episodes": [
{
"administrations": [],
"birthdate": "1970-12-12",
"deceased": false,
"gender": "F",
"patientid": "000000046",
"episodetype": "Consultas",
"clinicalnotes": [],
"examinationrequests": [],
"episodeid": "242557",
"biometrics": [],
"agedescription": "45 Anos",
"diagnostics": [],
"dietetics": [],
"maritalstatus": "D",
"activities": [],
"backgrounds": [],
"name": "JURACI IZABEL BADKE NEVES",
"mothername": "A",
"internments": [],
"patienttype": "UNMDV",
"prescriptions": [],
"ageindays": 16534,
"uniqueid": "209442701",
"age": "45"
}
],
"uniqueid": "209442701",
"patientid": "000000046",
"patienttype": "UNMDV",
"name": "JURACI IZABEL BADKE NEVES",
"gender": "F",
"birthdate": "1970-12-12",
"mothername": "A",
"maritalstatus": "D",
"deceased": false,
"age": "45"
}
Query :
POST test/mytype/_search
{
"from": 0,
"size": 20,
"highlight": {
"fields": {
"_all": {}
},
"require_field_match": false
},
"query": {
"nested": {
"path": "episodes",
"query": {
"multi_match": {
"query": "febre",
"operator": "and",
"fields": [
"episodes.*"
]
}
}
}
}
}
Result:
{
"error": {
"root_cause": [
{
"type": "illegal_argument_exception",
"reason": "Invalid format: \"febre\""
}
],
"type": "search_phase_execution_exception",
"reason": "all shards failed",
"phase": "query",
"grouped": true,
"failed_shards": [
{
"shard": 0,
"index": "test",
"node": "W0tautNcT22Z4LNnd5gDCg",
"reason": {
"type": "illegal_argument_exception",
"reason": "Invalid format: \"febre\""
}
}
]
},
"status": 400
}
Related
I'm using ES 1.7 and trying to find documents with match_phrase, to search for an exact string match. Each filter works fine when used alone; however, when I combine the filters, I get an error.
example: people document
q=aaron&address=scarborough - searching a person by name and address, works fine.
{
"query": {
"match_phrase": {
"name": "aaron"
}
},
"filter": {
"bool": {
"must": {
"nested": {
"path": "addresses",
"query": {
"match_phrase": {
"address": "scarborough"
}
}
}
}
}
},
q=aaron&phone=813-689-6889 - searching a person by name and phone number works fine as well.
{
"query": {
"match_phrase": {
"name": "aaron"
}
},
"filter": {
"bool": {
"must": {
"query": {
"match_phrase": {
"phone": "813-689-6889"
}
}
}
}
}
However, when I try to use both filters (address and phone), I get a "No filter registered for [match_phrase]" error,
for example: q=aaron&address=scarborough&phone=813-689-6889
{
"query": {
"match_phrase": {
"name": "aaron"
}
},
"filter": {
"bool": {
"must": {
"nested": {
"path": "addresses",
"query": {
"match_phrase": {
"address": "scarborough"
}
}
},
"query": {
"match_phrase": {
"phone": "813-689-6889"
}
}
}
}
}
the error, when using address and phone filters together:
nested: QueryParsingException[[pl_people] No filter registered for [match_phrase]]; }]","status":400}):
index mapping (person):
{
"pl_people": {
"mappings": {
"person": {
"properties": {
"ac_name": {
"type": "string",
"analyzer": "autocomplete"
},
"addresses": {
"type": "nested",
"properties": {
"address": {
"type": "string"
},
"city": {
"type": "string",
"index": "not_analyzed"
},
"city_id": {
"type": "long"
},
"country": {
"type": "string",
"index": "not_analyzed"
},
"county": {
"type": "string",
"index": "not_analyzed"
},
"county_id": {
"type": "long"
},
"id": {
"type": "long"
},
"location": {
"type": "geo_point"
},
"parameterized": {
"type": "string"
},
"state": {
"type": "string",
"index": "not_analyzed"
},
"state_id": {
"type": "long"
},
"zip": {
"type": "string",
"index": "not_analyzed"
}
}
},
"author": {
"type": "string",
"index": "not_analyzed"
},
"body": {
"type": "string",
"analyzer": "remove_html",
"fields": {
"ns_body": {
"type": "string",
"analyzer": "remove_html_stopwords"
}
}
},
"charities": {
"type": "nested",
"properties": {
"email": {
"type": "string",
"index": "not_analyzed"
},
"id": {
"type": "long"
}
}
},
"community": {
"properties": {
"name": {
"type": "string",
"index": "not_analyzed"
},
"parameterized": {
"type": "string",
"index": "not_analyzed"
},
"slug": {
"type": "string",
"index": "not_analyzed"
}
}
},
"created_at": {
"type": "date",
"format": "dateOptionalTime"
},
"date_of_birth": {
"type": "date",
"format": "dateOptionalTime"
},
"delimiters": {
"type": "nested",
"properties": {
"delimiter_type": {
"type": "string",
"index": "not_analyzed"
},
"id": {
"type": "long"
}
}
},
"description": {
"type": "string"
},
"employments": {
"type": "nested",
"properties": {
"email": {
"type": "string",
"index": "not_analyzed"
},
"employment_status": {
"type": "string",
"index": "not_analyzed"
},
"foia_contact": {
"type": "boolean"
},
"id": {
"type": "long"
},
"phone": {
"type": "string",
"index": "not_analyzed"
},
"phone_extension": {
"type": "string",
"index": "not_analyzed"
}
}
},
"first_name": {
"type": "string",
"fields": {
"na_first_name": {
"type": "string",
"index": "not_analyzed"
}
}
},
"last_name": {
"type": "string",
"fields": {
"na_last_name": {
"type": "string",
"index": "not_analyzed"
}
}
},
"market": {
"properties": {
"name": {
"type": "string",
"index": "not_analyzed"
},
"parameterized": {
"type": "string",
"index": "not_analyzed"
},
"slug": {
"type": "string",
"index": "not_analyzed"
}
}
},
"middle_name": {
"type": "string",
"fields": {
"na_middle_name": {
"type": "string",
"index": "not_analyzed"
}
}
},
"most_recent_organization": {
"properties": {
"description": {
"type": "string"
},
"id": {
"type": "long"
},
"name": {
"type": "string"
},
"parameterized": {
"type": "string"
},
"phone": {
"type": "string"
}
}
},
"name": {
"type": "string",
"fields": {
"na_name": {
"type": "string",
"index": "not_analyzed"
},
"ngram_name": {
"type": "string",
"analyzer": "my_start"
},
"ns_name": {
"type": "string",
"analyzer": "no_stopwords"
}
}
},
"organizations": {
"properties": {
"name": {
"type": "string"
},
"parameterized": {
"type": "string",
"index": "not_analyzed"
}
}
},
"package": {
"properties": {
"name": {
"type": "string",
"index": "not_analyzed"
},
"parameterized": {
"type": "string",
"index": "not_analyzed"
},
"slug": {
"type": "string",
"index": "not_analyzed"
}
}
},
"parameterized": {
"type": "string",
"index": "not_analyzed"
},
"phone": {
"type": "string"
},
"photo": {
"properties": {
"large": {
"type": "string"
},
"medium": {
"type": "string"
},
"teaser": {
"type": "string"
},
"thumb": {
"type": "string"
},
"url": {
"type": "string"
}
}
},
"projects": {
"properties": {
"id": {
"type": "long"
},
"name": {
"type": "string",
"index": "not_analyzed"
},
"parameterized": {
"type": "string",
"index": "not_analyzed"
},
"slug": {
"type": "string",
"index": "not_analyzed"
}
}
},
"public_offices": {
"type": "nested",
"properties": {
"email": {
"type": "string",
"index": "not_analyzed"
},
"employment_status": {
"type": "string",
"index": "not_analyzed"
},
"id": {
"type": "long"
}
}
},
"published": {
"type": "string",
"index": "not_analyzed"
},
"region": {
"properties": {
"name": {
"type": "string",
"index": "not_analyzed"
},
"parameterized": {
"type": "string",
"index": "not_analyzed"
},
"slug": {
"type": "string",
"index": "not_analyzed"
}
}
},
"resource": {
"type": "string"
},
"short_description": {
"type": "string"
},
"show_path": {
"type": "string"
},
"time": {
"type": "date",
"format": "dateOptionalTime"
},
"updated_at": {
"type": "date",
"format": "dateOptionalTime"
}
}
}
}
}
}
Document I am using to test
[
{
"_index": "pl_people",
"_type": "person",
"_id": "813106",
"_score": null,
"sort": [
-9223372036854775808
],
"resource": "Person",
"parameterized": "813106-aaron-mcguire",
"created_at": "2011-10-29T19:51:24.000-05:00",
"updated_at": "2014-12-11T07:21:08.000-06:00",
"name": "Aaron McGuire",
"title": null,
"photo": {
"url": "/assets/140x140.gif"
},
"description": null,
"short_description": null,
"time": "2014-12-11",
"show_path": "/people/813106-aaron-mcguire",
"published": true,
"aliases": [],
"phone": "813-689-6889",
"date_of_birth": "1991-03-01",
"first_name": "Aaron",
"middle_name": "",
"last_name": "McGuire",
"delimiters": [],
"market": null,
"region": null,
"most_recent_organization": null,
"households": [],
"court_cases": [],
"addresses": [
{
"id": 1,
"parameterized": "1",
"address": "123 Scarborough road",
"zip": "L5A2A9",
"city": "Ontario",
"country": "USA",
"state": "California",
"location": null,
"state_id": null,
"county_id": null,
"city_id": null
}
],
"projects": [],
"voter_ids": [],
"id": "813106"
}
]
Finally, I was able to reproduce the issue. It looks like "include_in_parent": true is missing from your mapping, which is why I was getting the error mentioned in my comment.
To fix the issue, I used the same mapping you provided but added "include_in_parent": true at the top level of the addresses nested field. For brevity, I am providing only the addresses part of the modified mapping.
"addresses": {
"type": "nested",
"include_in_parent": true, --> added only this in your mapping.
"properties": {
"address": {
"type": "string"
},
"city": {
"type": "string",
"index": "not_analyzed"
},
"city_id": {
"type": "long"
},
"country": {
"type": "string",
"index": "not_analyzed"
},
"county": {
"type": "string",
"index": "not_analyzed"
},
"county_id": {
"type": "long"
},
"id": {
"type": "long"
},
"location": {
"type": "geo_point"
},
"parameterized": {
"type": "string"
},
"state": {
"type": "string",
"index": "not_analyzed"
},
"state_id": {
"type": "long"
},
"zip": {
"type": "string",
"index": "not_analyzed"
}
}
}
After that, I indexed some sample docs with different values for the address and name params, to verify that my search query returns the right results.
Here is the final search query, which includes both filters. Please note that I removed the nested part on the address field: since it is now included in the parent document, you can query it directly. This query is much simpler and now gives the expected result.
{
"query": {
"match_phrase": {
"name": "aaron"
}
},
"filter": {
"bool": {
"must": [{
"query": {
"match_phrase": {
"address": "scarborough"
}
}
},
{
"query": {
"match_phrase": {
"phone": "813-689-6889"
}
}
}]
}
}
}
Output on sample docs
"hits": [
{
"_index": "so-match-phrase",
"_type": "pl_people",
"_id": "4",
"_score": 0.19178301,
"_source": {
"resource": "Person",
"parameterized": "813106-aaron-mcguire",
"created_at": "2011-10-29T19:51:24.000-05:00",
"updated_at": "2014-12-11T07:21:08.000-06:00",
"name": "aaron McGuire",
"title": null,
"photo": {
"url": "/assets/140x140.gif"
},
"description": null,
"short_description": null,
"time": "2014-12-11",
"show_path": "/people/813106-aaron-mcguire",
"published": true,
"aliases": [],
"phone": "813-689-6889",
"date_of_birth": "1991-03-01",
"first_name": "Aaron",
"middle_name": "",
"last_name": "McGuire",
"delimiters": [],
"market": null,
"region": null,
"most_recent_organization": null,
"households": [],
"court_cases": [],
"addresses": [
{
"id": 1,
"parameterized": "1",
"address": "Scarborough road",
"zip": "L5A2A9",
"city": "Ontario",
"country": "USA",
"state": "California",
"location": null,
"state_id": null,
"county_id": null,
"city_id": null
}
],
"id": "813106"
}
},
{
"_index": "so-match-phrase",
"_type": "pl_people",
"_id": "1",
"_score": 0.19178301,
"_source": {
"resource": "Person",
"parameterized": "813106-aaron-mcguire",
"created_at": "2011-10-29T19:51:24.000-05:00",
"updated_at": "2014-12-11T07:21:08.000-06:00",
"name": "Aaron McGuire",
"title": null,
"photo": {
"url": "/assets/140x140.gif"
},
"description": null,
"short_description": null,
"time": "2014-12-11",
"show_path": "/people/813106-aaron-mcguire",
"published": true,
"aliases": [],
"phone": "813-689-6889",
"date_of_birth": "1991-03-01",
"first_name": "Aaron",
"middle_name": "",
"last_name": "McGuire",
"delimiters": [],
"market": null,
"region": null,
"most_recent_organization": null,
"households": [],
"court_cases": [],
"addresses": [
{
"id": 1,
"parameterized": "1",
"address": "123 Scarborough road",
"zip": "L5A2A9",
"city": "Ontario",
"country": "USA",
"state": "California",
"location": null,
"state_id": null,
"county_id": null,
"city_id": null
}
],
"id": "813106"
}
}
]
I'm trying to implement an auto-suggest control powered by an ES index. The index has multiple fields (Multi-language - Arabic and English) and I want to be able to search in all languages.
The easiest way to do that is NGram with the "_all" field, as long as some care is taken in the mapping definition. The issue we have now is how to accomplish this with multiple languages.
PS: We are looking to use a separate field for each of the possible languages (using one index).
I tried to use the nGram tokenizer and filter, and it works well for one language (English).
{
"template": "index_com",
"settings": {
"number_of_shards": 5,
"number_of_replicas": 1,
"analysis": {
"filter": {
"edgeNGram_filter": {
"type": "edgeNGram",
"min_gram": 2,
"max_gram": 20
}
},
"analyzer": {
"edgeNGram_analyzer": {
"type": "custom",
"tokenizer": "whitespace",
"filter": [
"lowercase",
"asciifolding",
"edgeNGram_filter"
]
}
}
}
},
"mappings": {
"product": {
"_all": {
"enabled": true,
"index_analyzer": "edgeNGram_analyzer",
"search_analyzer": "standard"
},
"properties": {
"id": {
"type": "string",
"index": "no",
"include_in_all": false
},
"uuid": {
"type": "string",
"index": "no",
"include_in_all": false
},
"name": {
"type": "string",
"include_in_all": true
},
"description": {
"type": "string",
"include_in_all": true
},
"brand": {
"type": "string",
"include_in_all": true
},
"made_id": {
"type": "string",
"include_in_all": true
},
"category": {
"type": "string",
"include_in_all": true
},
"category_id": {
"type": "integer",
"include_in_all": false
},
"keywords": {
"type": "string",
"include_in_all": true
},
"colors": {
"type": "string",
"index": "not_analyzed"
},
"colors_name": {
"type": "string",
"include_in_all": true
},
"quality": {
"type": "string",
"index": "not_analyzed"
},
"vendor_name": {
"type": "string",
"include_in_all": false
},
"vendor_location" : {
"type" : "geo_point",
"include_in_all": false
},
"price": {
"type": "double",
"include_in_all": false
},
"price_before_discount": {
"type": "double",
"include_in_all": false
},
"is_deal": {
"type": "integer",
"include_in_all": false
},
"is_best_seller": {
"type": "integer",
"include_in_all": false
},
"views": {
"type": "integer",
"include_in_all": false
},
"rating": {
"type": "integer",
"include_in_all": false
},
"updated_at": {
"type": "date",
"format": "dateOptionalTime"
},
"created_at": {
"type": "date",
"format": "dateOptionalTime"
},
"image_link": {
"type": "string",
"index": "not_analyzed"
}
}
}
}
}
Arabic analyzer:
{
"settings": {
"analysis": {
"filter": {
"arabic_stop": {
"type": "stop",
"stopwords": "_arabic_"
},
"arabic_keywords": {
"type": "keyword_marker",
"keywords": []
},
"arabic_stemmer": {
"type": "stemmer",
"language": "arabic"
}
},
"analyzer": {
"arabic": {
"tokenizer": "standard",
"filter": [
"lowercase",
"arabic_stop",
"arabic_normalization",
"arabic_keywords",
"arabic_stemmer"
]
}
}
}
}
}
Can someone suggest a solution? Thanks!
Your second snippet defines the arabic analyzer, which is already available so you shouldn't need to add it.
What you are missing is to tell elasticsearch to also use the arabic analyzer. So you want to analyze each field twice, in english and arabic. To do that, add
"fields": {
"ar": {
"type": "string",
"analyzer": "arabic"
},
"en": {
"type": "string",
"analyzer": "english"
}
}
to all your fields that have "include_in_all": true. That makes your mappings look like this:
{
"template": "index_com",
"settings": {
"number_of_shards": 5,
"number_of_replicas": 1,
"analysis": {
"filter": {
"edgeNGram_filter": {
"type": "edgeNGram",
"min_gram": 2,
"max_gram": 20
}
},
"analyzer": {
"edgeNGram_analyzer": {
"type": "custom",
"tokenizer": "whitespace",
"filter": [
"lowercase",
"asciifolding",
"edgeNGram_filter"
]
}
}
}
},
"mappings": {
"product": {
"_all": {
"enabled": true,
"index_analyzer": "edgeNGram_analyzer",
"search_analyzer": "standard"
},
"properties": {
"id": {
"type": "string",
"index": "no",
"include_in_all": false
},
"uuid": {
"type": "string",
"index": "no",
"include_in_all": false
},
"name": {
"type": "string",
"include_in_all": true,
"fields": {
"ar": {
"type": "string",
"analyzer": "arabic"
},
"en": {
"type": "string",
"analyzer": "english"
}
}
},
"description": {
"type": "string",
"include_in_all": true,
"fields": {
"ar": {
"type": "string",
"analyzer": "arabic"
},
"en": {
"type": "string",
"analyzer": "english"
}
}
},
"brand": {
"type": "string",
"include_in_all": true,
"fields": {
"ar": {
"type": "string",
"analyzer": "arabic"
},
"en": {
"type": "string",
"analyzer": "english"
}
}
},
"made_id": {
"type": "string",
"include_in_all": true,
"fields": {
"ar": {
"type": "string",
"analyzer": "arabic"
},
"en": {
"type": "string",
"analyzer": "english"
}
}
},
"category": {
"type": "string",
"include_in_all": true,
"fields": {
"ar": {
"type": "string",
"analyzer": "arabic"
},
"en": {
"type": "string",
"analyzer": "english"
}
}
},
"category_id": {
"type": "integer",
"include_in_all": false
},
"keywords": {
"type": "string",
"include_in_all": true,
"fields": {
"ar": {
"type": "string",
"analyzer": "arabic"
},
"en": {
"type": "string",
"analyzer": "english"
}
}
},
"colors": {
"type": "string",
"index": "not_analyzed"
},
"colors_name": {
"type": "string",
"include_in_all": true,
"fields": {
"ar": {
"type": "string",
"analyzer": "arabic"
},
"en": {
"type": "string",
"analyzer": "english"
}
}
},
"quality": {
"type": "string",
"index": "not_analyzed"
},
"vendor_name": {
"type": "string",
"include_in_all": false
},
"vendor_location": {
"type": "geo_point",
"include_in_all": false
},
"price": {
"type": "double",
"include_in_all": false
},
"price_before_discount": {
"type": "double",
"include_in_all": false
},
"is_deal": {
"type": "integer",
"include_in_all": false
},
"is_best_seller": {
"type": "integer",
"include_in_all": false
},
"views": {
"type": "integer",
"include_in_all": false
},
"rating": {
"type": "integer",
"include_in_all": false
},
"updated_at": {
"type": "date",
"format": "dateOptionalTime"
},
"created_at": {
"type": "date",
"format": "dateOptionalTime"
},
"image_link": {
"type": "string",
"index": "not_analyzed"
}
}
}
}
}
I am trying to index the following data into Elasticsearch:
{
"_id": "5619578c1983757a72efef15",
"aseg": {},
"cs": {
"source": "None",
"ss": "In Transit",
"sr": "Weight Captured",
"act": "+B",
"pid": "BAG21678106",
"st": "UD",
"dest": "Bharatpur_DC (Rajasthan)",
"u": "J",
"sl": "Jaipur_Hub (Rajasthan)",
"ud": "2015-10-12T14:59:44.270000",
"sd": "2015-10-12T14:59:44.270000"
},
"nsl": [
{
"dt": [
2015,
10,
10
],
"code": "X-PPONM"
},
{
"dt": [
2015,
10,
11
],
"code": "X-UCI"
}
]
}
but in return I am getting this error:
MapperParsingException[failed to parse [cs.nsl]]; nested: ElasticsearchIllegalArgumentException[unknown property [dt]];
I checked the mapping and it is correct; nsl nested inside the cs dict has a different mapping than nsl at the root level.
"cs": {
"properties": {
"act": {
"type": "string"
},
"add": {
"type": "string"
},
"asr": {
"type": "string"
},
"bucket": {
"type": "string"
},
"dest": {
"type": "string",
"index": "not_analyzed"
},
"dwbn": {
"type": "string"
},
"lcld": {
"type": "string"
},
"lat": {
"type": "string"
},
"lon": {
"type": "string"
},
"loc": {
"type": "double"
},
"nsl": {
"type": "string",
"index": "not_analyzed"
},
"ntd": {
"type": "date",
"format": "dateOptionalTime"
},
"pbs": {
"type": "string"
},
"pid": {
"type": "string"
},
"pupid": {
"type": "string"
},
"sd": {
"type": "date",
"format": "dateOptionalTime"
},
"sl": {
"type": "string",
"index": "not_analyzed"
},
"source": {
"properties": {
"source": {
"type": "string"
},
"source_id": {
"type": "string"
},
"source_type": {
"type": "string"
}
}
},
"sr": {
"type": "string"
},
"ss": {
"type": "string",
"index": "not_analyzed"
},
"st": {
"type": "string"
},
"u": {
"type": "string",
"index": "not_analyzed"
},
"ud": {
"type": "date",
"format": "dateOptionalTime"
},
"vh": {
"type": "string"
}
}
},
and for nsl at the root level the mapping is as follows:
"nsl": {
"properties" : {
"code" : {
"type" : "string",
"index": "not_analyzed"
},
"dt" : {
"type" : "string",
"index": "not_analyzed"
}
}
},
This is happening for only a few records; all the rest are syncing fine.
There aren't any changes in the payload.
Further, nsl is a sparse key inside cs.
In your mapping nsl is as follows -
"nsl": {
"type": "string",
"index": "not_analyzed"
},
As per the mapping, Elasticsearch expects a concrete string value for the nsl field, but it is an array of objects in the document you have provided.
Once Elasticsearch has a mapping, it is definite. You can't insert object data into a string field.
I tried your document without pre-setting any mapping as follows:
{
"aseg": {},
"cs": {
"source": "None",
"ss": "In Transit",
"sr": "Weight Captured",
"act": "+B",
"pid": "BAG21678106",
"st": "UD",
"dest": "Bharatpur_DC (Rajasthan)",
"u": "J",
"nsl":"foo",
"sl": "Jaipur_Hub (Rajasthan)",
"ud": "2015-10-12T14:59:44.270000",
"sd": "2015-10-12T14:59:44.270000"
},
"nsl": [
{
"dt": [
2015,
10,
10
],
"code": "X-PPONM"
},
{
"dt": [
2015,
10,
11
],
"code": "X-UCI"
}
]
}
And the ES created the mapping as follows:
"nsl": {
"properties": {
"dt": {
"type": "long"
},
"code": {
"type": "string"
}
}
}
As you can see, ES set the "dt" type to "long", which is the internal representation of a date type. So maybe you need to change that type?
Also, without seeing the successful document it is difficult to guess but I believe those documents do not have the "dt" field value.
Of course, you are free to put "not_analyzed" as you see fit for any field.
I created my index and mapping, but it appears that the mapping for nested documents changes after I start adding documents to the index.
Before I start adding documents the mapping is:
{"products_1_1": {
"mappings": {
"product": {
"properties": {
"description": {
"type": "string"
},
"metaDescription": {
"type": "string"
},
"metaTitle": {
"type": "string"
},
"mis_spells": {
"type": "string"
},
"name": {
"type": "string"
},
"productId": {
"type": "integer"
},
"categories": {
"type": "nested",
"include_in_parent": true,
"properties": {
"default_category": {
"type": "integer",
"index": "no"
},
"filter_name": {
"type": "string",
"index": "not_analyzed"
},
"id": {
"type": "integer"
},
"name": {
"type": "string"
},
"parent_categories_ids": {
"type": "string",
"index": "no"
},
"parent_categories_names": {
"type": "string",
"index": "no"
},
"parent_categories_url": {
"type": "string",
"index": "no"
},
"parent_category": {
"type": "integer",
"index": "no"
},
"tags": {
"type": "string"
},
"url": {
"type": "string",
"index": "no"
}
}
}
}
}
}
}
}
After I start inserting documents the mapping becomes:
{"products_1_1": {
"mappings": {
"product": {
"properties": {
"description": {
"type": "string"
},
"metaDescription": {
"type": "string"
},
"metaTitle": {
"type": "string"
},
"mis_spells": {
"type": "string"
},
"name": {
"type": "string"
},
"productId": {
"type": "integer"
},
"categories": {
"type": "nested",
"include_in_parent": true,
"properties": {
"0": {
"properties": {
"default_category": {
"type": "long"
},
"filter_name": {
"type": "string"
},
"id": {
"type": "long"
},
"name": {
"type": "string"
},
"parent_categories_ids": {
"type": "string"
},
"parent_categories_names": {
"type": "string"
},
"parent_categories_url": {
"type": "string"
},
"parent_category": {
"type": "long"
},
"tags": {
"type": "string"
},
"url": {
"type": "string"
}
}
},
"1": {
"properties": {
"default_category": {
"type": "long"
},
"filter_name": {
"type": "string"
},
"id": {
"type": "long"
},
"name": {
"type": "string"
},
"parent_categories_ids": {
"type": "string"
},
"parent_categories_names": {
"type": "string"
},
"parent_categories_url": {
"type": "string"
},
"parent_category": {
"type": "long"
},
"tags": {
"type": "string"
},
"url": {
"type": "string"
}
}
},
"2": {
"properties": {
"default_category": {
"type": "long"
},
"filter_name": {
"type": "string"
},
"id": {
"type": "long"
},
"name": {
"type": "string"
},
"parent_categories_ids": {
"type": "string"
},
"parent_categories_names": {
"type": "string"
},
"parent_categories_url": {
"type": "string"
},
"parent_category": {
"type": "long"
},
"tags": {
"type": "string"
},
"url": {
"type": "string"
}
}
},
...
"default_category": {
"type": "integer",
"index": "no"
},
"filter_name": {
"type": "string",
"index": "not_analyzed"
},
"id": {
"type": "integer"
},
"name": {
"type": "string"
},
"parent_categories_ids": {
"type": "string",
"index": "no"
},
"parent_categories_names": {
"type": "string",
"index": "no"
},
"parent_categories_url": {
"type": "string",
"index": "no"
},
"parent_category": {
"type": "integer",
"index": "no"
},
"tags": {
"type": "string"
},
"url": {
"type": "string",
"index": "no"
}
}
}
}
}
}
}
}
Does anybody have any idea why my mapping would be altered this way?
Thanks!
I have the following mapping on my index in elasticsearch.
{
"mail": {
"properties": {
"project": {
"type": "string",
"index": "not_analyzed",
"null_value": "na",
"include_in_all": "false"
},
"mailbox": {
"type": "string",
"index": "not_analyzed",
"null_value": "#na",
"store" : "yes"
},
"path": {
"type": "string",
"index": "not_analyzed",
"null_value": "#na",
"store" : "yes"
},
"messageid": {
"type": "string",
"index": "not_analyzed",
"null_value": "na",
"include_in_all": "false"
},
"nodeid":
{
"type": "string",
"index": "not_analyzed",
"null_value": "na",
"include_in_all": "false",
"store" : "yes"
},
"replyto": {
"type": "string",
"index": "not_analyzed",
"null_value": "na",
"include_in_all": "false"
},
"references": {
"type": "string",
"index": "not_analyzed",
"null_value": "na",
"include_in_all": "false"
},
"subject": {
"boost": "3.0",
"type": "string",
"term_vector": "with_positions_offsets",
"analyzer": "snowball"
},
"from": {
"type": "nested",
"properties": {
"name": {
"type" : "multi_field",
"fields" : {
"name" : {"type" : "string", "analyzer" : "standard", "index" : "analyzed"},
"untouched" : {"type" : "string", "index" : "not_analyzed"}
}
},
"address": {
"type": "string",
"analyzer": "analyzer_email"
},
"nodeid": {
"type": "string",
"index": "not_analyzed",
"store" : "yes"
},
"facet": {
"type": "string",
"index": "not_analyzed",
"store" : "yes"
}
}
},
"to": {
"type": "nested",
"properties":{
"name": {
"type": "string",
"analyzer":"analyzer_keyword"
},
"address": {
"type": "string",
"analyzer": "analyzer_email"
},
"nodeid": {
"type": "string",
"index": "not_analyzed",
"store" : "yes"
},
"facet": {
"type": "string",
"index": "not_analyzed",
"store" : "yes"
}
}
},
"cc": {
"type": "nested",
"properties":{
"name": {
"type": "string",
"analyzer":"analyzer_keyword"
},
"address": {
"type": "string",
"analyzer": "analyzer_email"
},
"nodeid": {
"type": "string",
"index": "not_analyzed",
"store" : "yes"
},
"facet": {
"type": "string",
"index": "not_analyzed",
"store" : "yes"
}
}
},
"bcc": {
"type": "nested",
"properties":{
"name": {
"type": "string",
"analyzer":"analyzer_keyword"
},
"address": {
"type": "string",
"analyzer": "analyzer_email"
},
"nodeid": {
"type": "string",
"index": "not_analyzed",
"store" : "yes"
},
"facet": {
"type": "string",
"index": "not_analyzed",
"store" : "yes"
}
}
},
"message_snippet": {
"type": "string",
"index": "no",
"include_in_all": "false"
},
"text_messages": {
"type": "string",
"store": "yes",
"term_vector": "with_positions_offsets",
"analyzer": "snowball"
},
"html_messages": {
"type": "string",
"store": "yes",
"term_vector": "with_positions_offsets",
"analyzer": "snowball"
},
"message_attachments": {
"dynamic": "true",
"properties":{
"filename":{
"type": "string",
"store": "yes"
},
"content":{
"type": "string",
"store": "yes",
"term_vector": "with_positions_offsets",
"analyzer": "snowball"
},
"hash":{
"type": "string",
"store": "yes",
"analyzer": "analyzer_keyword"
},
"nodeid":{
"type": "string"
}
}
},
"date": {
"type": "date"
},
"entities": {
"type": "nested",
"properties": {
"name": {
"type": "string",
"analyzer": "analyzer_keyword"
},
"type": {
"type": "string",
"analyzer": "analyzer_keyword"
},
"nodeid":{
"type": "string"
},
"facet": {
"type": "string",
"index": "not_analyzed",
"store" : "yes"
}
}
}
}
}
}
I tried searching on the mail.from.name field with the following query, but it doesn't give me any results.
{
"query": {
"nested": {
"path": "from",
"query": {
"term": {
"name": "mark"
}
}
}
}
}
What is wrong with my mapping or query?
A sample document looks like this
{
"project": "test",
"mailbox": "test.pst",
"messageid": "5e667f7f-4421-4836-91f3-8b5216c04839",
"nodeid": "671",
"subject": "No Subject",
"from": [
{
"name": "Mike Johnson",
"address": "mike#gmail.com",
"nodeid": "3",
"facet": "Mike Johnson"
}
],
"to": [
{
"name": "John Doe",
"address": "JDoe#gmail.com",
"nodeid": "367",
"facet": "John Doe"
}
],
"cc": [],
"bcc": [],
"textbody": "this is a test email with no further lines of text",
"htmlbody": "",
"snippet": "",
"transmitted": "",
"replyto": "",
"references": "",
"attachments": [],
"entities": [
{
"name": "google",
"type": "organization",
"nodeid": "656",
"facet": "google"
}
],
"domains": [
"google.com"
],
"path": ""
}
You need to reference the nested object's name again in the field path inside your query (i.e. use "from.name" rather than just "name"):
{
"query": {
"nested": {
"path": "from",
"query": {
"term": {
"from.name": "mike"
}
}
}
}
}