Related
I am trying to create an index with a custom default analyzer.
I already checked the following questions:
Analyzer not found exception while creating an index with mapping and settings
How to specify an analyzer while creating an index in ElasticSearch
mapper_parsing_exception for a custom analyzer while creating index in elasticsearch?
but they didn't solve the issue.
Here is my schema:
put /emails
{
"mappings": {
"email": {
"analyzer": "lkw",
"properties": {
"createdOn": {
"type": "date",
"store": true,
"format": "strict_date_optional_time||epoch_millis"
},
"data": {
"type": "object",
"dynamic": "true"
},
"from": {
"type": "string",
"store": true
},
"id": {
"type": "string",
"store": true
},
"sentOn": {
"type": "date",
"store": true,
"format": "strict_date_optional_time||epoch_millis"
},
"sesId": {
"type": "string",
"store": true
},
"subject": {
"type": "string",
"store": true,
"analyzer": "standard"
},
"templates": {
"properties": {
"html": {
"type": "string",
"store": true
},
"plainText": {
"type": "string",
"store": true
}
}
},
"to": {
"type": "string",
"store": true
},
"type": {
"type": "string",
"store": true
}
}
},
"event": {
"_parent": {
"type": "email"
},
"analyzer": "lkw",
"properties": {
"id": {
"type": "string",
"store": true
},
"origin": {
"type": "string",
"store": true
},
"time": {
"type": "date",
"store": true,
"format": "strict_date_optional_time||epoch_millis"
},
"type": {
"type": "string",
"store": true
},
"userAgent": {
"type": "string",
"store": true
}
}
}
},
"settings": {
"analysis": {
"analyzer": {
"lkw": {
"tokenizer": "keyword",
"filter": [
"lowercase"
],
"type": "custom"
}
}
}
}
}
When I execute the command above, I get this error:
{
"error": {
"root_cause": [
{
"type": "mapper_parsing_exception",
"reason": "Root mapping definition has unsupported parameters: [analyzer : lkw]"
}
],
"type": "mapper_parsing_exception",
"reason": "Failed to parse mapping [event]: Root mapping definition has unsupported parameters: [analyzer : lkw]",
"caused_by": {
"type": "mapper_parsing_exception",
"reason": "Root mapping definition has unsupported parameters: [analyzer : lkw]"
}
},
"status": 400
}
Since you have only a few string fields, I suggest you simply specify your lkw analyzer where you need it, just like you did for the standard one:
PUT /emails
{
"mappings": {
"email": {
"properties": {
"createdOn": {
"type": "date",
"store": true,
"format": "strict_date_optional_time||epoch_millis"
},
"data": {
"type": "object",
"dynamic": "true"
},
"from": {
"type": "string",
"store": true,
"analyzer": "lkw"
},
"id": {
"type": "string",
"store": true,
"analyzer": "lkw"
},
"sentOn": {
"type": "date",
"store": true,
"format": "strict_date_optional_time||epoch_millis"
},
"sesId": {
"type": "string",
"store": true,
"analyzer": "lkw"
},
"subject": {
"type": "string",
"store": true,
"analyzer": "standard"
},
"templates": {
"properties": {
"html": {
"type": "string",
"store": true,
"analyzer": "lkw"
},
"plainText": {
"type": "string",
"store": true,
"analyzer": "lkw"
}
}
},
"to": {
"type": "string",
"store": true,
"analyzer": "lkw"
},
"type": {
"type": "string",
"store": true,
"analyzer": "lkw"
}
}
},
"event": {
"_parent": {
"type": "email"
},
"properties": {
"id": {
"type": "string",
"store": true,
"analyzer": "lkw"
},
"origin": {
"type": "string",
"store": true,
"analyzer": "lkw"
},
"time": {
"type": "date",
"store": true,
"format": "strict_date_optional_time||epoch_millis"
},
"type": {
"type": "string",
"store": true,
"analyzer": "lkw"
},
"userAgent": {
"type": "string",
"store": true,
"analyzer": "lkw"
}
}
}
},
"settings": {
"analysis": {
"analyzer": {
"lkw": {
"tokenizer": "keyword",
"filter": [
"lowercase"
],
"type": "custom"
}
}
}
}
}
Hi, I am having a problem. We are building a help system where users can search the help content. I have the following mapping:
{
"com.mycompany.content": {
"mappings": {
"pageMasterContent": {
"properties": {
"contentType": {
"type": "string",
"term_vector": "with_positions_offsets",
"include_in_all": true
},
"customer": {
"type": "nested",
"properties": {
"class": {
"type": "string",
"index": "no",
"include_in_all": false
},
"customerName": {
"type": "string",
"term_vector": "with_positions_offsets",
"include_in_all": true
},
"id": {
"type": "long",
"include_in_all": false
},
"ref": {
"type": "string",
"index": "no",
"include_in_all": false
}
}
},
"language": {
"type": "nested",
"properties": {
"class": {
"type": "string",
"index": "no",
"include_in_all": false
},
"id": {
"type": "long",
"include_in_all": false
},
"langName": {
"type": "string",
"term_vector": "with_positions_offsets",
"include_in_all": false
},
"ref": {
"type": "string",
"index": "no",
"include_in_all": false
}
}
},
"pageContent": {
"type": "string",
"term_vector": "with_positions_offsets",
"include_in_all": true
},
"pageMaster": {
"type": "nested",
"properties": {
"class": {
"type": "string",
"index": "no",
"include_in_all": false
},
"id": {
"type": "long",
"include_in_all": false
},
"pageCode": {
"type": "string",
"term_vector": "with_positions_offsets",
"include_in_all": true
},
"pageSinceVersion": {
"properties": {
"class": {
"type": "string"
},
"id": {
"type": "long"
},
"versionNumber": {
"type": "string"
},
"versionOrder": {
"type": "long"
}
}
},
"pageUptoVersion": {
"properties": {
"class": {
"type": "string"
},
"id": {
"type": "long"
},
"versionNumber": {
"type": "string"
},
"versionOrder": {
"type": "long"
}
}
},
"ref": {
"type": "string",
"index": "no",
"include_in_all": false
}
}
},
"pageVersion": {
"type": "nested",
"properties": {
"class": {
"type": "string",
"index": "no",
"include_in_all": false
},
"id": {
"type": "long",
"include_in_all": false
},
"ref": {
"type": "string",
"index": "no",
"include_in_all": false
},
"versionNumber": {
"type": "string",
"term_vector": "with_positions_offsets",
"include_in_all": true
},
"versionOrder": {
"type": "integer",
"include_in_all": false
}
}
}
}
}
}
}
}
I want to filter my data with the following parameters:
language, pageSinceVersion and pageUptoVersion. When a user searches for any content, the results should show only content for that language that lies between the two versions (i.e. > pageSinceVersion and < pageUptoVersion).
Thanks in advance.
I'm developing a search engine for my client which has to use synonym expansion. I can properly set up my index with a synonym token filter and a custom file (synonym.txt).
Example: ipod, i-pod, i pod
However, whenever we want a synonym expansion, I get the synonyms from Elasticsearch and display them as tags on the website.
Each tag can be unselected. In this case, how can we tell Elasticsearch at query time to use a different set of synonyms, one that does not come from the synonym.txt file?
Example: if the user is looking for the term ipod, then I will show these two tags: i-pod, i pod. But if the user chooses to unselect "i-pod", I would like to be able to specify that only "i pod" is a synonym of "ipod" while querying.
My index settings are:
{
"settings": {
"analysis": {
"filter": {
"elision": {
"type": "elision",
"articles": ["l", "m", "t", "qu", "n", "s", "j", "d", "c", "jusqu", "quoiqu", "lorsqu", "puisqu"]
},
"french_stop": {
"type": "stop",
"stopwords": "_french_"
},
"french_stemmer": {
"type": "stemmer",
"language": "light_french"
},
"synonymsFilter": {
"type" : "synonym",
"synonyms_path" : "analysis/synonym.txt"
},
"autocompleteFilter": {
"max_shingle_size": "5",
"min_shingle_size": "2",
"type": "shingle"
}
},
"analyzer": {
"default": {
"tokenizer": "letter",
"filter": ["asciifolding", "lowercase", "french_stemmer", "elision", "french_stop"]
},
"auto-complete-suggester": {
"filter": [
"lowercase",
"autocompleteFilter"
],
"char_filter": [
"html_strip"
],
"type": "custom",
"tokenizer": "standard"
},
"did-you-mean-suggester": {
"tokenizer": "standard",
"filter": ["asciifolding", "lowercase"]
},
"synonym_analyzer" : {
"tokenizer" : "whitespace",
"filter" : ["synonymsFilter"]
},
"synonym_analyzer2": {
"tokenizer": "standard",
"filter": ["asciifolding", "lowercase", "french_stop", "autocompleteFilter"]
}
}
}
},
"mappings": {
"companies": {
"date_detection": "false",
"properties": {
"auto_complete": {
"type": "string",
"analyzer": "auto-complete-suggester",
"term_vector" : "yes"
},
"did_you_mean": {
"type": "string",
"analyzer": "did-you-mean-suggester",
"term_vector" : "yes"
},
"synonyms": {
"type": "string",
"analyzer": "synonym_analyzer",
"term_vector" : "yes"
},
"company_name": {
"type": "string",
"fields": {
"raw": { "type": "string", "index": "not_analyzed" },
"suggester": { "type": "string", "analyzer": "did-you-mean-suggester" }
},
"term_vector" : "yes",
"copy_to": [
"did_you_mean",
"auto_complete",
"synonyms"
]
},
"siren": {
"type": "string",
"fields": {
"raw": { "type": "string", "index": "not_analyzed" },
"suggester": { "type": "string", "analyzer": "did-you-mean-suggester" }
}
},
"CPposteEntreprise": {
"type": "string",
"fields": {
"raw": { "type": "string", "index": "not_analyzed" },
"suggester": { "type": "string", "analyzer": "did-you-mean-suggester" }
}
},
"commercial_company_name": {
"type": "string",
"fields": {
"raw": { "type": "string", "index": "not_analyzed" },
"suggester": { "type": "string", "analyzer": "did-you-mean-suggester" }
},
"term_vector" : "yes",
"copy_to": [
"did_you_mean",
"auto_complete",
"synonyms"
]
},
"year_creation_company": {
"type": "long"
},
"month_creation_company": {
"type": "long"
},
"month_year_creation_company": {
"type": "date",
"format": "yyyyMM",
"fields": {
"raw": { "type": "string", "index": "not_analyzed" }
}
},
"city_company": {
"type": "string",
"fields": {
"suggester": { "type": "string", "analyzer": "did-you-mean-suggester" }
},
"term_vector" : "yes",
"copy_to": [
"did_you_mean",
"auto_complete"
]
},
"departement_company": {
"type": "string",
"fields": {
"raw": { "type": "string", "index": "not_analyzed" },
"suggester": { "type": "string", "analyzer": "did-you-mean-suggester" }
},
"term_vector" : "yes",
"copy_to": [
"did_you_mean",
"auto_complete"
]
},
"region_company": {
"type": "string",
"fields": {
"raw": { "type": "string", "index": "not_analyzed" },
"suggester": { "type": "string", "analyzer": "did-you-mean-suggester" }
},
"term_vector" : "yes",
"copy_to": [
"did_you_mean",
"auto_complete"
]
},
"is_excellence": {
"type": "string",
"fields": {
"raw": { "type": "string", "index": "not_analyzed" },
"suggester": { "type": "string", "analyzer": "did-you-mean-suggester" }
}
},
"interlocuteurs": {
"type": "string",
"fields": {
"raw": { "type": "string", "index": "not_analyzed" },
"suggester": { "type": "string", "analyzer": "did-you-mean-suggester" }
},
"term_vector" : "yes",
"copy_to": [
"did_you_mean",
"auto_complete"
]
},
"flag_entreprise_finance": {
"type": "string",
"fields": {
"raw": { "type": "string", "index": "not_analyzed" }
}
},
"flag_indirect": {
"type": "string",
"fields": {
"raw": { "type": "string", "index": "not_analyzed" }
}
},
"flag_direct": {
"type": "string",
"fields": {
"raw": { "type": "string", "index": "not_analyzed" }
}
},
"flag_investissement": {
"type": "string",
"fields": {
"raw": { "type": "string", "index": "not_analyzed" }
}
},
"montant_total_investissement": {
"type": "double",
"fields": {
"raw": { "type": "double", "index": "not_analyzed" }
}
},
"motant_total_finance": {
"type": "double",
"fields": {
"raw": { "type": "double", "index": "not_analyzed" }
}
},
"nombre_investissement": {
"type": "long",
"fields": {
"raw": { "type": "long", "index": "not_analyzed" }
}
},
"nombre_financement_accorde": {
"type": "long",
"fields": {
"raw": { "type": "long", "index": "not_analyzed" }
}
},
"caInterne": {
"type": "double",
"fields": {
"raw": { "type": "double", "index": "not_analyzed" }
}
},
"caExterne": {
"type": "double",
"fields": {
"raw": { "type": "double", "index": "not_analyzed" }
}
},
"caFiltre": {
"type": "double",
"fields": {
"raw": { "type": "double", "index": "not_analyzed" }
}
},
"effectif": {
"type": "double",
"fields": {
"raw": { "type": "double", "index": "not_analyzed" }
}
},
"textRank": {
"type": "string",
"fields": {
"raw": { "type": "string", "index": "not_analyzed" }
},
"term_vector" : "yes",
"copy_to": [
"synonyms"
]
},
"masterKeywords": {
"type": "nested",
"properties": {
"keyword": {
"type":"string",
"fields": {
"raw": { "type": "string", "index": "not_analyzed" }
},
"term_vector" : "yes",
"copy_to": [
"did_you_mean",
"auto_complete"
]
}
}
},
"dossiers":{
"type": "nested",
"date_detection": "false",
"properties": {
"dossierCommercial": {
"type": "long"
},
"sousDossierCommercial": {
"type": "long"
},
"historiqueProduitBPI": {
"type": "string"
},
"statutSousDossier": {
"type": "string"
},
"dateDecision": {
"type": "date",
"fields": {
"suggester": { "type": "string", "analyzer": "did-you-mean-suggester" },
"raw": { "type": "string", "index": "not_analyzed" }
}
},
"nomChargesAffaires": {
"type": "string",
"fields": {
"suggester": { "type": "string", "analyzer": "did-you-mean-suggester" }
}
},
"contactChargesAffaires": {
"type": "string",
"fields": {
"suggester": { "type": "string", "analyzer": "did-you-mean-suggester" }
}
},
"montantAide": {
"type": "double",
"fields": {
"suggester": { "type": "string", "analyzer": "did-you-mean-suggester" },
"raw": { "type": "string", "index": "not_analyzed" }
}
},
"contentValidation": {
"type": "string",
"fields": {
"suggester": { "type": "string", "analyzer": "did-you-mean-suggester" }
},
"term_vector" : "yes",
"copy_to": [
"did_you_mean",
"auto_complete",
"synonyms"
]
},
"contentDecision": {
"type": "string",
"fields": {
"suggester": { "type": "string", "analyzer": "did-you-mean-suggester" }
},
"term_vector" : "yes",
"copy_to": [
"did_you_mean",
"auto_complete",
"synonyms"
]
},
"contentDirectionEngagements": {
"type": "string",
"fields": {
"suggester": { "type": "string", "analyzer": "did-you-mean-suggester" }
},
"term_vector" : "yes",
"copy_to": [
"did_you_mean",
"auto_complete",
"synonyms"
]
},
"metaDomain": {
"type": "string",
"fields": {
"suggester": { "type": "string", "analyzer": "did-you-mean-suggester" }
}
},
"sousSecteur": {
"type": "string",
"fields": {
"suggester": { "type": "string", "analyzer": "did-you-mean-suggester" }
}
},
"keywords": {
"type": "string",
"fields": {
"suggester": { "type": "string", "analyzer": "did-you-mean-suggester" }
},
"term_vector" : "yes",
"copy_to": [
"did_you_mean",
"auto_complete"
]
},
"descriptionProjet": {
"type": "string",
"fields": {
"suggester": { "type": "string", "analyzer": "did-you-mean-suggester" }
},
"term_vector" : "yes",
"copy_to": [
"did_you_mean",
"auto_complete",
"synonyms"
]
}
}
},
"investissements": {
"type": "nested",
"date_detection": "false",
"properties": {
"flag_indirect": {
"type": "string",
"fields": {
"raw": { "type": "string", "index": "not_analyzed" }
}
},
"nom_societe_gestion_svi":{
"type": "string"
},
"date_entree_investissement":{
"type": "date",
"fields": {
"raw": { "type": "string", "index": "not_analyzed" }
}
},
"montant_investissement_df":{
"type": "double"
},
"description_projet_investissement":{
"type": "string",
"term_vector" : "yes",
"copy_to": [
"did_you_mean",
"auto_complete",
"synonyms"
]
}
}
},
"bilans":{
"type": "nested",
"date_detection": "false",
"properties": {
"bilanAnneeN": {
"properties": {
"effectif": {
"type": "long",
"fields": {
"raw": { "type": "long", "index": "not_analyzed" }
}
},
"capital": {
"type": "double"
},
"resultatNet": {
"type": "double"
},
"clotureDate": {
"type": "date"
},
"annee": {
"type": "long"
},
"ebeMoyen": {
"type": "double"
},
"caInterne": {
"type": "double",
"fields": {
"raw": { "type": "double", "index": "not_analyzed" }
}
},
"caExterne": {
"type": "double",
"fields": {
"raw": { "type": "double", "index": "not_analyzed" }
}
}
}
},
"bilanAnneeN1": {
"properties": {
"effectif": {
"type": "long",
"fields": {
"raw": { "type": "long", "index": "not_analyzed" }
}
},
"capital": {
"type": "double"
},
"resultatNet": {
"type": "double"
},
"clotureDate": {
"type": "date"
},
"annee": {
"type": "long"
},
"ebeMoyen": {
"type": "double"
},
"caInterne": {
"type": "double",
"fields": {
"raw": { "type": "double", "index": "not_analyzed" }
}
},
"caExterne": {
"type": "double",
"fields": {
"raw": { "type": "double", "index": "not_analyzed" }
}
}
}
},
"bilanAnneeN2": {
"properties": {
"effectif": {
"type": "long",
"fields": {
"raw": { "type": "long", "index": "not_analyzed" }
}
},
"capital": {
"type": "double"
},
"resultatNet": {
"type": "double"
},
"clotureDate": {
"type": "date"
},
"annee": {
"type": "long"
},
"ebeMoyen": {
"type": "double"
},
"caInterne": {
"type": "double",
"fields": {
"raw": { "type": "double", "index": "not_analyzed" }
}
},
"caExterne": {
"type": "double",
"fields": {
"raw": { "type": "double", "index": "not_analyzed" }
}
}
}
}
}
},
"news": {
"type": "nested",
"date_detection": "false",
"properties": {
"date": {
"type": "date",
"fields": {
"suggester": { "type": "string", "analyzer": "did-you-mean-suggester" },
"raw": { "type": "string", "index": "not_analyzed" }
}
},
"description": {
"type": "string",
"term_vector" : "yes",
"copy_to": [
"did_you_mean",
"auto_complete",
"synonyms"
]
},
"title": {
"type": "string"
},
"content": {
"type": "string",
"term_vector" : "yes",
"copy_to": [
"did_you_mean",
"auto_complete",
"synonyms"
]
},
"url": {
"type": "string"
},
"tags": {
"type": "string",
"term_vector" : "yes",
"copy_to": [
"did_you_mean",
"auto_complete"
]
},
"links": {
"type": "string"
},
"external_source": {
"type": "string"
}
}
}
}
}
}
}
For now, I am using a master field called "synonyms". Is this a good idea?
Thanks in advance for your help.
I have created an Elasticsearch index with a user-defined type; the entire configuration and sample data follow. I am trying to query all properties nested under episodes with a search phrase, but an exception occurs.
Can the community help?
PUT test
Mapping
PUT test/_mapping/mytype
{
"properties": {
"age": {
"type": "string"
},
"birthdate": {
"type": "date",
"format": "yyyy-MM-dd"
},
"deathdate": {
"type": "date",
"format": "yyyy-MM-dd"
},
"deceased": {
"type": "boolean"
},
"documentsignatureid": {
"type": "integer"
},
"episodes": {
"type": "nested",
"properties": {
"activities": {
"type": "nested",
"properties": {
"description": {
"type": "string"
},
"executiondate": {
"type": "date",
"format": "date_optional_time"
},
"performerspecialty": {
"type": "string",
"index": "not_analyzed"
},
"type": {
"type": "string",
"index": "not_analyzed"
}
}
},
"administrations": {
"type": "nested",
"properties": {
"activeprincipal": {
"type": "string"
},
"administrationdate": {
"type": "date",
"format": "date_optional_time"
},
"comercialname": {
"type": "string"
},
"dose": {
"type": "double"
},
"frequency": {
"type": "integer"
},
"medicinecode": {
"type": "string",
"index": "not_analyzed"
}
}
},
"age": {
"type": "string"
},
"agedescription": {
"type": "string",
"index": "not_analyzed"
},
"ageindays": {
"type": "integer"
},
"backgrounds": {
"type": "nested",
"properties": {
"date": {
"type": "date",
"format": "date_optional_time"
},
"observation": {
"type": "string"
},
"subtype": {
"type": "string",
"index": "not_analyzed"
},
"type": {
"type": "string",
"index": "not_analyzed"
}
}
},
"biometrics": {
"type": "nested",
"properties": {
"date": {
"type": "date",
"format": "date_optional_time"
},
"description": {
"type": "string"
},
"value": {
"type": "string",
"index": "not_analyzed"
}
}
},
"birthdate": {
"type": "date",
"format": "yyyy-MM-dd"
},
"clinicalnotes": {
"type": "nested",
"properties": {
"date": {
"type": "date",
"format": "date_optional_time"
},
"observation": {
"type": "string"
},
"specialty": {
"type": "string"
},
"type": {
"type": "string",
"index": "not_analyzed"
}
}
},
"deathdate": {
"type": "date",
"format": "yyyy-MM-dd"
},
"deceased": {
"type": "boolean"
},
"diagnostics": {
"type": "nested",
"properties": {
"code": {
"type": "string",
"index": "not_analyzed"
},
"codification": {
"type": "string",
"index": "not_analyzed"
},
"description": {
"type": "string"
},
"enddate": {
"type": "date",
"format": "date_optional_time"
},
"startdate": {
"type": "date",
"format": "date_optional_time"
},
"state": {
"type": "string",
"index": "not_analyzed"
},
"type": {
"type": "string",
"index": "not_analyzed"
}
}
},
"dietetics": {
"type": "nested",
"properties": {
"date": {
"type": "date",
"format": "date_optional_time"
},
"type": {
"type": "string",
"index": "not_analyzed"
}
}
},
"episodeid": {
"type": "string",
"index": "not_analyzed"
},
"episodetype": {
"type": "string",
"index": "not_analyzed"
},
"examinationrequests": {
"type": "nested",
"properties": {
"anticonceptionmethod": {
"type": "string",
"index": "not_analyzed"
},
"cancellationreason": {
"type": "string"
},
"clinicalinformation": {
"type": "string"
},
"date": {
"type": "date",
"format": "yyyy-MM-dd"
},
"documentnumber": {
"type": "string"
},
"duration": {
"type": "string",
"index": "not_analyzed"
},
"examinformations": {
"type": "nested",
"properties": {
"admstate": {
"type": "string",
"index": "not_analyzed"
},
"anatomicalregion": {
"type": "string",
"index": "not_analyzed"
},
"arscode": {
"type": "string",
"index": "not_analyzed"
},
"blockexternal": {
"type": "integer"
},
"cancellationmotive": {
"type": "string",
"index": "not_analyzed"
},
"charge": {
"type": "string",
"index": "not_analyzed"
},
"code": {
"type": "string",
"index": "not_analyzed"
},
"documentnumber": {
"type": "string",
"index": "not_analyzed"
},
"executantmechanicalnumber": {
"type": "string",
"index": "not_analyzed"
},
"externalcode": {
"type": "string",
"index": "not_analyzed"
},
"externaldescription": {
"type": "string",
"index": "not_analyzed"
},
"externalexecutionmotive": {
"type": "string",
"index": "not_analyzed"
},
"face": {
"type": "string",
"index": "not_analyzed"
},
"id": {
"type": "integer"
},
"justification": {
"type": "string"
},
"laterality": {
"type": "string",
"index": "not_analyzed"
},
"name": {
"type": "integer"
},
"number": {
"type": "integer"
},
"observation": {
"type": "string"
},
"sextante": {
"type": "integer"
},
"state": {
"type": "string",
"index": "not_analyzed"
}
}
},
"executingservicecode": {
"type": "string"
},
"executingservicedescription": {
"type": "string"
},
"extrainformation": {
"type": "string"
},
"factor": {
"type": "string",
"index": "not_analyzed"
},
"frequency": {
"type": "string",
"index": "not_analyzed"
},
"harvestdate": {
"type": "date",
"format": "yyyy-MM-dd"
},
"lastmenstruationdate": {
"type": "date",
"format": "yyyy-MM-dd"
},
"menopause": {
"type": "boolean"
},
"nottransportable": {
"type": "string",
"index": "not_analyzed"
},
"number": {
"type": "string",
"index": "not_analyzed"
},
"observations": {
"type": "string"
},
"priority": {
"type": "string",
"index": "not_analyzed"
},
"requestdate": {
"type": "date",
"format": "yyyy-MM-dd"
},
"requesthour": {
"type": "date",
"format": "yyyy-MM-dd"
},
"requestingmedic": {
"type": "string"
},
"requestingservicecode": {
"type": "string",
"index": "not_analyzed"
},
"requestingservicedescription": {
"type": "string"
},
"sessions": {
"type": "integer"
},
"state": {
"type": "string",
"index": "not_analyzed"
}
}
},
"gender": {
"type": "string",
"index": "not_analyzed"
},
"internments": {
"type": "nested",
"properties": {
"internmentdate": {
"type": "date",
"format": "date_optional_time"
},
"number": {
"type": "string",
"index": "not_analyzed"
},
"realeasedate": {
"type": "date",
"format": "date_optional_time"
}
}
},
"maritalstatus": {
"type": "string",
"index": "not_analyzed"
},
"mothername": {
"type": "string"
},
"nacionality": {
"type": "string",
"index": "not_analyzed"
},
"name": {
"type": "string"
},
"patientid": {
"type": "string",
"index": "not_analyzed"
},
"patienttype": {
"type": "string",
"index": "not_analyzed"
},
"prescriptions": {
"type": "nested",
"properties": {
"activeprincipal": {
"type": "string"
},
"comercialname": {
"type": "string"
},
"dose": {
"type": "double"
},
"frequency": {
"type": "integer"
},
"medicinecode": {
"type": "string",
"index": "not_analyzed"
},
"prescriptiondate": {
"type": "date",
"format": "date_optional_time"
},
"scope": {
"type": "string",
"index": "not_analyzed"
}
}
},
"sns": {
"type": "string",
"index": "not_analyzed"
},
"title": {
"type": "string"
},
"uniqueid": {
"type": "string",
"index": "not_analyzed"
}
}
},
"gender": {
"type": "string",
"index": "not_analyzed"
},
"id": {
"type": "string",
"index": "not_analyzed"
},
"maritalstatus": {
"type": "string",
"index": "not_analyzed"
},
"mothername": {
"type": "string"
},
"nacionality": {
"type": "string",
"index": "not_analyzed"
},
"name": {
"type": "string"
},
"patientid": {
"type": "string",
"index": "not_analyzed"
},
"patienttype": {
"type": "string",
"index": "not_analyzed"
},
"sns": {
"type": "string",
"index": "not_analyzed"
},
"title": {
"type": "string"
},
"uniqueid": {
"type": "string",
"index": "not_analyzed"
}
}
}
Sample Documents :
PUT test/mytype/UNMDV%2F000000075
{
"documentsignatureid": 14706,
"episodes": [
{
"administrations": [],
"birthdate": "1956-06-07",
"deceased": false,
"gender": "F",
"patientid": "000000075",
"episodetype": "Consultas",
"clinicalnotes": [
{
"date": "2016-01-25T00:00:00",
"specialty": "Oncologia",
"observation": "Dores, etc",
"type": "NOTAS_PRIV"
},
{
"date": "2016-01-25T00:00:00",
"specialty": "Oncologia",
"observation": "Dores, etc",
"type": "EVOLUCAO"
}
],
"examinationrequests": [],
"episodeid": "242546",
"biometrics": [],
"agedescription": "59 Anos",
"diagnostics": [],
"dietetics": [],
"maritalstatus": "S",
"activities": [],
"backgrounds": [],
"name": "POLIANA PENHA DE JESUS",
"mothername": "SOFIA FIGUEIRA",
"internments": [],
"patienttype": "UNMDV",
"prescriptions": [],
"ageindays": 21835,
"uniqueid": "209445701",
"age": "59"
},
{
"administrations": [],
"birthdate": "1956-06-07",
"deceased": false,
"gender": "F",
"patientid": "000000075",
"episodetype": "Consultas",
"clinicalnotes": [],
"examinationrequests": [],
"episodeid": "242546",
"biometrics": [],
"agedescription": "59 Anos",
"diagnostics": [],
"dietetics": [],
"maritalstatus": "S",
"activities": [],
"backgrounds": [],
"name": "POLIANA PENHA DE JESUS",
"mothername": "SOFIA FIGUEIRA",
"internments": [],
"patienttype": "UNMDV",
"prescriptions": [],
"ageindays": 21835,
"uniqueid": "209445701",
"age": "59"
},
{
"administrations": [],
"birthdate": "1956-06-07",
"deceased": false,
"gender": "F",
"patientid": "000000075",
"episodetype": "Consultas",
"clinicalnotes": [],
"examinationrequests": [],
"episodeid": "242546",
"biometrics": [],
"agedescription": "59 Anos",
"diagnostics": [
{
"code": "A689",
"codification": "CID 10",
"description": "A68.9 - Febre recorrente NE",
"startdate": "2016-01-25T00:00:00",
"type": "DEF"
}
],
"dietetics": [],
"maritalstatus": "S",
"activities": [],
"backgrounds": [],
"name": "POLIANA PENHA DE JESUS",
"mothername": "SOFIA FIGUEIRA",
"internments": [],
"patienttype": "UNMDV",
"prescriptions": [],
"ageindays": 21835,
"uniqueid": "209445701",
"age": "59"
},
{
"administrations": [],
"birthdate": "1956-06-07",
"deceased": false,
"gender": "F",
"patientid": "000000075",
"episodetype": "Consultas",
"clinicalnotes": [
{
"date": "2016-01-25T00:00:00",
"specialty": "Oncologia",
"observation": "Dores, etc",
"type": "NOTAS_PRIV"
},
{
"date": "2016-01-25T00:00:00",
"specialty": "Oncologia",
"observation": "Dores, etc",
"type": "EVOLUCAO"
}
],
"examinationrequests": [],
"episodeid": "242546",
"biometrics": [],
"agedescription": "59 Anos",
"diagnostics": [],
"dietetics": [],
"maritalstatus": "S",
"activities": [],
"backgrounds": [],
"name": "POLIANA PENHA DE JESUS",
"mothername": "SOFIA FIGUEIRA",
"internments": [],
"patienttype": "UNMDV",
"prescriptions": [],
"ageindays": 21835,
"uniqueid": "209445701",
"age": "59"
}
],
"uniqueid": "209445701",
"patientid": "000000075",
"patienttype": "UNMDV",
"name": "POLIANA PENHA DE JESUS",
"gender": "F",
"birthdate": "1956-06-07",
"mothername": "SOFIA FIGUEIRA",
"maritalstatus": "S",
"deceased": false,
"age": "59"
}
PUT test/mytype/UNMDV%2F000000046
{
"documentsignatureid": 14711,
"episodes": [
{
"administrations": [],
"birthdate": "1970-12-12",
"deceased": false,
"gender": "F",
"patientid": "000000046",
"episodetype": "Consultas",
"clinicalnotes": [],
"examinationrequests": [],
"episodeid": "242557",
"biometrics": [],
"agedescription": "45 Anos",
"diagnostics": [],
"dietetics": [],
"maritalstatus": "D",
"activities": [],
"backgrounds": [],
"name": "JURACI IZABEL BADKE NEVES",
"mothername": "A",
"internments": [],
"patienttype": "UNMDV",
"prescriptions": [],
"ageindays": 16534,
"uniqueid": "209442701",
"age": "45"
}
],
"uniqueid": "209442701",
"patientid": "000000046",
"patienttype": "UNMDV",
"name": "JURACI IZABEL BADKE NEVES",
"gender": "F",
"birthdate": "1970-12-12",
"mothername": "A",
"maritalstatus": "D",
"deceased": false,
"age": "45"
}
Query :
POST test/mytype/_search
{
"from": 0,
"size": 20,
"highlight": {
"fields": {
"_all": {}
},
"require_field_match": false
},
"query": {
"nested": {
"path": "episodes",
"query": {
"multi_match": {
"query": "febre",
"operator": "and",
"fields": [
"episodes.*"
]
}
}
}
}
}
Result:
{
"error": {
"root_cause": [
{
"type": "illegal_argument_exception",
"reason": "Invalid format: \"febre\""
}
],
"type": "search_phase_execution_exception",
"reason": "all shards failed",
"phase": "query",
"grouped": true,
"failed_shards": [
{
"shard": 0,
"index": "test",
"node": "W0tautNcT22Z4LNnd5gDCg",
"reason": {
"type": "illegal_argument_exception",
"reason": "Invalid format: \"febre\""
}
}
]
},
"status": 400
}
I'm trying to implement an auto-suggest control powered by an ES index. The index has multiple fields (Multi-language - Arabic and English) and I want to be able to search in all languages.
The easiest way to do that is NGram with the "_all" field, as long as some care is taken in the mapping definition. The issue we have now is how to accomplish this with multiple languages.
PS: We are looking to have a separate field for each of the possible languages (using one index).
I tried to use the nGram tokenizer and filter, and it works well for one language (English).
{
"template": "index_com",
"settings": {
"number_of_shards": 5,
"number_of_replicas": 1,
"analysis": {
"filter": {
"edgeNGram_filter": {
"type": "edgeNGram",
"min_gram": 2,
"max_gram": 20
}
},
"analyzer": {
"edgeNGram_analyzer": {
"type": "custom",
"tokenizer": "whitespace",
"filter": [
"lowercase",
"asciifolding",
"edgeNGram_filter"
]
}
}
}
},
"mappings": {
"product": {
"_all": {
"enabled": true,
"index_analyzer": "edgeNGram_analyzer",
"search_analyzer": "standard"
},
"properties": {
"id": {
"type": "string",
"index": "no",
"include_in_all": false
},
"uuid": {
"type": "string",
"index": "no",
"include_in_all": false
},
"name": {
"type": "string",
"include_in_all": true
},
"description": {
"type": "string",
"include_in_all": true
},
"brand": {
"type": "string",
"include_in_all": true
},
"made_id": {
"type": "string",
"include_in_all": true
},
"category": {
"type": "string",
"include_in_all": true
},
"category_id": {
"type": "integer",
"include_in_all": false
},
"keywords": {
"type": "string",
"include_in_all": true
},
"colors": {
"type": "string",
"index": "not_analyzed"
},
"colors_name": {
"type": "string",
"include_in_all": true
},
"quality": {
"type": "string",
"index": "not_analyzed"
},
"vendor_name": {
"type": "string",
"include_in_all": false
},
"vendor_location" : {
"type" : "geo_point",
"include_in_all": false
},
"price": {
"type": "double",
"include_in_all": false
},
"price_before_discount": {
"type": "double",
"include_in_all": false
},
"is_deal": {
"type": "integer",
"include_in_all": false
},
"is_best_seller": {
"type": "integer",
"include_in_all": false
},
"views": {
"type": "integer",
"include_in_all": false
},
"rating": {
"type": "integer",
"include_in_all": false
},
"updated_at": {
"type": "date",
"format": "dateOptionalTime"
},
"created_at": {
"type": "date",
"format": "dateOptionalTime"
},
"image_link": {
"type": "string",
"index": "not_analyzed"
}
}
}
}
}
Arabic analyzer:
{
"settings": {
"analysis": {
"filter": {
"arabic_stop": {
"type": "stop",
"stopwords": "_arabic_"
},
"arabic_keywords": {
"type": "keyword_marker",
"keywords": []
},
"arabic_stemmer": {
"type": "stemmer",
"language": "arabic"
}
},
"analyzer": {
"arabic": {
"tokenizer": "standard",
"filter": [
"lowercase",
"arabic_stop",
"arabic_normalization",
"arabic_keywords",
"arabic_stemmer"
]
}
}
}
}
}
Can someone suggest a solution? Thanks!
Your second snippet defines the arabic analyzer, which is already available so you shouldn't need to add it.
What you are missing is to tell Elasticsearch to also use the arabic analyzer. So you want to analyze each field twice, in English and Arabic. To do that, add
"fields": {
"ar": {
"type": "string",
"analyzer": "arabic"
},
"en": {
"type": "string",
"analyzer": "english"
}
}
to all your fields that have "include_in_all": true. That makes your mappings look like this:
{
"template": "index_com",
"settings": {
"number_of_shards": 5,
"number_of_replicas": 1,
"analysis": {
"filter": {
"edgeNGram_filter": {
"type": "edgeNGram",
"min_gram": 2,
"max_gram": 20
}
},
"analyzer": {
"edgeNGram_analyzer": {
"type": "custom",
"tokenizer": "whitespace",
"filter": [
"lowercase",
"asciifolding",
"edgeNGram_filter"
]
}
}
}
},
"mappings": {
"product": {
"_all": {
"enabled": true,
"index_analyzer": "edgeNGram_analyzer",
"search_analyzer": "standard"
},
"properties": {
"id": {
"type": "string",
"index": "no",
"include_in_all": false
},
"uuid": {
"type": "string",
"index": "no",
"include_in_all": false
},
"name": {
"type": "string",
"include_in_all": true,
"fields": {
"ar": {
"type": "string",
"analyzer": "arabic"
},
"en": {
"type": "string",
"analyzer": "english"
}
}
},
"description": {
"type": "string",
"include_in_all": true,
"fields": {
"ar": {
"type": "string",
"analyzer": "arabic"
},
"en": {
"type": "string",
"analyzer": "english"
}
}
},
"brand": {
"type": "string",
"include_in_all": true,
"fields": {
"ar": {
"type": "string",
"analyzer": "arabic"
},
"en": {
"type": "string",
"analyzer": "english"
}
}
},
"made_id": {
"type": "string",
"include_in_all": true,
"fields": {
"ar": {
"type": "string",
"analyzer": "arabic"
},
"en": {
"type": "string",
"analyzer": "english"
}
}
},
"category": {
"type": "string",
"include_in_all": true,
"fields": {
"ar": {
"type": "string",
"analyzer": "arabic"
},
"en": {
"type": "string",
"analyzer": "english"
}
}
},
"category_id": {
"type": "integer",
"include_in_all": false
},
"keywords": {
"type": "string",
"include_in_all": true,
"fields": {
"ar": {
"type": "string",
"analyzer": "arabic"
},
"en": {
"type": "string",
"analyzer": "english"
}
}
},
"colors": {
"type": "string",
"index": "not_analyzed"
},
"colors_name": {
"type": "string",
"include_in_all": true,
"fields": {
"ar": {
"type": "string",
"analyzer": "arabic"
},
"en": {
"type": "string",
"analyzer": "english"
}
}
},
"quality": {
"type": "string",
"index": "not_analyzed"
},
"vendor_name": {
"type": "string",
"include_in_all": false
},
"vendor_location": {
"type": "geo_point",
"include_in_all": false
},
"price": {
"type": "double",
"include_in_all": false
},
"price_before_discount": {
"type": "double",
"include_in_all": false
},
"is_deal": {
"type": "integer",
"include_in_all": false
},
"is_best_seller": {
"type": "integer",
"include_in_all": false
},
"views": {
"type": "integer",
"include_in_all": false
},
"rating": {
"type": "integer",
"include_in_all": false
},
"updated_at": {
"type": "date",
"format": "dateOptionalTime"
},
"created_at": {
"type": "date",
"format": "dateOptionalTime"
},
"image_link": {
"type": "string",
"index": "not_analyzed"
}
}
}
}
}