ES NGram with multi-languages

ES NGram with multi-languages - elasticsearch

I'm trying to implement an auto-suggest control powered by an ES index. The index has multiple fields (Multi-language - Arabic and English) and I want to be able to search in all languages.
The easiest way to do that is NGram with the "_all" field, as long as some care is taken in the mapping definition. The issue we have now is how to accomplish this with multiple languages.
PS: We are looking to use a separate field for each possible language (using one index).
I tried using the nGram tokenizer and filter, and it works well for one language (English).
{
"template": "index_com",
"settings": {
"number_of_shards": 5,
"number_of_replicas": 1,
"analysis": {
"filter": {
"edgeNGram_filter": {
"type": "edgeNGram",
"min_gram": 2,
"max_gram": 20
}
},
"analyzer": {
"edgeNGram_analyzer": {
"type": "custom",
"tokenizer": "whitespace",
"filter": [
"lowercase",
"asciifolding",
"edgeNGram_filter"
]
}
}
}
},
"mappings": {
"product": {
"_all": {
"enabled": true,
"index_analyzer": "edgeNGram_analyzer",
"search_analyzer": "standard"
},
"properties": {
"id": {
"type": "string",
"index": "no",
"include_in_all": false
},
"uuid": {
"type": "string",
"index": "no",
"include_in_all": false
},
"name": {
"type": "string",
"include_in_all": true
},
"description": {
"type": "string",
"include_in_all": true
},
"brand": {
"type": "string",
"include_in_all": true
},
"made_id": {
"type": "string",
"include_in_all": true
},
"category": {
"type": "string",
"include_in_all": true
},
"category_id": {
"type": "integer",
"include_in_all": false
},
"keywords": {
"type": "string",
"include_in_all": true
},
"colors": {
"type": "string",
"index": "not_analyzed"
},
"colors_name": {
"type": "string",
"include_in_all": true
},
"quality": {
"type": "string",
"index": "not_analyzed"
},
"vendor_name": {
"type": "string",
"include_in_all": false
},
"vendor_location" : {
"type" : "geo_point",
"include_in_all": false
},
"price": {
"type": "double",
"include_in_all": false
},
"price_before_discount": {
"type": "double",
"include_in_all": false
},
"is_deal": {
"type": "integer",
"include_in_all": false
},
"is_best_seller": {
"type": "integer",
"include_in_all": false
},
"views": {
"type": "integer",
"include_in_all": false
},
"rating": {
"type": "integer",
"include_in_all": false
},
"updated_at": {
"type": "date",
"format": "dateOptionalTime"
},
"created_at": {
"type": "date",
"format": "dateOptionalTime"
},
"image_link": {
"type": "string",
"index": "not_analyzed"
}
}
}
}
}
Arabic analyzer:
{
"settings": {
"analysis": {
"filter": {
"arabic_stop": {
"type": "stop",
"stopwords": "_arabic_"
},
"arabic_keywords": {
"type": "keyword_marker",
"keywords": []
},
"arabic_stemmer": {
"type": "stemmer",
"language": "arabic"
}
},
"analyzer": {
"arabic": {
"tokenizer": "standard",
"filter": [
"lowercase",
"arabic_stop",
"arabic_normalization",
"arabic_keywords",
"arabic_stemmer"
]
}
}
}
}
}
Can someone suggest a solution? Thanks!

Your second snippet defines the arabic analyzer, which is already available so you shouldn't need to add it.
What you are missing is to tell Elasticsearch to also use the arabic analyzer. So you want to analyze each field twice, in English and Arabic. To do that, add
"fields": {
"ar": {
"type": "string",
"analyzer": "arabic"
},
"en": {
"type": "string",
"analyzer": "english"
}
}
to all your fields that have "include_in_all": true. That makes your mappings look like this:
{
"template": "index_com",
"settings": {
"number_of_shards": 5,
"number_of_replicas": 1,
"analysis": {
"filter": {
"edgeNGram_filter": {
"type": "edgeNGram",
"min_gram": 2,
"max_gram": 20
}
},
"analyzer": {
"edgeNGram_analyzer": {
"type": "custom",
"tokenizer": "whitespace",
"filter": [
"lowercase",
"asciifolding",
"edgeNGram_filter"
]
}
}
}
},
"mappings": {
"product": {
"_all": {
"enabled": true,
"index_analyzer": "edgeNGram_analyzer",
"search_analyzer": "standard"
},
"properties": {
"id": {
"type": "string",
"index": "no",
"include_in_all": false
},
"uuid": {
"type": "string",
"index": "no",
"include_in_all": false
},
"name": {
"type": "string",
"include_in_all": true,
"fields": {
"ar": {
"type": "string",
"analyzer": "arabic"
},
"en": {
"type": "string",
"analyzer": "english"
}
}
},
"description": {
"type": "string",
"include_in_all": true,
"fields": {
"ar": {
"type": "string",
"analyzer": "arabic"
},
"en": {
"type": "string",
"analyzer": "english"
}
}
},
"brand": {
"type": "string",
"include_in_all": true,
"fields": {
"ar": {
"type": "string",
"analyzer": "arabic"
},
"en": {
"type": "string",
"analyzer": "english"
}
}
},
"made_id": {
"type": "string",
"include_in_all": true,
"fields": {
"ar": {
"type": "string",
"analyzer": "arabic"
},
"en": {
"type": "string",
"analyzer": "english"
}
}
},
"category": {
"type": "string",
"include_in_all": true,
"fields": {
"ar": {
"type": "string",
"analyzer": "arabic"
},
"en": {
"type": "string",
"analyzer": "english"
}
}
},
"category_id": {
"type": "integer",
"include_in_all": false
},
"keywords": {
"type": "string",
"include_in_all": true,
"fields": {
"ar": {
"type": "string",
"analyzer": "arabic"
},
"en": {
"type": "string",
"analyzer": "english"
}
}
},
"colors": {
"type": "string",
"index": "not_analyzed"
},
"colors_name": {
"type": "string",
"include_in_all": true,
"fields": {
"ar": {
"type": "string",
"analyzer": "arabic"
},
"en": {
"type": "string",
"analyzer": "english"
}
}
},
"quality": {
"type": "string",
"index": "not_analyzed"
},
"vendor_name": {
"type": "string",
"include_in_all": false
},
"vendor_location": {
"type": "geo_point",
"include_in_all": false
},
"price": {
"type": "double",
"include_in_all": false
},
"price_before_discount": {
"type": "double",
"include_in_all": false
},
"is_deal": {
"type": "integer",
"include_in_all": false
},
"is_best_seller": {
"type": "integer",
"include_in_all": false
},
"views": {
"type": "integer",
"include_in_all": false
},
"rating": {
"type": "integer",
"include_in_all": false
},
"updated_at": {
"type": "date",
"format": "dateOptionalTime"
},
"created_at": {
"type": "date",
"format": "dateOptionalTime"
},
"image_link": {
"type": "string",
"index": "not_analyzed"
}
}
}
}
}

Related

How to migrate data from ES2.1 to ES7 by logstash

[ERROR][logstash.outputs.elasticsearch][main] Failed to install
template {:message=>"Got response code '400' contacting Elasticsearch
at URL 'http://127.0.0.1:9200/_template/ecs-logstash'",
:exception=>LogStash::Outputs::ElasticSearch::HttpClient::Pool::BadResponseCodeError
Original ES version: 2.1.2; new ES version: 7.13; Logstash version: 8.1.1.
I have the index below on my ES 2 cluster:
"designs_v1": {
"mappings": {
"design": {
"dynamic": "false",
"_all": {
"enabled": false
},
"_id": {
"store": true,
"index": "not_analyzed"
},
"_timestamp": {
"enabled": true,
"store": true
},
"properties": {
"createDate": {
"type": "date",
"fielddata": {
"__comment": "Used for sorting",
"loading": "eager"
},
"format": "epoch_millis||date_time"
},
"designId": {
"type": "long",
"fielddata": {
"__comment": "Used for sorting to break ties and accessed by our custom scorer",
"loading": "eager"
}
},
"editorialTags": {
"type": "string",
"norms": {
"loading": "eager"
},
"analyzer": "standard_with_stopwords",
"fields": {
"shingles": {
"type": "string",
"norms": {
"loading": "eager"
},
"analyzer": "shingle"
},
"stemmed": {
"type": "string",
"norms": {
"loading": "eager"
},
"analyzer": "kstem"
}
}
},
"isPersonalizable": {
"type": "boolean"
},
"legalBlockTags": {
"type": "string",
"norms": {
"enabled": false
},
"analyzer": "standard_with_stopwords"
},
"memberId": {
"type": "long"
},
"pixel_height": {
"type": "integer"
},
"pixel_width": {
"type": "integer"
},
"products": {
"type": "nested",
"properties": {
"caption": {
"type": "string",
"norms": {
"enabled": false
},
"analyzer": "standard_with_stopwords"
},
"createDate": {
"type": "date",
"format": "epoch_millis||date_time"
},
"defaultThumbnail": {
"type": "integer"
},
"description": {
"type": "string",
"norms": {
"enabled": false
},
"analyzer": "standard_with_stopwords"
},
"hasPersonalizableSvg": {
"type": "boolean"
},
"imageOneId": {
"type": "long"
},
"imageTwoId": {
"type": "long"
},
"locations": {
"type": "string",
"norms": {
"enabled": false
},
"analyzer": "keyword"
},
"productId": {
"type": "long"
},
"productTypeId": {
"type": "integer",
"fielddata": {
"__comment": "Used during aggregations",
"loading": "eager"
}
},
"showColorId": {
"type": "integer"
},
"storeId": {
"type": "long"
}
}
},
"sellerTags": {
"type": "string",
"norms": {
"loading": "eager"
},
"analyzer": "standard_with_stopwords",
"fields": {
"shingles": {
"type": "string",
"norms": {
"loading": "eager"
},
"analyzer": "shingle"
},
"stemmed": {
"type": "string",
"norms": {
"loading": "eager"
},
"analyzer": "kstem"
}
}
}
}
}
}
}
I created a new index in ES 7:
{
"mappings": {
// "_id": {
// "store": true,
// "index": "not_analyzed"
// },
// "_timestamp": {
// "enabled": true,
// "store": true
// },
"properties": {
"createDate": {
"type": "date",
"format": "epoch_millis||date_time"
},
"designId": {
"type": "long"
},
"editorialTags": {
"type": "text",
"norms": true,
"analyzer": "standard_with_stopwords",
"fields": {
"shingles": {
"type": "text",
"norms": true,
"analyzer": "shingle"
},
"stemmed": {
"type": "text",
"norms": true,
"analyzer": "kstem"
}
}
},
"isPersonalizable": {
"type": "boolean"
},
"legalBlockTags": {
"type": "text",
"norms": false,
"analyzer": "standard_with_stopwords"
},
"memberId": {
"type": "long"
},
"pixel_height": {
"type": "integer"
},
"pixel_width": {
"type": "integer"
},
"products": {
"type": "nested",
"properties": {
"caption": {
"type": "text",
"norms": false,
"analyzer": "standard_with_stopwords"
},
"createDate": {
"type": "date",
"format": "epoch_millis||date_time"
},
"defaultThumbnail": {
"type": "integer"
},
"description": {
"type": "text",
"norms": false,
"analyzer": "standard_with_stopwords"
},
"hasPersonalizableSvg": {
"type": "boolean"
},
"imageOneId": {
"type": "long"
},
"imageTwoId": {
"type": "long"
},
"locations": {
"type": "text",
"norms": false,
"analyzer": "keyword"
},
"productId": {
"type": "long"
},
"productTypeId": {
"type": "integer"
},
"showColorId": {
"type": "integer"
},
"storeId": {
"type": "long"
}
}
},
"sellerTags": {
"type": "text",
"norms": true,
"analyzer": "standard_with_stopwords",
"fields": {
"shingles": {
"type": "text",
"norms": true,
"analyzer": "shingle"
},
"stemmed": {
"type": "text",
"norms": true,
"analyzer": "kstem"
}
}
}
}
}
}
I want to migrate the data to ES 7 using Logstash. Below is my Logstash configuration file:
input {
elasticsearch {
hosts => ["http://xxx:9200"]
index => "designs_v1"
type => "design"
size => 10
scroll => "1m"
}
}
filter {
}
output {
elasticsearch {
hosts => ["http://127.0.0.1:9200"]
index => "designs_v1"
#document_type => "%{[@metadata][_type]}"
document_id => "%{[@metadata][_id]}"
}
}
But I can't fix this issue:
[ERROR][logstash.outputs.elasticsearch][main] Failed to install
template {:message=>"Got response code '400' contacting Elasticsearch
at URL 'http://127.0.0.1:9200/_template/ecs-logstash'",
:exception=>LogStash::Outputs::ElasticSearch::HttpClient::Pool::BadResponseCodeError
What can I do now?

Error while creating an index on Elasticsearch with custom analyzer

I am trying to create an index with a custom default analyzer.
I already checked the following questions:
Analyzer not found exception while creating an index with mapping and settings
How to specify an analyzer while creating an index in ElasticSearch
mapper_parsing_exception for a custom analyzer while creating index in elasticsearch?
but they didn't solve the issue.
Here is my schema:
put /emails
{
"mappings": {
"email": {
"analyzer": "lkw",
"properties": {
"createdOn": {
"type": "date",
"store": true,
"format": "strict_date_optional_time||epoch_millis"
},
"data": {
"type": "object",
"dynamic": "true"
},
"from": {
"type": "string",
"store": true
},
"id": {
"type": "string",
"store": true
},
"sentOn": {
"type": "date",
"store": true,
"format": "strict_date_optional_time||epoch_millis"
},
"sesId": {
"type": "string",
"store": true
},
"subject": {
"type": "string",
"store": true,
"analyzer": "standard"
},
"templates": {
"properties": {
"html": {
"type": "string",
"store": true
},
"plainText": {
"type": "string",
"store": true
}
}
},
"to": {
"type": "string",
"store": true
},
"type": {
"type": "string",
"store": true
}
}
},
"event": {
"_parent": {
"type": "email"
},
"analyzer": "lkw",
"properties": {
"id": {
"type": "string",
"store": true
},
"origin": {
"type": "string",
"store": true
},
"time": {
"type": "date",
"store": true,
"format": "strict_date_optional_time||epoch_millis"
},
"type": {
"type": "string",
"store": true
},
"userAgent": {
"type": "string",
"store": true
}
}
}
},
"settings": {
"analysis": {
"analyzer": {
"lkw": {
"tokenizer": "keyword",
"filter": [
"lowercase"
],
"type": "custom"
}
}
}
}
}
When I execute the command above, I get this error:
{
"error": {
"root_cause": [
{
"type": "mapper_parsing_exception",
"reason": "Root mapping definition has unsupported parameters: [analyzer : lkw]"
}
],
"type": "mapper_parsing_exception",
"reason": "Failed to parse mapping [event]: Root mapping definition has unsupported parameters: [analyzer : lkw]",
"caused_by": {
"type": "mapper_parsing_exception",
"reason": "Root mapping definition has unsupported parameters: [analyzer : lkw]"
}
},
"status": 400
}

Since you have only a few string fields, I suggest you simply specify your lkw analyzer where you need it, just like you did for the standard one:
PUT /emails
{
"mappings": {
"email": {
"properties": {
"createdOn": {
"type": "date",
"store": true,
"format": "strict_date_optional_time||epoch_millis"
},
"data": {
"type": "object",
"dynamic": "true"
},
"from": {
"type": "string",
"store": true,
"analyzer": "lkw"
},
"id": {
"type": "string",
"store": true,
"analyzer": "lkw"
},
"sentOn": {
"type": "date",
"store": true,
"format": "strict_date_optional_time||epoch_millis"
},
"sesId": {
"type": "string",
"store": true,
"analyzer": "lkw"
},
"subject": {
"type": "string",
"store": true,
"analyzer": "standard"
},
"templates": {
"properties": {
"html": {
"type": "string",
"store": true,
"analyzer": "lkw"
},
"plainText": {
"type": "string",
"store": true,
"analyzer": "lkw"
}
}
},
"to": {
"type": "string",
"store": true,
"analyzer": "lkw"
},
"type": {
"type": "string",
"store": true,
"analyzer": "lkw"
}
}
},
"event": {
"_parent": {
"type": "email"
},
"properties": {
"id": {
"type": "string",
"store": true,
"analyzer": "lkw"
},
"origin": {
"type": "string",
"store": true,
"analyzer": "lkw"
},
"time": {
"type": "date",
"store": true,
"format": "strict_date_optional_time||epoch_millis"
},
"type": {
"type": "string",
"store": true,
"analyzer": "lkw"
},
"userAgent": {
"type": "string",
"store": true,
"analyzer": "lkw"
}
}
}
},
"settings": {
"analysis": {
"analyzer": {
"lkw": {
"tokenizer": "keyword",
"filter": [
"lowercase"
],
"type": "custom"
}
}
}
}
}

elasticsearch nested query does not give proper result

Hi, I am having a problem. We are building a help system where users can search the help content. I have the following mapping:
{
"com.mycompany.content": {
"mappings": {
"pageMasterContent": {
"properties": {
"contentType": {
"type": "string",
"term_vector": "with_positions_offsets",
"include_in_all": true
},
"customer": {
"type": "nested",
"properties": {
"class": {
"type": "string",
"index": "no",
"include_in_all": false
},
"customerName": {
"type": "string",
"term_vector": "with_positions_offsets",
"include_in_all": true
},
"id": {
"type": "long",
"include_in_all": false
},
"ref": {
"type": "string",
"index": "no",
"include_in_all": false
}
}
},
"language": {
"type": "nested",
"properties": {
"class": {
"type": "string",
"index": "no",
"include_in_all": false
},
"id": {
"type": "long",
"include_in_all": false
},
"langName": {
"type": "string",
"term_vector": "with_positions_offsets",
"include_in_all": false
},
"ref": {
"type": "string",
"index": "no",
"include_in_all": false
}
}
},
"pageContent": {
"type": "string",
"term_vector": "with_positions_offsets",
"include_in_all": true
},
"pageMaster": {
"type": "nested",
"properties": {
"class": {
"type": "string",
"index": "no",
"include_in_all": false
},
"id": {
"type": "long",
"include_in_all": false
},
"pageCode": {
"type": "string",
"term_vector": "with_positions_offsets",
"include_in_all": true
},
"pageSinceVersion": {
"properties": {
"class": {
"type": "string"
},
"id": {
"type": "long"
},
"versionNumber": {
"type": "string"
},
"versionOrder": {
"type": "long"
}
}
},
"pageUptoVersion": {
"properties": {
"class": {
"type": "string"
},
"id": {
"type": "long"
},
"versionNumber": {
"type": "string"
},
"versionOrder": {
"type": "long"
}
}
},
"ref": {
"type": "string",
"index": "no",
"include_in_all": false
}
}
},
"pageVersion": {
"type": "nested",
"properties": {
"class": {
"type": "string",
"index": "no",
"include_in_all": false
},
"id": {
"type": "long",
"include_in_all": false
},
"ref": {
"type": "string",
"index": "no",
"include_in_all": false
},
"versionNumber": {
"type": "string",
"term_vector": "with_positions_offsets",
"include_in_all": true
},
"versionOrder": {
"type": "integer",
"include_in_all": false
}
}
}
}
}
}
}
}
I want to filter my data by the following parameters: language, pageSinceVersion and pageUptoVersion.
When a user searches for any content, the results should include only content in that language whose version is greater than pageSinceVersion and less than pageUptoVersion.
Thanks in advance.

Elasticsearch - How to provide custom synonyms when querying?

I'm developing a search engine for my client which has to use synonym expansion. I can properly set up my index with a synonym token filter and a custom file (synonym.txt).
Example: ipod, i-pod, i pod
However, whenever we want a synonym expansion, I get the synonyms from Elasticsearch and display them as tags on the website.
Each tag can be unselected. In this case, how can we tell Elasticsearch at query time to use a different set of synonyms, one that does not come from the synonym.txt file?
Example: if the user is looking for the term ipod, then I will show these two tags: i-pod, i pod. But if the user chooses to unselect "i-pod", I would like to be able to specify that only "i pod" is a synonym of "ipod" while querying.
My index settings are :
{
"settings": {
"analysis": {
"filter": {
"elision": {
"type": "elision",
"articles": ["l", "m", "t", "qu", "n", "s", "j", "d", "c", "jusqu", "quoiqu", "lorsqu", "puisqu"]
},
"french_stop": {
"type": "stop",
"stopwords": "_french_"
},
"french_stemmer": {
"type": "stemmer",
"language": "light_french"
},
"synonymsFilter": {
"type" : "synonym",
"synonyms_path" : "analysis/synonym.txt"
},
"autocompleteFilter": {
"max_shingle_size": "5",
"min_shingle_size": "2",
"type": "shingle"
}
},
"analyzer": {
"default": {
"tokenizer": "letter",
"filter": ["asciifolding", "lowercase", "french_stemmer", "elision", "french_stop"]
},
"auto-complete-suggester": {
"filter": [
"lowercase",
"autocompleteFilter"
],
"char_filter": [
"html_strip"
],
"type": "custom",
"tokenizer": "standard"
},
"did-you-mean-suggester": {
"tokenizer": "standard",
"filter": ["asciifolding", "lowercase"]
},
"synonym_analyzer" : {
"tokenizer" : "whitespace",
"filter" : ["synonymsFilter"]
},
"synonym_analyzer2": {
"tokenizer": "standard",
"filter": ["asciifolding", "lowercase", "french_stop", "autocompleteFilter"]
}
}
}
},
"mappings": {
"companies": {
"date_detection": "false",
"properties": {
"auto_complete": {
"type": "string",
"analyzer": "auto-complete-suggester",
"term_vector" : "yes"
},
"did_you_mean": {
"type": "string",
"analyzer": "did-you-mean-suggester",
"term_vector" : "yes"
},
"synonyms": {
"type": "string",
"analyzer": "synonym_analyzer",
"term_vector" : "yes"
},
"company_name": {
"type": "string",
"fields": {
"raw": { "type": "string", "index": "not_analyzed" },
"suggester": { "type": "string", "analyzer": "did-you-mean-suggester" }
},
"term_vector" : "yes",
"copy_to": [
"did_you_mean",
"auto_complete",
"synonyms"
]
},
"siren": {
"type": "string",
"fields": {
"raw": { "type": "string", "index": "not_analyzed" },
"suggester": { "type": "string", "analyzer": "did-you-mean-suggester" }
}
},
"CPposteEntreprise": {
"type": "string",
"fields": {
"raw": { "type": "string", "index": "not_analyzed" },
"suggester": { "type": "string", "analyzer": "did-you-mean-suggester" }
}
},
"commercial_company_name": {
"type": "string",
"fields": {
"raw": { "type": "string", "index": "not_analyzed" },
"suggester": { "type": "string", "analyzer": "did-you-mean-suggester" }
},
"term_vector" : "yes",
"copy_to": [
"did_you_mean",
"auto_complete",
"synonyms"
]
},
"year_creation_company": {
"type": "long"
},
"month_creation_company": {
"type": "long"
},
"month_year_creation_company": {
"type": "date",
"format": "yyyyMM",
"fields": {
"raw": { "type": "string", "index": "not_analyzed" }
}
},
"city_company": {
"type": "string",
"fields": {
"suggester": { "type": "string", "analyzer": "did-you-mean-suggester" }
},
"term_vector" : "yes",
"copy_to": [
"did_you_mean",
"auto_complete"
]
},
"departement_company": {
"type": "string",
"fields": {
"raw": { "type": "string", "index": "not_analyzed" },
"suggester": { "type": "string", "analyzer": "did-you-mean-suggester" }
},
"term_vector" : "yes",
"copy_to": [
"did_you_mean",
"auto_complete"
]
},
"region_company": {
"type": "string",
"fields": {
"raw": { "type": "string", "index": "not_analyzed" },
"suggester": { "type": "string", "analyzer": "did-you-mean-suggester" }
},
"term_vector" : "yes",
"copy_to": [
"did_you_mean",
"auto_complete"
]
},
"is_excellence": {
"type": "string",
"fields": {
"raw": { "type": "string", "index": "not_analyzed" },
"suggester": { "type": "string", "analyzer": "did-you-mean-suggester" }
}
},
"interlocuteurs": {
"type": "string",
"fields": {
"raw": { "type": "string", "index": "not_analyzed" },
"suggester": { "type": "string", "analyzer": "did-you-mean-suggester" }
},
"term_vector" : "yes",
"copy_to": [
"did_you_mean",
"auto_complete"
]
},
"flag_entreprise_finance": {
"type": "string",
"fields": {
"raw": { "type": "string", "index": "not_analyzed" }
}
},
"flag_indirect": {
"type": "string",
"fields": {
"raw": { "type": "string", "index": "not_analyzed" }
}
},
"flag_direct": {
"type": "string",
"fields": {
"raw": { "type": "string", "index": "not_analyzed" }
}
},
"flag_investissement": {
"type": "string",
"fields": {
"raw": { "type": "string", "index": "not_analyzed" }
}
},
"montant_total_investissement": {
"type": "double",
"fields": {
"raw": { "type": "double", "index": "not_analyzed" }
}
},
"motant_total_finance": {
"type": "double",
"fields": {
"raw": { "type": "double", "index": "not_analyzed" }
}
},
"nombre_investissement": {
"type": "long",
"fields": {
"raw": { "type": "long", "index": "not_analyzed" }
}
},
"nombre_financement_accorde": {
"type": "long",
"fields": {
"raw": { "type": "long", "index": "not_analyzed" }
}
},
"caInterne": {
"type": "double",
"fields": {
"raw": { "type": "double", "index": "not_analyzed" }
}
},
"caExterne": {
"type": "double",
"fields": {
"raw": { "type": "double", "index": "not_analyzed" }
}
},
"caFiltre": {
"type": "double",
"fields": {
"raw": { "type": "double", "index": "not_analyzed" }
}
},
"effectif": {
"type": "double",
"fields": {
"raw": { "type": "double", "index": "not_analyzed" }
}
},
"textRank": {
"type": "string",
"fields": {
"raw": { "type": "string", "index": "not_analyzed" }
},
"term_vector" : "yes",
"copy_to": [
"synonyms"
]
},
"masterKeywords": {
"type": "nested",
"properties": {
"keyword": {
"type":"string",
"fields": {
"raw": { "type": "string", "index": "not_analyzed" }
},
"term_vector" : "yes",
"copy_to": [
"did_you_mean",
"auto_complete"
]
}
}
},
"dossiers":{
"type": "nested",
"date_detection": "false",
"properties": {
"dossierCommercial": {
"type": "long"
},
"sousDossierCommercial": {
"type": "long"
},
"historiqueProduitBPI": {
"type": "string"
},
"statutSousDossier": {
"type": "string"
},
"dateDecision": {
"type": "date",
"fields": {
"suggester": { "type": "string", "analyzer": "did-you-mean-suggester" },
"raw": { "type": "string", "index": "not_analyzed" }
}
},
"nomChargesAffaires": {
"type": "string",
"fields": {
"suggester": { "type": "string", "analyzer": "did-you-mean-suggester" }
}
},
"contactChargesAffaires": {
"type": "string",
"fields": {
"suggester": { "type": "string", "analyzer": "did-you-mean-suggester" }
}
},
"montantAide": {
"type": "double",
"fields": {
"suggester": { "type": "string", "analyzer": "did-you-mean-suggester" },
"raw": { "type": "string", "index": "not_analyzed" }
}
},
"contentValidation": {
"type": "string",
"fields": {
"suggester": { "type": "string", "analyzer": "did-you-mean-suggester" }
},
"term_vector" : "yes",
"copy_to": [
"did_you_mean",
"auto_complete",
"synonyms"
]
},
"contentDecision": {
"type": "string",
"fields": {
"suggester": { "type": "string", "analyzer": "did-you-mean-suggester" }
},
"term_vector" : "yes",
"copy_to": [
"did_you_mean",
"auto_complete",
"synonyms"
]
},
"contentDirectionEngagements": {
"type": "string",
"fields": {
"suggester": { "type": "string", "analyzer": "did-you-mean-suggester" }
},
"term_vector" : "yes",
"copy_to": [
"did_you_mean",
"auto_complete",
"synonyms"
]
},
"metaDomain": {
"type": "string",
"fields": {
"suggester": { "type": "string", "analyzer": "did-you-mean-suggester" }
}
},
"sousSecteur": {
"type": "string",
"fields": {
"suggester": { "type": "string", "analyzer": "did-you-mean-suggester" }
}
},
"keywords": {
"type": "string",
"fields": {
"suggester": { "type": "string", "analyzer": "did-you-mean-suggester" }
},
"term_vector" : "yes",
"copy_to": [
"did_you_mean",
"auto_complete"
]
},
"descriptionProjet": {
"type": "string",
"fields": {
"suggester": { "type": "string", "analyzer": "did-you-mean-suggester" }
},
"term_vector" : "yes",
"copy_to": [
"did_you_mean",
"auto_complete",
"synonyms"
]
}
}
},
"investissements": {
"type": "nested",
"date_detection": "false",
"properties": {
"flag_indirect": {
"type": "string",
"fields": {
"raw": { "type": "string", "index": "not_analyzed" }
}
},
"nom_societe_gestion_svi":{
"type": "string"
},
"date_entree_investissement":{
"type": "date",
"fields": {
"raw": { "type": "string", "index": "not_analyzed" }
}
},
"montant_investissement_df":{
"type": "double"
},
"description_projet_investissement":{
"type": "string",
"term_vector" : "yes",
"copy_to": [
"did_you_mean",
"auto_complete",
"synonyms"
]
}
}
},
"bilans":{
"type": "nested",
"date_detection": "false",
"properties": {
"bilanAnneeN": {
"properties": {
"effectif": {
"type": "long",
"fields": {
"raw": { "type": "long", "index": "not_analyzed" }
}
},
"capital": {
"type": "double"
},
"resultatNet": {
"type": "double"
},
"clotureDate": {
"type": "date"
},
"annee": {
"type": "long"
},
"ebeMoyen": {
"type": "double"
},
"caInterne": {
"type": "double",
"fields": {
"raw": { "type": "double", "index": "not_analyzed" }
}
},
"caExterne": {
"type": "double",
"fields": {
"raw": { "type": "double", "index": "not_analyzed" }
}
}
}
},
"bilanAnneeN1": {
"properties": {
"effectif": {
"type": "long",
"fields": {
"raw": { "type": "long", "index": "not_analyzed" }
}
},
"capital": {
"type": "double"
},
"resultatNet": {
"type": "double"
},
"clotureDate": {
"type": "date"
},
"annee": {
"type": "long"
},
"ebeMoyen": {
"type": "double"
},
"caInterne": {
"type": "double",
"fields": {
"raw": { "type": "double", "index": "not_analyzed" }
}
},
"caExterne": {
"type": "double",
"fields": {
"raw": { "type": "double", "index": "not_analyzed" }
}
}
}
},
"bilanAnneeN2": {
"properties": {
"effectif": {
"type": "long",
"fields": {
"raw": { "type": "long", "index": "not_analyzed" }
}
},
"capital": {
"type": "double"
},
"resultatNet": {
"type": "double"
},
"clotureDate": {
"type": "date"
},
"annee": {
"type": "long"
},
"ebeMoyen": {
"type": "double"
},
"caInterne": {
"type": "double",
"fields": {
"raw": { "type": "double", "index": "not_analyzed" }
}
},
"caExterne": {
"type": "double",
"fields": {
"raw": { "type": "double", "index": "not_analyzed" }
}
}
}
}
}
},
"news": {
"type": "nested",
"date_detection": "false",
"properties": {
"date": {
"type": "date",
"fields": {
"suggester": { "type": "string", "analyzer": "did-you-mean-suggester" },
"raw": { "type": "string", "index": "not_analyzed" }
}
},
"description": {
"type": "string",
"term_vector" : "yes",
"copy_to": [
"did_you_mean",
"auto_complete",
"synonyms"
]
},
"title": {
"type": "string"
},
"content": {
"type": "string",
"term_vector" : "yes",
"copy_to": [
"did_you_mean",
"auto_complete",
"synonyms"
]
},
"url": {
"type": "string"
},
"tags": {
"type": "string",
"term_vector" : "yes",
"copy_to": [
"did_you_mean",
"auto_complete"
]
},
"links": {
"type": "string"
},
"external_source": {
"type": "string"
}
}
}
}
}
}
}
For now, I am using a master field called "synonyms". Is this a good idea?
Thanks in advance for your help.

no results elasticsearch when query by multi_field

I have the following mapping on my index in elasticsearch.
{
"mail": {
"properties": {
"project": {
"type": "string",
"index": "not_analyzed",
"null_value": "na",
"include_in_all": "false"
},
"mailbox": {
"type": "string",
"index": "not_analyzed",
"null_value": "#na",
"store" : "yes"
},
"path": {
"type": "string",
"index": "not_analyzed",
"null_value": "#na",
"store" : "yes"
},
"messageid": {
"type": "string",
"index": "not_analyzed",
"null_value": "na",
"include_in_all": "false"
},
"nodeid":
{
"type": "string",
"index": "not_analyzed",
"null_value": "na",
"include_in_all": "false",
"store" : "yes"
},
"replyto": {
"type": "string",
"index": "not_analyzed",
"null_value": "na",
"include_in_all": "false"
},
"references": {
"type": "string",
"index": "not_analyzed",
"null_value": "na",
"include_in_all": "false"
},
"subject": {
"boost": "3.0",
"type": "string",
"term_vector": "with_positions_offsets",
"analyzer": "snowball"
},
"from": {
"type": "nested",
"properties": {
"name": {
"type" : "multi_field",
"fields" : {
"name" : {"type" : "string", "analyzer" : "standard", "index" : "analyzed"},
"untouched" : {"type" : "string", "index" : "not_analyzed"}
}
},
"address": {
"type": "string",
"analyzer": "analyzer_email"
},
"nodeid": {
"type": "string",
"index": "not_analyzed",
"store" : "yes"
},
"facet": {
"type": "string",
"index": "not_analyzed",
"store" : "yes"
}
}
},
"to": {
"type": "nested",
"properties":{
"name": {
"type": "string",
"analyzer":"analyzer_keyword"
},
"address": {
"type": "string",
"analyzer": "analyzer_email"
},
"nodeid": {
"type": "string",
"index": "not_analyzed",
"store" : "yes"
},
"facet": {
"type": "string",
"index": "not_analyzed",
"store" : "yes"
}
}
},
"cc": {
"type": "nested",
"properties":{
"name": {
"type": "string",
"analyzer":"analyzer_keyword"
},
"address": {
"type": "string",
"analyzer": "analyzer_email"
},
"nodeid": {
"type": "string",
"index": "not_analyzed",
"store" : "yes"
},
"facet": {
"type": "string",
"index": "not_analyzed",
"store" : "yes"
}
}
},
"bcc": {
"type": "nested",
"properties":{
"name": {
"type": "string",
"analyzer":"analyzer_keyword"
},
"address": {
"type": "string",
"analyzer": "analyzer_email"
},
"nodeid": {
"type": "string",
"index": "not_analyzed",
"store" : "yes"
},
"facet": {
"type": "string",
"index": "not_analyzed",
"store" : "yes"
}
}
},
"message_snippet": {
"type": "string",
"index": "no",
"include_in_all": "false"
},
"text_messages": {
"type": "string",
"store": "yes",
"term_vector": "with_positions_offsets",
"analyzer": "snowball"
},
"html_messages": {
"type": "string",
"store": "yes",
"term_vector": "with_positions_offsets",
"analyzer": "snowball"
},
"message_attachments": {
"dynamic": "true",
"properties":{
"filename":{
"type": "string",
"store": "yes"
},
"content":{
"type": "string",
"store": "yes",
"term_vector": "with_positions_offsets",
"analyzer": "snowball"
},
"hash":{
"type": "string",
"store": "yes",
"analyzer": "analyzer_keyword"
},
"nodeid":{
"type": "string"
}
}
},
"date": {
"type": "date"
},
"entities": {
"type": "nested",
"properties": {
"name": {
"type": "string",
"analyzer": "analyzer_keyword"
},
"type": {
"type": "string",
"analyzer": "analyzer_keyword"
},
"nodeid":{
"type": "string"
},
"facet": {
"type": "string",
"index": "not_analyzed",
"store" : "yes"
}
}
}
}
}
}
I try searching on the mail.from.name field with the following query, but it doesn't give me any results.
{
"query": {
"nested": {
"path": "from",
"query": {
"term": {
"name": "mark"
}
}
}
}
}
What is wrong with my mapping or query?
A sample document looks like this
{
"project": "test",
"mailbox": "test.pst",
"messageid": "5e667f7f-4421-4836-91f3-8b5216c04839",
"nodeid": "671",
"subject": "No Subject",
"from": [
{
"name": "Mike Johnson",
"address": "mike@gmail.com",
"nodeid": "3",
"facet": "Mike Johnson"
}
],
"to": [
{
"name": "John Doe",
"address": "JDoe@gmail.com",
"nodeid": "367",
"facet": "John Doe"
}
],
"cc": [],
"bcc": [],
"textbody": "this is a test email with no further lines of text",
"htmlbody": "",
"snippet": "",
"transmitted": "",
"replyto": "",
"references": "",
"attachments": [],
"entities": [
{
"name": "google",
"type": "organization",
"nodeid": "656",
"facet": "google"
}
],
"domains": [
"google.com"
],
"path": ""
}

Inside a nested query you need to reference the field by its full path, including the nested object's name (from.name rather than just name):
{
"query": {
"nested": {
"path": "from",
"query": {
"term": {
"from.name": "mike"
}
}
}
}
}

Develop Reference

ruby bash windows laravel spring algorithm oracle macos go visual-studio

ES NGram with multi-languages - elasticsearch

Related

How to migrate data from ES2.1 to ES7 by logstash

Error while creating an index on Elasticsearch with custom analyzer

elasticsearch nested query does not give proper result

Elasticsearch - How to provide custom synonyms when querying?

no results elasticsearch when query by multi_field

Categories

Resources