ElasticSearch query relevance - elasticsearch

I would like to find a product using the following search priority: pickRef, name, synonym (it's an array), and then the other fields. I haven't managed to build a working query. I have to boost synonym with "50" in order to get the product into the top 8 results.
The aim of my query is to provide an autocomplete search with fuzzy matching (to tolerate misspellings).
I have a product with the synonym "caca". When I search for "caca", ES returns every coca product, but not the product with the synonym "caca". However, the product with the synonym "caca" should be the first result because it matches the synonym field exactly, and the coca products should come after it (since they only match because of the fuzziness parameter).
Here is my index:
{
"product": {
"aliases": {},
"mappings": {
"properties": {
"brand": {
"type": "keyword",
"boost": 3
},
"catalogue": {
"type": "keyword"
},
"category": {
"type": "text",
"analyzer": "standard"
},
"description": {
"properties": {
"de": {
"type": "text",
"boost": 3,
"analyzer": "german"
},
"en": {
"type": "text",
"boost": 3,
"analyzer": "english"
},
"fr": {
"type": "text",
"boost": 3,
"analyzer": "french"
},
"lu": {
"type": "text",
"boost": 3
}
}
},
"description_ecology": {
"properties": {
"de": {
"type": "text",
"boost": 3,
"analyzer": "german"
},
"en": {
"type": "text",
"boost": 3,
"analyzer": "english"
},
"fr": {
"type": "text",
"boost": 3,
"analyzer": "french"
},
"lu": {
"type": "text",
"boost": 3
}
}
},
"enabled": {
"type": "boolean"
},
"image": {
"type": "text"
},
"name": {
"properties": {
"de": {
"type": "text",
"boost": 3,
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
},
"analyzer": "german"
},
"en": {
"type": "text",
"boost": 3,
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
},
"analyzer": "english"
},
"fr": {
"type": "text",
"boost": 3,
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
},
"analyzer": "french"
},
"lu": {
"type": "text",
"boost": 3,
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
}
}
},
"pickRef": {
"type": "keyword",
"boost": 5
},
"replaced": {
"type": "boolean"
},
"slug": {
"type": "text"
},
"synonym": {
"type": "keyword",
"boost": 3
}
}
},
"settings": {
"index": {
"routing": {
"allocation": {
"include": {
"_tier_preference": "data_content"
}
}
},
"number_of_shards": "1",
"provided_name": "product",
"creation_date": "1634287857507",
"analysis": {
"filter": {
"autocomplete_filter": {
"type": "edge_ngram",
"min_gram": "1",
"max_gram": "20"
}
},
"analyzer": {
"autocomplete": {
"filter": [
"lowercase",
"autocomplete_filter"
],
"type": "custom",
"tokenizer": "standard"
}
},
"char_filter": {
"pre_negs": {
"pattern": "a \\w",
"type": "pattern_replace",
"replacement": ""
}
}
},
"number_of_replicas": "0",
"uuid": "EGLmpv8bRlCnfLBxHZOKmA",
"version": {
"created": "7150099"
}
}
}
}
}
Here is my query:
{
"index": "product",
"size": 8,
"body": {
"query": {
"bool": {
"must": [
{
"match": {
"enabled": true
}
},
{
"match": {
"replaced": false
}
}
],
"should": [
{
"match": {
"name.fr": {
"query": "caca",
"analyzer": "standard"
}
}
},
{
"match": {
"synonym": {
"query": "caca",
"boost": 20,
"analyzer": "standard"
}
}
},
{
"multi_match": {
"query": "caca",
"fields": [
"brand^2",
"pickRef^5",
"catalogue",
"name.fr^3",
"name.en^1",
"name.de^1",
"name.lu^1",
"description.fr^1",
"description.en^1",
"description.de^1",
"description.lu^1",
"description_ecologique.fr^1",
"description_ecologique.en^1",
"description_ecologique.de^1",
"description_ecologique.lu^1"
],
"fuzziness": "AUTO"
}
},
{
"query_string": {
"query": "caca"
}
}
]
}
}
}
}
These are my products:
{
"_index": "product",
"_type": "_doc",
"_id": "1594",
"_version": 1,
"_seq_no": 1593,
"_primary_term": 1,
"found": true,
"_source": {
"name": {
"fr": "PLANTE ARTIFICIELLE BAMBOU 120cm"
},
"pickRef": "122638",
"description": {
"fr": "Agrémentez votre lieu de travail avec cette superbe plante ! Elle garantit un environnement très naturel, ne nécessite pas d'entretien et agrémente n'importe quel espace. Tronc en bois, feuillage en polyester , livrée dans un pot standard en plastique."
},
"description_ecology": {
"fr": ""
},
"catalogue": "P399",
"image": "uploads/product/122638/122638.png",
"brand": "PAPERFLOW",
"category": "Autres",
"slug": "plante-artificielle-bambou-120cm-122638-122638",
"enabled": true,
"synonym": [],
"replaced": false
}
}
{
"_index": "product",
"_type": "_doc",
"_id": "3131",
"_version": 1,
"_seq_no": 3130,
"_primary_term": 1,
"found": true,
"_source": {
"name": {
"fr": "ROYCO MINUTE SOUP \"POIS AU JAMBON\""
},
"pickRef": "141065",
"description": {
"fr": "Retrouvez le bon goût des légumes dans ces recettes de tradition alliant tout le savoir-faire de Royco Minute Soup à la saveur des meilleurs ingrédients."
},
"description_ecology": {
"fr": ""
},
"catalogue": "P038",
"image": "uploads/product/141065/141065.png",
"brand": "ROYCO",
"category": "Soupe & pâtes",
"slug": "royco-minute-soup-pois-au-jambon-5410056186552-141065",
"enabled": true,
"synonym": [],
"replaced": false
}
}
{
"_index": "product",
"_type": "_doc",
"_id": "6",
"_version": 2,
"_seq_no": 24511,
"_primary_term": 1,
"found": true,
"_source": {
"name": {
"fr": "AGRAFES 26/6 GALVANISEES"
},
"pickRef": "100110",
"description": {
"fr": "<div>Boîte de 1000 agrafes 26/6 galvanisées.</div>"
},
"description_ecology": {
"fr": null
},
"catalogue": "S",
"image": "uploads/product/233163/233163.png",
"brand": "autres",
"category": "Autres",
"slug": "agrafes-26-6-galvanisees-jambon-5010255827746-100110",
"enabled": true,
"synonym": [
"caca",
"jambon"
],
"replaced": false
}
}
PS : I know the example is not perfect but I don't have a better one...

Did you try sorting by _score?
{
"index": "product",
"size": 8,
"body": {
"query": {
.
.
.
},
"sort": [
{
"_score": {
"order": "desc"
}
}
]
}
}

Related

Elasticsearch Aggregation (scripted metric aggregation)

I have a product collection in which each product contains an array of categories.
What I need is an aggregation(?) - an array of distinct category_ids.
Also nice to have: the count of matching products per category_id.
Sample index data
{
"data": [
{
"took": 6,
"timed_out": false,
"_shards": {
"total": 1,
"successful": 1,
"skipped": 0,
"failed": 0
},
"hits": {
"total": 4257,
"max_score": 1.0,
"hits": [
{
"_index": "index_name",
"_type": "product",
"_id": "4",
"_score": 1.0,
"_source": {
"entity_id": "4",
"attribute_set_id": "4",
"type_id": "simple",
"sku": "skuxxx",
"has_options": false,
"required_options": false,
"created_at": "2020-01-29 06:38:52",
"updated_at": "2020-09-04 12:31:00",
"visibility": "4"
},
"category": [
{
"category_id": 2,
"is_virtual": "false"
},
{
"category_id": 3,
"is_parent": true,
"is_virtual": "false",
"name": "Category name1"
},
{
"category_id": 4,
"is_parent": true,
"is_virtual": "false",
"name": "Category name2"
},
{
"category_id": 7,
"is_parent": true,
"is_virtual": "false",
"name": "Category name3"
}
]
}
]
}
}
]
}
What I need is an array of category ids, sorted by product count, with the possibility to filter categories (by "is_virtual" or "is_parent"):
Expected result
{
"data": [
{
"categories": [
{
"category_id": 2,
"doc_count": 555
},
{
"category_id": 3,
"doc_count": 150
},
{
"category_id": 56,
"doc_count": 12
}
]
}
]
}
Index mapping
{
"index": {
"mappings": {
"product": {
"_all": {
"enabled": false
},
"properties": {
"EAN01": {
"type": "text",
"copy_to": [
"search",
"spelling"
],
"analyzer": "standard"
},
"EAN02": {
"type": "text",
"copy_to": [
"search",
"spelling"
],
"analyzer": "standard"
},
"EAN03": {
"type": "text",
"copy_to": [
"search",
"spelling"
],
"analyzer": "standard"
},
"EAN04": {
"type": "text",
"copy_to": [
"search",
"spelling"
],
"analyzer": "standard"
},
"EAN05": {
"type": "text",
"copy_to": [
"search",
"spelling"
],
"analyzer": "standard"
},
"EAN06": {
"type": "text",
"copy_to": [
"search",
"spelling"
],
"analyzer": "standard"
},
"EAN07": {
"type": "text",
"copy_to": [
"search",
"spelling"
],
"analyzer": "standard"
},
"EAN08": {
"type": "text",
"copy_to": [
"search",
"spelling"
],
"analyzer": "standard"
},
"EAN09": {
"type": "text",
"copy_to": [
"search",
"spelling"
],
"analyzer": "standard"
},
"EAN10": {
"type": "text",
"copy_to": [
"search",
"spelling"
],
"analyzer": "standard"
},
"EAN11": {
"type": "text",
"copy_to": [
"search",
"spelling"
],
"analyzer": "standard"
},
"EAN12": {
"type": "text",
"copy_to": [
"search",
"spelling"
],
"analyzer": "standard"
},
"EAN13": {
"type": "text",
"copy_to": [
"search",
"spelling"
],
"analyzer": "standard"
},
"EAN14": {
"type": "text",
"copy_to": [
"search",
"spelling"
],
"analyzer": "standard"
},
"EAN15": {
"type": "text",
"copy_to": [
"search",
"spelling"
],
"analyzer": "standard"
},
"EAN16": {
"type": "text",
"copy_to": [
"search",
"spelling"
],
"analyzer": "standard"
},
"EAN17": {
"type": "text",
"copy_to": [
"search",
"spelling"
],
"analyzer": "standard"
},
"EAN18": {
"type": "text",
"copy_to": [
"search",
"spelling"
],
"analyzer": "standard"
},
"EAN19": {
"type": "text",
"copy_to": [
"search",
"spelling"
],
"analyzer": "standard"
},
"EAN20": {
"type": "text",
"copy_to": [
"search",
"spelling"
],
"analyzer": "standard"
},
"activity": {
"type": "integer"
},
"attribute_set1_1": {
"type": "integer"
},
"attribute_set_id": {
"type": "integer"
},
"attributeset2attribute1": {
"type": "integer"
},
"attributeset3attribute1": {
"type": "integer"
},
"autocomplete": {
"type": "text",
"fields": {
"shingle": {
"type": "text",
"analyzer": "shingle"
},
"whitespace": {
"type": "text",
"analyzer": "whitespace"
}
},
"analyzer": "standard"
},
"belongs_to_catalog": {
"type": "boolean"
},
"ca_1_1243545189": {
"type": "integer"
},
"category": {
"type": "nested",
"properties": {
"category_id": {
"type": "integer"
},
"is_blacklisted": {
"type": "boolean"
},
"is_parent": {
"type": "boolean"
},
"is_virtual": {
"type": "boolean"
},
"name": {
"type": "text",
"copy_to": [
"search",
"spelling"
],
"analyzer": "standard"
},
"position": {
"type": "integer"
}
}
},
"category_gear": {
"type": "integer"
},
"children_attributes": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"children_ids": {
"type": "integer"
},
"color": {
"type": "integer"
},
"configurable_attributes": {
"type": "keyword"
},
"configurable_variation": {
"type": "integer"
},
"created_at": {
"type": "date",
"format": "yyyy-MM-dd HH:mm:ss||yyyy-MM-dd"
},
"custom_price": {
"type": "keyword",
"ignore_above": 256
},
"description": {
"type": "text",
"copy_to": [
"search",
"spelling"
],
"analyzer": "standard"
},
"eco_collection": {
"type": "boolean"
},
"entity_id": {
"type": "integer"
},
"erin_recommends": {
"type": "boolean"
},
"features_bags": {
"type": "integer"
},
"gender": {
"type": "integer"
},
"has_options": {
"type": "boolean"
},
"image": {
"type": "keyword",
"ignore_above": 256
},
"indexed_attributes": {
"type": "keyword"
},
"manufacturer": {
"type": "integer"
},
"material": {
"type": "integer"
},
"mycolor": {
"type": "integer"
},
"mysize": {
"type": "integer"
},
"name": {
"type": "text",
"fields": {
"shingle": {
"type": "text",
"analyzer": "shingle"
},
"sortable": {
"type": "text",
"analyzer": "sortable",
"fielddata": true
},
"whitespace": {
"type": "text",
"analyzer": "whitespace"
}
},
"copy_to": [
"search",
"spelling"
],
"analyzer": "standard"
},
"new": {
"type": "boolean"
},
"option_text_activity": {
"type": "keyword",
"ignore_above": 256
},
"option_text_attribute_set1_1": {
"type": "keyword",
"ignore_above": 256
},
"option_text_attributeset2attribute1": {
"type": "text",
"fields": {
"untouched": {
"type": "keyword",
"ignore_above": 256
}
},
"copy_to": [
"search",
"spelling"
],
"analyzer": "standard"
},
"option_text_attributeset3attribute1": {
"type": "text",
"fields": {
"untouched": {
"type": "keyword",
"ignore_above": 256
}
},
"copy_to": [
"search",
"spelling"
],
"analyzer": "standard"
},
"option_text_belongs_to_catalog": {
"type": "text",
"copy_to": [
"search",
"spelling"
],
"analyzer": "standard"
},
"option_text_ca_1_1243545189": {
"type": "text",
"fields": {
"untouched": {
"type": "keyword",
"ignore_above": 256
}
},
"copy_to": [
"search",
"spelling"
],
"analyzer": "standard"
},
"option_text_category_gear": {
"type": "keyword",
"ignore_above": 256
},
"option_text_color": {
"type": "keyword",
"ignore_above": 256
},
"option_text_configurable_variation": {
"type": "text",
"fields": {
"untouched": {
"type": "keyword",
"ignore_above": 256
}
},
"copy_to": [
"search",
"spelling"
],
"analyzer": "standard"
},
"option_text_eco_collection": {
"type": "keyword",
"ignore_above": 256
},
"option_text_erin_recommends": {
"type": "keyword",
"ignore_above": 256
},
"option_text_features_bags": {
"type": "keyword",
"ignore_above": 256
},
"option_text_gender": {
"type": "keyword",
"ignore_above": 256
},
"option_text_manufacturer": {
"type": "text",
"fields": {
"untouched": {
"type": "keyword",
"ignore_above": 256
}
},
"copy_to": [
"search",
"spelling"
],
"analyzer": "standard"
},
"option_text_material": {
"type": "keyword",
"ignore_above": 256
},
"option_text_mycolor": {
"type": "text",
"fields": {
"untouched": {
"type": "keyword",
"ignore_above": 256
}
},
"copy_to": [
"search",
"spelling"
],
"analyzer": "standard"
},
"option_text_mysize": {
"type": "text",
"fields": {
"untouched": {
"type": "keyword",
"ignore_above": 256
}
},
"copy_to": [
"search",
"spelling"
],
"analyzer": "standard"
},
"option_text_new": {
"type": "keyword",
"ignore_above": 256
},
"option_text_performance_fabric": {
"type": "keyword",
"ignore_above": 256
},
"option_text_sale": {
"type": "keyword",
"ignore_above": 256
},
"option_text_size": {
"type": "keyword",
"ignore_above": 256
},
"option_text_status": {
"type": "text",
"copy_to": [
"search",
"spelling"
],
"analyzer": "standard"
},
"option_text_strap_bags": {
"type": "keyword",
"ignore_above": 256
},
"option_text_style_bags": {
"type": "keyword",
"ignore_above": 256
},
"option_text_tax_class_id": {
"type": "text",
"copy_to": [
"search",
"spelling"
],
"analyzer": "standard"
},
"performance_fabric": {
"type": "boolean"
},
"price": {
"type": "nested",
"properties": {
"customer_group_id": {
"type": "integer"
},
"final_price": {
"type": "double"
},
"is_discount": {
"type": "boolean"
},
"max_price": {
"type": "double"
},
"min_price": {
"type": "double"
},
"original_price": {
"type": "double"
},
"price": {
"type": "double"
},
"tax_class_id": {
"type": "integer"
}
}
},
"required_options": {
"type": "boolean"
},
"sale": {
"type": "boolean"
},
"search": {
"type": "text",
"fields": {
"shingle": {
"type": "text",
"analyzer": "shingle"
},
"whitespace": {
"type": "text",
"analyzer": "whitespace"
}
},
"analyzer": "standard"
},
"search_query": {
"type": "nested",
"properties": {
"is_blacklisted": {
"type": "boolean"
},
"position": {
"type": "integer"
},
"query_id": {
"type": "integer"
}
}
},
"short_description": {
"type": "text",
"copy_to": [
"search",
"spelling"
],
"analyzer": "standard"
},
"size": {
"type": "integer"
},
"sku": {
"type": "text",
"fields": {
"shingle": {
"type": "text",
"analyzer": "shingle"
},
"sortable": {
"type": "text",
"analyzer": "sortable",
"fielddata": true
},
"untouched": {
"type": "keyword",
"ignore_above": 256
},
"whitespace": {
"type": "text",
"analyzer": "whitespace"
}
},
"copy_to": [
"search",
"spelling"
],
"analyzer": "reference"
},
"spelling": {
"type": "text",
"fields": {
"phonetic": {
"type": "text",
"analyzer": "phonetic"
},
"shingle": {
"type": "text",
"analyzer": "shingle"
},
"whitespace": {
"type": "text",
"analyzer": "whitespace"
}
},
"analyzer": "standard"
},
"status": {
"type": "integer"
},
"stock": {
"properties": {
"is_in_stock": {
"type": "boolean"
},
"qty": {
"type": "integer"
}
}
},
"strap_bags": {
"type": "integer"
},
"style_bags": {
"type": "integer"
},
"tax_class_id": {
"type": "integer"
},
"type_id": {
"type": "keyword"
},
"updated_at": {
"type": "date",
"format": "yyyy-MM-dd HH:mm:ss||yyyy-MM-dd"
},
"url_key": {
"type": "text",
"fields": {
"untouched": {
"type": "keyword",
"ignore_above": 256
}
},
"copy_to": [
"search",
"spelling"
],
"analyzer": "standard"
},
"visibility": {
"type": "integer"
}
}
}
}
}
}
Adding a working example with index data, mapping, search query, and search result
Index Mapping:
{
"mappings": {
"properties": {
"category": {
"type": "nested"
}
}
}
}
Index Data:
{
"entity_id": "4",
"attribute_set_id": "4",
"type_id": "simple",
"sku": "skuxxx",
"has_options": false,
"required_options": false,
"created_at": "2020-01-29 06:38:52",
"updated_at": "2020-09-04 12:31:00",
"visibility": "4",
"category": [
{
"category_id": 2,
"is_virtual": "false"
},
{
"category_id": 3,
"is_parent": true,
"is_virtual": "false",
"name": "Category name3"
}
]
}
{
"entity_id": "4",
"attribute_set_id": "4",
"type_id": "simple",
"sku": "skuxxx",
"has_options": false,
"required_options": false,
"created_at": "2020-01-29 06:38:52",
"updated_at": "2020-09-04 12:31:00",
"visibility": "4",
"category": [
{
"category_id": 2,
"is_parent": true,
"is_virtual": "false",
"name": "Category name1"
}
]
}
{
"entity_id": "4",
"attribute_set_id": "4",
"type_id": "simple",
"sku": "skuxxx",
"has_options": false,
"required_options": false,
"created_at": "2020-01-29 06:38:52",
"updated_at": "2020-09-04 12:31:00",
"visibility": "4",
"category": [
{
"category_id": 2,
"is_virtual": "false"
},
{
"category_id": 3,
"is_parent": true,
"is_virtual": "false",
"name": "Category name1"
},
{
"category_id": 4,
"is_parent": true,
"is_virtual": "false",
"name": "Category name2"
}
]
}
Search Query:
{
"size": 0,
"aggs": {
"nested_path": {
"nested": {
"path": "category"
},
"aggs": {
"distinct_categories": {
"terms": {
"field": "category.category_id"
},
"aggs": {
"top_category_hits": {
"top_hits": {
"_source": {
"includes": [
"category.category_id"
]
}
}
}
}
}
}
}
}
}
Search Result:
"aggregations": {
"nested_path": {
"doc_count": 6,
"distinct_categories": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": 2,
"doc_count": 3, <-- note this
"top_category_hits": {
"hits": {
"total": {
"value": 3,
"relation": "eq"
},
"max_score": 1.0,
"hits": [
{
"_index": "stof_64259310",
"_type": "_doc",
"_id": "1",
"_nested": {
"field": "category",
"offset": 0
},
"_score": 1.0,
"_source": {
"category_id": 2
}
},
{
"_index": "stof_64259310",
"_type": "_doc",
"_id": "3",
"_nested": {
"field": "category",
"offset": 0
},
"_score": 1.0,
"_source": {
"category_id": 2
}
},
{
"_index": "stof_64259310",
"_type": "_doc",
"_id": "2",
"_nested": {
"field": "category",
"offset": 0
},
"_score": 1.0,
"_source": {
"category_id": 2 <-- note this
}
}
]
}
}
},
{
"key": 3,
"doc_count": 2, <-- note this
"top_category_hits": {
"hits": {
"total": {
"value": 2,
"relation": "eq"
},
"max_score": 1.0,
"hits": [
{
"_index": "stof_64259310",
"_type": "_doc",
"_id": "1",
"_nested": {
"field": "category",
"offset": 1
},
"_score": 1.0,
"_source": {
"category_id": 3
}
},
{
"_index": "stof_64259310",
"_type": "_doc",
"_id": "2",
"_nested": {
"field": "category",
"offset": 1
},
"_score": 1.0,
"_source": {
"category_id": 3 <-- note this
}
}
]
}
}
},
{
"key": 4,
"doc_count": 1, <-- note this
"top_category_hits": {
"hits": {
"total": {
"value": 1,
"relation": "eq"
},
"max_score": 1.0,
"hits": [
{
"_index": "stof_64259310",
"_type": "_doc",
"_id": "1",
"_nested": {
"field": "category",
"offset": 2
},
"_score": 1.0,
"_source": {
"category_id": 4 <-- note this
}
}
]
}
}
}
]
}

Elasticsearch Sorting fields anomaly

I am trying to sort a list on certain fields, firstName and lastName, but I have noticed some inconsistent results.
I am running a simple query
//Return all the employees from a specific company ordering by lastName asc | desc
GET employee-index-sorting
{
"query": {
"bool": {
"filter": {
"term": {
"companyId": 3179
}
}
}
},
"sort": [
{
"lastName.keyword": { <-- Should this be keyword? or not_analyzed
"order": "desc"
}
}
]
}
In the result why would van der Mescht and van Breda be before Zwane and Zwezwe?
I suspect there is something wrong with my mappings
{
"_index": "employee-index",
"_type": "_doc",
"_id": "637467",
"_score": null,
"_source": {
"companyId": 3179,
"firstName": "Name",
"lastName": "van der Mescht",
},
"sort": [
"van der Mescht"
]
},
{
"_index": "employee-index",
"_type": "_doc",
"_id": "678335",
"_score": null,
"_source": {
"companyId": 3179,
"firstName": "Name3",
"lastName": "van Breda",
},
"sort": [
"van Breda"
]
},
{
"_index": "employee-index",
"_type": "_doc",
"_id": "113896",
"_score": null,
"_source": {
"companyId": 3179,
"firstName": "Name2",
"lastName": "Zwezwe",
},
"sort": [
"Zwezwe"
]
},
{
"_index": "employee-index",
"_type": "_doc",
"_id": "639639",
"_score": null,
"_source": {
"companyId": 3179,
"firstName": "Name1",
"lastName": "Zwane",
},
"sort": [
"Zwane"
]
}
Mappings
Posting the entire map because I am not sure if there might be something else wrong with it.
How should I change the lastName and firstName properties to allow for sorting on them?
PUT employee-index-sorting
{
"settings": {
"index": {
"analysis": {
"filter": {},
"analyzer": {
"keyword_analyzer": {
"filter": [
"lowercase",
"asciifolding",
"trim"
],
"char_filter": [],
"type": "custom",
"tokenizer": "keyword"
},
"edge_ngram_analyzer": {
"filter": [
"lowercase"
],
"tokenizer": "edge_ngram_tokenizer"
},
"edge_ngram_search_analyzer": {
"tokenizer": "lowercase"
}
},
"tokenizer": {
"edge_ngram_tokenizer": {
"type": "edge_ngram",
"min_gram": 2,
"max_gram": 5,
"token_chars": [
"letter"
]
}
}
}
}
},
"mappings": {
"_doc": {
"properties": {
"employeeId": {
"type": "keyword"
},
"companyGroupId": {
"type": "keyword"
},
"companyId": {
"type": "keyword"
},
"number": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"preferredName": {
"type": "text",
"index": false
},
"firstName": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"middleName": {
"type": "text",
"index": false
},
"lastName": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"fullName": {
"type": "text",
"fields": {
"keywordstring": {
"type": "text",
"analyzer": "keyword_analyzer"
},
"edgengram": {
"type": "text",
"analyzer": "edge_ngram_analyzer",
"search_analyzer": "edge_ngram_search_analyzer"
}
},
"analyzer": "standard"
},
"terminationDate": {
"type": "date"
},
"companyName": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"email": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"idNumber": {
"type": "text"
},
"description": {
"type": "text",
"index": false
},
"jobNumber": {
"type": "keyword"
},
"frequencyId": {
"type": "long"
},
"frequencyCode": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"frequencyAccess": {
"type": "boolean"
}
}
}
}
}
For sorting you need to use lastName.keyword, that's correct, no need to change anything there.
The reason why van der Mescht and van Breda are before Zwane and Zwezwe is that sorting on strings happens on a lexicographical level, i.e. basically using the ASCII table, in which uppercase characters come before lowercase ones, so words are sorted in that same order. But since you're sorting in desc mode, the order is exactly the opposite:
z...
...
van der Mescht
...
van Breda
...
a...
...
Zwezwe
...
Zwane
...
Z...
...
A...
To fix this, what you simply need to do is to add a normalizer to your lastName.keyword field, i.e. change your mapping to this and it will work:
{
"settings": {
"index": {
"analysis": {
"filter": {},
"analyzer": {
...
},
"tokenizer": {
...
},
"normalizer": { <-- add this
"lowersort": {
"type": "custom",
"filter": [
"lowercase"
]
}
}
}
}
},
"mappings": {
"_doc": {
"properties": {
...
"lastName": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"normalizer": "lowersort", <-- add this
"ignore_above": 256
}
}
},
...
}
}
}
}

Elasticsearch query filter

I am trying to make a query with a filter on my index, but when I try to filter on any attribute in the mapping the query returns no results.
The query is the following; if I run it with just the geo_distance part I get results. I would like to filter the results using one of the properties in the mapping (in this case rating, but it could be city, state, etc.).
The query is generated in Java via QueryBuilder from the elasticsearch library (v 52.0). But for now I am trying to understand how to build a working query and execute it via curl.
{
"query": {
"bool": {
"filter": [
{
"geo_distance": {
"geometry.coordinates": [
12.3232,
12.2323
],
"distance": 200000,
"distance_type": "plane",
"validation_method": "STRICT",
"ignore_unmapped": false,
"boost": 1
}
},
{
"bool": {
"must": [
{
"terms": {
"rating": [
"0"
],
"boost": 1
}
}
],
"adjust_pure_negative": true,
"boost": 1
}
}
],
"adjust_pure_negative": true,
"boost": 1
}
}
}
If I run a query filtering on zipcode or id it works.
For example a query like this:
{"query":{"bool":{"filter":{"term":{"zipCode":"111111"}}}}}
A snippet of my mapping is this
{
"my_index": {
"mappings": {
"poielement": {
"dynamic_templates": [
{
"suggestions": {
"match": "suggest_*",
"mapping": {
"analyzer": "my_analyzer",
"copy_to": "auto_suggest",
"search_analyzer": "my_analyzer",
"store": true,
"type": "text"
}
}
},
{
"integers": {
"match_mapping_type": "long",
"mapping": {
"type": "text"
}
}
},
{
"geopoint": {
"match": "coordinates",
"mapping": {
"type": "geo_point"
}
}
},
{
"property": {
"match": "*",
"mapping": {
"analyzer": "my_analyzer",
"search_analyzer": "my_analyzer"
}
}
}
],
"date_detection": false,
"numeric_detection": false,
"properties": {
"city": {
"type": "text",
"analyzer": "my_analyzer"
},
"country": {
"type": "text",
"analyzer": "my_analyzer"
},
"geometry": {
"properties": {
"coordinates": {
"type": "geo_point"
},
"type": {
"type": "text",
"analyzer": "my_analyzer"
}
}
},
"id": {
"type": "text"
},
"name": {
"type": "keyword"
},
"rating": {
"type": "text"
},
"total_rate": {
"type": "text",
"analyzer": "my_analyzer"
},
"type": {
"type": "text",
"analyzer": "my_analyzer"
},
"zipCode": {
"type": "text"
}
}
}
}
}
}
When I retrieve data via http://elasticsearchpat/my_index/_search data looks like this
{
"took": 1,
"timed_out": false,
"_shards": {
"total": 4,
"successful": 4,
"skipped": 0,
"failed": 0
},
"hits": {
"total": 7517,
"max_score": 1,
"hits": [
{
"_index": "my_index",
"_type": "poielement",
"_id": "58768",
"_score": 1,
"_source": {
"zipCode": 111111,
"country": "USA",
"city": "Portland",
"rating": 0,
"type": "",
"id": 123,
"geometry": {
"coordinates": [
12.205061,
12.490463
],
"type": "Point"
}
}
}
]
}
}
I will be very grateful for any help.
Thanks
Try this query instead
{
"query": {
"bool": {
"must": [
{
"match": {
"rating": 0
}
}
],
"filter": [
{
"geo_distance": {
"geometry.coordinates": [
12.3232,
12.2323
],
"distance": 200000,
"distance_type": "plane",
"validation_method": "STRICT",
"ignore_unmapped": false,
"boost": 1
}
}
],
"adjust_pure_negative": true,
"boost": 1
}
}
}

Elastic search top_hits aggregation on nested

I have an index which contains CustomerProfile documents. In each of these documents, the CustomerInsightTargets property (with the properties Source, Value) can be an array with x items. What I am trying to achieve is an autocomplete (of the top 5) on CustomerInsightTargets.Value grouped by CustomerInsightTargets.Source.
It would be helpful if anyone could give me a hint about how to select only a subset of nested objects from each document and use those nested objects in aggregations.
{
"customerinsights": {
"aliases": {},
"mappings": {
"customerprofile": {
"properties": {
"CreatedById": {
"type": "long"
},
"CreatedDateTime": {
"type": "date"
},
"CustomerInsightTargets": {
"type": "nested",
"properties": {
"CustomerInsightSource": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"CustomerInsightValue": {
"type": "text",
"term_vector": "yes",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
},
"analyzer": "ngram_tokenizer_analyzer"
},
"CustomerProfileId": {
"type": "long"
},
"Guid": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"Id": {
"type": "long"
}
}
},
"DisplayName": {
"type": "text",
"term_vector": "yes",
"analyzer": "ngram_tokenizer_analyzer"
},
"Email": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"Id": {
"type": "long"
},
"ImageUrl": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
}
}
}
},
"settings": {
"index": {
"number_of_shards": "1",
"provided_name": "customerinsights",
"creation_date": "1484860145041",
"analysis": {
"analyzer": {
"ngram_tokenizer_analyzer": {
"type": "custom",
"tokenizer": "ngram_tokenizer"
}
},
"tokenizer": {
"ngram_tokenizer": {
"type": "nGram",
"min_gram": "1",
"max_gram": "10"
}
}
},
"number_of_replicas": "2",
"uuid": "nOyI0O2cTO2JOFvqIoE8JQ",
"version": {
"created": "5010199"
}
}
}
}
}
Having as example a document:
{
{
"Id": 9072856,
"CreatedDateTime": "2017-01-12T11:26:58.413Z",
"CreatedById": 9108469,
"DisplayName": "valentinos",
"Email": "valentinos#mail.com",
"CustomerInsightTargets": [
{
"Id": 160,
"CustomerProfileId": 9072856,
"CustomerInsightSource": "Tags",
"CustomerInsightValue": "Tag1",
"Guid": "00000000-0000-0000-0000-000000000000"
},
{
"Id": 160,
"CustomerProfileId": 9072856,
"CustomerInsightSource": "ProfileName",
"CustomerInsightValue": "valentinos",
"Guid": "00000000-0000-0000-0000-000000000000"
},
{
"Id": 160,
"CustomerProfileId": 9072856,
"CustomerInsightSource": "Playground",
"CustomerInsightValue": "Wiki",
"Guid": "00000000-0000-0000-0000-000000000000"
}
]
}
}
If I run a top_hits aggregation, the result includes all targets from a document if any one of them matches my search text.
Example
GET customerinsights/_search
{
"query": {
"bool": {
"must": [
{
"nested": {
"path": "CustomerInsightTargets",
"query": {
"bool": {
"must": [
{
"match": {
"CustomerInsightTargets.CustomerInsightValue": {
"query": "2017",
"operator": "AND",
"fuzziness": 2
}
}
}
]
}
}
}
}
]
}
} ,
"aggs": {
"root": {
"nested": {
"path": "CustomerInsightTargets"
},
"aggs": {
"top_tags": {
"terms": {
"field": "CustomerInsightTargets.CustomerInsightSource.keyword"
},
"aggs": {
"top_tag_hits": {
"top_hits": {
"sort": [
{
"_score": {
"order": "desc"
}
}
],
"size": 5,
"_source": "CustomerInsightTargets"
}
}
}
}
}
}
},
"size": 0,
"_source": "CustomerInsightTargets"
}
My question is how I should use the aggregation to get the "autocomplete" Values grouped by Source and ordered by _score. I tried to use a significant_terms aggregation but it doesn't work very well; also, the terms aggregation doesn't sort by score (it sorts by _count), and having fuzziness adds complexity.

elasticsearch not returning text when entered partial word

I have my analyzers set like this:
"analyzer": {
"edgeNgram_autocomplete": {
"type": "custom",
"tokenizer": "standard",
"filter": ["lowercase", "autocomplete"]
},
"full_name": {
"filter":["standard","lowercase","asciifolding"],
"type":"custom",
"tokenizer":"standard"
}
My filter:
"filter": {
"autocomplete": {
"type": "edgeNGram",
"side":"front",
"min_gram": 1,
"max_gram": 50
}
Name field analyzer:
"textbox": {
"_parent": {
"type": "document"
},
"properties": {
"text": {
"fields": {
"text": {
"type":"string",
"analyzer":"full_name"
},
"autocomplete": {
"type": "string",
"index_analyzer": "edgeNgram_autocomplete",
"search_analyzer": "full_name",
"analyzer": "full_name"
}
},
"type":"multi_field"
}
}
}
Put all together, makes up my mapping for docstore index:
PUT http://localhost:9200/docstore
{
"settings": {
"analysis": {
"analyzer": {
"edgeNgram_autocomplete": {
"type": "custom",
"tokenizer": "standard",
"filter": ["lowercase", "autocomplete"]
},
"full_name": {
"filter":["standard","lowercase","asciifolding"],
"type":"custom",
"tokenizer":"standard"
}
},
"filter": {
"autocomplete": {
"type": "edgeNGram",
"side":"front",
"min_gram": 1,
"max_gram": 50
} }
}
},
"mappings": {
"space": {
"properties": {
"name": {
"type": "string",
"index": "not_analyzed"
}
}
},
"document": {
"_parent": {
"type": "space"
},
"properties": {
"name": {
"type": "string",
"index": "not_analyzed"
}
}
},
"textbox": {
"_parent": {
"type": "document"
},
"properties": {
"bbox": {
"type": "long"
},
"text": {
"fields": {
"text": {
"type":"string",
"analyzer":"full_name"
},
"autocomplete": {
"type": "string",
"index_analyzer": "edgeNgram_autocomplete",
"search_analyzer": "full_name",
"analyzer":"full_name"
}
},
"type":"multi_field"
}
}
},
"entity": {
"_parent": {
"type": "document"
},
"properties": {
"bbox": {
"type": "long"
},
"name": {
"type": "string",
"index": "not_analyzed"
}
}
}
}
}
Add a space to hold all docs:
POST http://localhost:9200/docstore/space
{
"name": "Space 1"
}
When user enters word: proj
this should return, all text:
SampleProject
Sample Project
Project Name
myProjectname
firstProjectName
my ProjectName
But it returns nothing.
My query:
POST http://localhost:9200/docstore/textbox/_search
{
"query": {
"match": {
"text": "proj"
}
},
"filter": {
"has_parent": {
"type": "document",
"query": {
"term": {
"name": "1-a1-1001.pdf"
}
}
}
}
}
If I search by project, I get:
{ "took": 4,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"failed": 0
},
"hits": {
"total": 2,
"max_score": 3.0133555,
"hits": [
{
"_index": "docstore",
"_type": "textbox",
"_id": "AVRuV2d_f4y6IKuxK35g",
"_score": 3.0133555,
"_routing": "AVRuVvtLf4y6IKuxK33f",
"_parent": "AVRuV2cMf4y6IKuxK33g",
"_source": {
"bbox": [
8750,
5362,
9291,
5445
],
"text": [
"Sample Project"
]
}
},
{
"_index": "docstore",
"_type": "textbox",
"_id": "AVRuV2d_f4y6IKuxK35Y",
"_score": 2.4106843,
"_routing": "AVRuVvtLf4y6IKuxK33f",
"_parent": "AVRuV2cMf4y6IKuxK33g",
"_source": {
"bbox": [
8645,
5170,
9070,
5220
],
"text": [
"Project Name and Address"
]
}
}
]
}
}
Maybe my edgengram is not suited for this?
I am saying:
"side":"front"
Should I do it differently?
Does anyone know what I am doing wrong?
The problem is with the autocomplete indexing analyzer field name.
Change:
"index_analyzer": "edgeNgram_autocomplete"
To:
"analyzer": "edgeNgram_autocomplete"
And also search like (@Andrei Stefan) showed in his answer:
POST http://localhost:9200/docstore/textbox/_search
{
"query": {
"match": {
"text.autocomplete": "proj"
}
}
}
And it will work as expected!
I have tested your configuration on Elasticsearch 2.3
By the way, type multi_field is deprecated.
Hope I have managed to help :)
Your query should actually try to match on text.autocomplete and not text:
"query": {
"match": {
"text.autocomplete": "proj"
}
}

Resources