Agg counts on nested objects in elasticsearch query count only once - elasticsearch

we have a mapping with nested objects.
We use it for storing variants in an ecommerce environment.
To build filters in ecommerce we check agg bucket counts.
What happens now is that we get agg counts for every variant that matches.
But I would like to count only once per parent (even if more than one child matches).
here is our mapping:
{
"products": {
"mappings": {
"dynamic": "false",
"properties": {
"product_id": {
"type": "keyword"
},
"variants": {
"type": "nested",
"properties": {
"brand": {
"type": "keyword"
},
"color": {
"type": "keyword"
}
}
}
}
}
}
}
example of documents:
{
"_index": "products",
"_type": "_doc",
"_id": "5e42f759de235d5b42e6e35524141bd2cfcbc5d0",
"_score": 1.0,
"_source": {
"product_id": "5e42f759de235d5b42e6e35524141bd2cfcbc5d0",
"variants": [
{
"color": [
"Black"
],
"brand": "Fackelmann"
},
{
"color": [],
"brand": "Fackelmann"
},
{
"color": [],
"brand": "Fackelmann"
},
{
"color": [],
"brand": "Fackelmann"
}
]
}
},
{
"_index": "products",
"_type": "_doc",
"_id": "768af68018654b04f20d32003348ee9bd81d9f65",
"_score": 1.0,
"_source": {
"product_id": "768af68018654b04f20d32003348ee9bd81d9f65",
"variants": [
{
"color": [
"Grey"
],
"brand": "Fackelmann"
},
{
"color": [],
"brand": "Fackelmann"
},
{
"color": [],
"brand": "Fackelmann"
},
{
"color": [],
"brand": "IKEA"
}
]
}
}
in this example we get those counts for aggs:
brand:Fackelmann[7]
brand:IKEA[1]
color:Grey[1]
color:Black[1]
What i expect to get is:
brand:Fackelmann[2]
brand:IKEA[1]
color:Grey[1]
color:Black[1]

Related

ElasticSearch wildcard highlighting with hyphen

I am having trouble with a wildcard query. When the text contains a hyphen (-), nothing after the hyphen is highlighted. I played with the highlight settings but have not found a solution yet. Is this normal behavior?
I am making some index:
PUT testhighlight
PUT testhighlight/_mapping/_doc
{
"properties": {
"title": {
"type": "text",
"term_vector": "with_positions_offsets"
},
"content": {
"type": "text",
"term_vector": "with_positions_offsets"
}
}
}
Then i create documents:
PUT testhighlight/_doc/1
{
"title": "1",
"content": "test-input"
}
PUT testhighlight/_doc/2
{
"title": "2",
"content": "test input"
}
PUT testhighlight/_doc/3
{
"title": "3",
"content": "testinput"
}
Then i execute this search request:
GET testhighlight/_search
{
"query": {
"bool": {
"must": [
{
"query_string": {
"fields": [
"title",
"content"
],
"query": "test*"
}
}
]
}
},
"highlight": {
"fields": {
"content": {
"boundary_max_scan": 10,
"fragment_offset": 5,
"fragment_size": 250,
"type": "fvh",
"number_of_fragments": 5,
"order": "score",
"boundary_scanner": "word",
"post_tags": [
"</span>"
],
"pre_tags": [
"""<span class="highlight-search">"""
]
}
}
}
}
It returns these hits:
"hits": [
{
"_index": "testhighlight",
"_type": "_doc",
"_id": "2",
"_score": 1.0,
"_source": {
"title": "2",
"content": "test input"
},
"highlight": {
"content": [
"""<span class="highlight-search">test</span> input"""
]
}
},
{
"_index": "testhighlight",
"_type": "_doc",
"_id": "1",
"_score": 1.0,
"_source": {
"title": "1",
"content": "test-input"
},
"highlight": {
"content": [
"""<span class="highlight-search">test</span>-input"""
]
}
},
{
"_index": "testhighlight",
"_type": "_doc",
"_id": "3",
"_score": 1.0,
"_source": {
"title": "3",
"content": "testinput"
},
"highlight": {
"content": [
"""<span class="highlight-search">testinput</span>"""
]
}
}
]
It looks alright, but it didn't highlight the whole "test-input" in the document with ID 1. Is there any way to do so?

How can I prioritize documents in Elasticsearch query

I have products in my index. Documents are basically structured like these:
{
"_id": "product",
"_source": {
...
"type": "product",
"id": 1,
"mainTaxon": {
"name": "T-SHIRT",
},
"attributes": [
{
"code": "name",
"name": "Name",
"value": [
"BANANA T-SHIRT"
],
"score": 50
},
]
}
},
{
"_id": "product",
"_source": {
...
"type": "product",
"id": 2,
"mainTaxon": {
"name": "JEANS",
},
"attributes": [
{
"code": "name",
"name": "Name",
"value": [
"BANANA JEANS"
],
"score": 50
},
]
}
}
}
When I search for 'BANANA' I would like to prioritize products with a mainTaxon different from JEANS. So, every product with the mainTaxon name T-SHIRT or something else would be listed before products with mainTaxon JEANS.
You can use a boosting query to prioritize documents:
{
"query": {
"boosting": {
"positive": {
"match": {
"attributes.value": "banana"
}
},
"negative": {
"match": {
"mainTaxon.name": "JEANS"
}
},
"negative_boost": 0.5
}
}
}
Search Result will be
"hits": [
{
"_index": "67164768",
"_type": "_doc",
"_id": "1",
"_score": 0.5364054,
"_source": {
"type": "product",
"id": 1,
"mainTaxon": {
"name": "T-SHIRT"
},
"attributes": [
{
"code": "name",
"name": "Name",
"value": [
"BANANA T-SHIRT"
],
"score": 50
}
]
}
},
{
"_index": "67164768",
"_type": "_doc",
"_id": "2",
"_score": 0.32743764,
"_source": {
"type": "product",
"id": 2,
"mainTaxon": {
"name": "JEANS"
},
"attributes": [
{
"code": "name",
"name": "Name",
"value": [
"BANANA JEANS"
],
"score": 50
}
]
}
}
]

Search across _all field in Elastic and return results with highlighting

I am using Elastic 5.4 and want to query across an index containing documents of multiple types (type a and type b). Below are example documents in the index:
Documents:
{
"_index": "test",
"_type": "a",
"_id": "1",
"_source": {
"id": "1",
"name": "john-usa-soccer",
"class": "5",
"lastseen": "2017-07-05",
"a_atts": {
"lastname": "tover",
"hobby": "soccer",
"country": "usa"
}
}
}
{
"_index": "test",
"_type": "b",
"_id": "2",
"_source": {
"id": "2",
"name": "john-usa",
"class": "5",
"lastseen": "2017-07-05",
"b_atts": {
"lastname": "kaml",
"hobby": "baseball",
"country": "usa"
}
}
}
Mapping:
{
"settings": {
"analysis": {
"analyzer": {
"my_ngram_analyzer": {
"tokenizer": "my_ngram_tokenizer"
}
},
"tokenizer": {
"my_ngram_tokenizer": {
"type": "ngram",
"min_gram": "3",
"max_gram": "3",
"token_chars": [
"letter",
"digit"
]
}
}
}
},
"mappings": {
"a": {
"dynamic_templates": [
{
"strings": {
"match": "*",
"match_mapping_type": "string",
"mapping": {
"type": "text",
"analyzer": "my_ngram_analyzer",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
},
"suggest": {
"type": "completion",
"analyzer": "simple"
},
"analyzer1": {
"type": "text",
"analyzer": "simple"
},
"analyzer2": {
"type": "text",
"analyzer": "standard"
}
}
}
}
}
]
},
"b": {
"dynamic_templates": [
{
"strings": {
"match": "*",
"match_mapping_type": "string",
"mapping": {
"type": "text",
"analyzer": "my_ngram_analyzer",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
},
"suggest": {
"type": "completion",
"analyzer": "simple"
},
"analyzer1": {
"type": "text",
"analyzer": "simple"
},
"analyzer2": {
"type": "text",
"analyzer": "standard"
}
}
}
}
}
]
}
}
}
My query is to search all documents which contain 'john' across any of the fields in any type and highlight the fields where the match was found. This query is constructed as per the Elastic documentation. My schema mapping has my_ngram_analyzer configured as the analyzer, instead of the default analyzer, for all fields of type string in the schema.
Query: http://localhost:9200/student/_search
{
"query": {
"bool": {
"should": [
{ "match": { "_all": "john"} }
]
}
},
"highlight": {
"fields": {
"name": {
"require_field_match": false
},
"a_atts.lastname":{
"require_field_match": false
},
"a_atts.hobby":{
"require_field_match": false
},
"a_atts.country":{
"require_field_match": false
}
}
}
}
Response:
{
"took": 79,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"failed": 0
},
"hits": {
"total": 2,
"max_score": 0.17669111,
"hits": [
{
"_index": "student",
"_type": "a",
"_id": "AV1WjBeYEZrDBYsdGMtY",
"_score": 0.17669111,
"_source": {
"name": "john-usa-soccer",
"class": "5",
"lastseen": "2017-07-05",
"a_atts": {
"lastname": "tover",
"hobby": "soccer",
"country": "usa"
}
}
},
{
"_index": "student",
"_type": "b",
"_id": "AV1WjHFxEZrDBYsdGMtZ",
"_score": 0.17669111,
"_source": {
"name": "john-usa",
"class": "5",
"lastseen": "2017-07-05",
"b_atts": {
"lastname": "kaml",
"hobby": "baseball",
"country": "usa"
}
}
}
]
}
}
However, executing the above query against the index returns the matched documents with their _source content but without the highlight field. It is missing the following:
"highlight": {
"name": [
"<em>john</em>-usa-soccer"
]
}
How can I return highlight in the results?
I got highlighter to work by following the answer provided in this link.
"highlight": {
"fields": {
"*": {}
},
"require_field_match": false
}

How to aggregate on nested objects in elasticsearch

I have the following mapping in ES:
"mappings": {
"products": {
"properties": {
"product": {
"type" : "nested",
"properties": {
"features": {
"type": "nested"
},
"sitedetails": {
"type": "nested"
}
}
}
}
}
}
and then 3 products like this:
"hits": [
{
"_index": "catalog",
"_type": "products",
"_id": "AVNE8F4mFYOWvB4rMqdO",
"_score": 1,
"_source": {
"product": {
"ean": "abc",
"features": {
"productType": "DVD player"
},
"color": "Black",
"manufacturer": "Sony",
"sitedetails": [
{
"name": "amazon.com",
"sku": "zzz",
"url": "http://www.amazon.com/dp/zzz"
}
],
"category": "Portable DVD Players"
}
}
},
{
"_index": "catalog",
"_type": "products",
"_id": "AVNE8XkXFYOWvB4rMqdQ",
"_score": 1,
"_source": {
"product": {
"ean": "def",
"features": {
"ProductType": "MP3 player"
},
"color": "Black",
"manufacturer": "LG",
"sitedetails": [
{
"name": "amazon.com",
"sku": "aaa",
"url": "http://www.amazon.com/dp/aaa"
}
],
"category": "MP3 Players"
}
}
},
{
"_index": "catalog",
"_type": "products",
"_id": "AVNIh-xVWwxj6Cz_r8AT",
"_score": 1,
"_source": {
"product": {
"ean": "abc",
"features": {
"productType": "DVD player"
},
"color": "White",
"manufacturer": "Sony",
"sitedetails": [
{
"name": "amazon.com",
"sku": "ggg",
"url": "http://www.amazon.com/dp/ggg"
}
],
"category": "Portable DVD Players"
}
}
}
]
I need to display on the UI side 2 filters, one for Manufacturer and one for website.
How can I aggregate on product.manufacturer and product.sitedetails.name?
tnx!
Figured it out:
GET /catalog/products/_search
{
"aggs": {
"byManufacturer": {
"nested": {
"path": "product"
},
"aggs": {
"byManufacturer": {
"terms": {
"field": "product.manufacturer"
}
}
}
},
"bySeller": {
"nested": {
"path": "product.sitedetails"
},
"aggs": {
"bySeller": {
"terms": {
"field": "product.sitedetails.name"
}
}
}
}
}
}

Unexpected (case-insensitive) string sorting in Elasticsearch

I have a list of console platforms that I'm sorting in Elasticsearch.
Here is the mapping for the "name" field:
{
"name": {
"type": "multi_field",
"fields": {
"name": {
"type": "string",
"index": "analyzed"
},
"sort_name": {
"type": "string",
"index": "not_analyzed"
}
}
}
}
When I execute the following query
{
"query": {
"match_all": {}
},
"sort": [
{
"name.sort_name": { "order": "asc" }
}
],
"fields": ["name"]
}
I get these results:
{
"took": 1,
"timed_out": false,
"_shards": {
"total": 3,
"successful": 3,
"failed": 0
},
"hits": {
"total": 17,
"max_score": null,
"hits": [
{
"_index": "platforms",
"_type": "platform",
"_id": "1393602489",
"_score": null,
"fields": {
"name": "GameCube"
},
"sort": [
"GameCube"
]
},
{
"_index": "platforms",
"_type": "platform",
"_id": "1393602490",
"_score": null,
"fields": {
"name": "Gameboy Advance"
},
"sort": [
"Gameboy Advance"
]
},
{
"_index": "platforms",
"_type": "platform",
"_id": "1393602498",
"_score": null,
"fields": {
"name": "Nintendo 3DS"
},
"sort": [
"Nintendo 3DS"
]
},
...remove for brevity ...
{
"_index": "platforms",
"_type": "platform",
"_id": "1393602493",
"_score": null,
"fields": {
"name": "Xbox 360"
},
"sort": [
"Xbox 360"
]
},
{
"_index": "platforms",
"_type": "platform",
"_id": "1393602502",
"_score": null,
"fields": {
"name": "Xbox One"
},
"sort": [
"Xbox One"
]
},
{
"_index": "platforms",
"_type": "platform",
"_id": "1393602497",
"_score": null,
"fields": {
"name": "iPhone/iPod"
},
"sort": [
"iPhone/iPod"
]
}
]
}
Everything is sorted as expected except that the iPhone/iPod result is at the end (instead of after Gameboy Advance) - why does the / in the name have an effect on the sorting?
Thanks
Okay so I discovered the reason wasn't anything to do with the /. ES will sort by capital letters then lower case letters.
I added a custom analyzer to the settings of the index creation:
{
"analysis": {
"analyzer": {
"sortable": {
"tokenizer": "keyword",
"filter": [
"lowercase"
]
}
}
}
}
Then in the field mapping I added 'analyzer': 'sortable' to the sort_name multi field.
Use Normalizer with keyword to handle the sort
https://www.elastic.co/guide/en/elasticsearch/reference/current/analysis-normalizers.html#analysis-normalizers
PUT index_name
{
"settings": {
"analysis": {
"normalizer": {
"my_normalizer": {
"type": "custom",
"char_filter": ["quote"],
"filter": ["lowercase", "asciifolding"]
}
}
}
},
"mappings": {
"properties": {
"name": {
"type": "keyword",
"normalizer": "my_normalizer"
}
}
}
}
Search query may be modified like this
{
"query": {
"match_all": {}
},
"sort": [
{
"name.sort_name": { "order": "asc" }
}
],
"fields": "name.keyword"
}
According to https://www.elastic.co/guide/en/elasticsearch/reference/current/analysis-normalizers.html (ElasticSearch 7.16) ...
Elasticsearch ships with a lowercase built-in normalizer.
So you can define an additional field (in the example below named "lowersortable"):
PUT /myindex/_mapping
{
"properties": {
"myproperty": {
"type": "text",
"fields": {
"lowersortable": {
"type": "keyword",
"normalizer": "lowercase"
}
}
}
}
}
... and use this field myproperty.lowersortable for sorting in the search query.

Resources