Elasticsearch: Ordering with respect to Nested Sum Aggregation - sorting

The use case is to create aggregations to find top selling products in a region sorted by the counts of products sold. The data is stored in an index in elasticsearch.
I want to sort my 'group_by_name' aggregations output by the sum_quantity value aggregation which is in the last nested aggregation/ two levels in after 'group_by_name' with an intermediate aggregation 'group_by_sku'. The default output is sorted by doc_count. I want the aggregation to be sorted by the 'sum_quantity' aggregation value.
I have an index with the following mapping:
{"settings": {
"index": {
"number_of_shards": 2,
"number_of_replicas": 0
},
"analysis":{
"analyzer":{
"autocomplete":{
"type":"custom",
"tokenizer":"standard",
"filter":[ "standard", "lowercase", "ngram" ]
}
},
"filter":{
"ngram":{
"type":"ngram",
"min_gram":3,
"max_gram":25
}
}
} },"mappings": {
"farmer_products_map":{
"properties": {
"state": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword"
}
}
},
"district": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword"
}
}
},
"taluka": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword"
}
}
},
"village": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword"
}
}
},
"createdOn": {
"type": "date",
"format": "epoch_millis"
},
"category": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword"
}
}
},
"brand": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword"
}
}
},
"productName": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword"
},
"autocomplete":{
"analyzer": "autocomplete",
"type": "string"
}
}
},
"crop": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword"
}
}
},
"sku": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword"
}
}
},
"quantity": {
"type": "integer"
},
"farmerId": {
"type": "integer"
},
"orderId": {
"type": "integer"
}
}} }}
The following are the sample documents:
[{
"_index": "farmer_products_index_adv",
"_type": "farmer_products_map",
"_id": "AVtCttf0IP9v8cUTtoiz",
"_score": 1,
"_source": {
"orderId": 469173,
"category": "Hardware",
"farmerId": 509583,
"district": "",
"brand": "Honda",
"taluka": "",
"crop": "",
"productName": "Honda BRUSH CUTTER UMK 435-T U2NT",
"state": "",
"sku": "AGS-HW-471",
"village": "",
"quantity": 1
},{
"_index": "farmer_products_index_adv",
"_type": "farmer_products_map",
"_id": "AVtCttf0IP9v8cUTtoi1",
"_score": 1,
"_source": {
"orderId": 469177,
"category": "Crop Nutrition",
"farmerId": 13732,
"district": "Banaskantha",
"brand": "Unassigned Brand",
"taluka": "Kankrej",
"crop": "",
"productName": "Free Power Gel - Plant Nutrient (500 Ml)",
"state": "Gujarat",
"sku": "AGS-CN-006",
"village": "Nanota",
"quantity": 1
}}]
I wish to perform the following query aggregation:
{
"query": {
"bool": {
"must": [{
"match": {
"state": {
"query": "Maharashtra",
"fuzziness": 3,
"prefix_length": 2
}
}
}, {
"match": {
"district": {
"query": "Wardha",
"fuzziness": 3,
"prefix_length": 2
}
}
}, {
"match": {
"taluka": {
"query": "Wardha",
"fuzziness": 3,
"prefix_length": 2
}
}
}]
}
},
"size": 0,
"aggs": {
"group_by_state": {
"terms": {
"field": "state.keyword"
},
"aggs": {
"group_by_district": {
"terms": {
"field": "district.keyword"
},
"aggs": {
"group_by_taluka": {
"terms": {
"field": "taluka.keyword"
},
"aggs": {
"group_by_name": {
"terms": {
"field": "productName.keyword"
},
"aggs": {
"group_by_sku": {
"terms": {
"field": "sku.keyword"
},
"aggs": {
"sum_quantity": {
"sum": {
"field": "quantity"
}
}
}
}
}
}
}
}
}
}
}
}
}}
The current output of the aggregation is:
[{
"key": "Free MH HDPE Tarpaulin Tape Black 3mtr roll",
"doc_count": 13,
"group_by_sku": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [{
"key": "AGS-HW-410",
"doc_count": 13,
"sum_quantity": {
"value": 13
}
}]}}, {
"key": "Tarpaulin Sheet 11*15 (Tadpatri) 250 GSM",
"doc_count": 10,
"group_by_sku": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [{
"key": "AGS-HW-326",
"doc_count": 10,
"sum_quantity": {
"value": 10
}
}]
}}, {
"key": "Free Humic power Advanced powder 95% (250 Gms)",
"doc_count": 6,
"group_by_sku": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [{
"key": "AGS-CN-036",
"doc_count": 6,
"sum_quantity": {
"value": 18
}
}]
}}]
I want the output to be sorted by the sum_quantity value:
[{
"key": "Free Humic power Advanced powder 95% (250 Gms)",
"doc_count": 6,
"group_by_sku": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [{
"key": "AGS-CN-036",
"doc_count": 6,
"sum_quantity": {
"value": 18
}
}]}}, {
"key": "Free MH HDPE Tarpaulin Tape Black 3mtr roll",
"doc_count": 13,
"group_by_sku": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [{
"key": "AGS-HW-410",
"doc_count": 13,
"sum_quantity": {
"value": 13
}
}]
}}, {
"key": "Tarpaulin Sheet 11*15 (Tadpatri) 250 GSM",
"doc_count": 10,
"group_by_sku": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [{
"key": "AGS-HW-326",
"doc_count": 10,
"sum_quantity": {
"value": 10
}
}]
}}]
How can I achieve this? I tried approaches suggested in other stackoverflow questions using "reverse_nested" but was unable to arrive at a solution.

Related

elastic search nested sub aggregations

We are using elastic search which holds records as documents with following definition
{
"loadtender": {
"aliases": {},
"mappings": {
"_doc": {
"_meta": {
"version": 20
},
"properties": {
"carrierId": {
"type": "long"
},
"destinationData": {
"type": "keyword"
},
"destinationZip": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 50
}
}
},
"effStartTime": {
"type": "date"
},
"endTime": {
"type": "date"
},
"id": {
"type": "long"
},
"mustRespondByTime": {
"type": "date"
},
"orgdiv": {
"type": "keyword"
},
"originData": {
"type": "keyword"
},
"originZip": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 50
}
}
},
"purchaseOrderNum": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 255
}
}
},
"startTime": {
"type": "date"
},
"tenderStatus": {
"type": "keyword"
},
"tenderedTime": {
"type": "date"
}
}
}
},
"settings": {
"index": {
"creation_date": "1655105542470",
"number_of_shards": "5",
"number_of_replicas": "1",
"uuid": "ohcXgA8EQ5iJj0X6_4BqXA",
"version": {
"created": "6080499"
},
"provided_name": "loadtender"
}
}
}
}
I am trying to search records to return me following filtered results
Input Parameter : startDate (yesterday), originData.originCity and originData.destinationCity
Output Required:
Three buckets for 0-30 days, 30-60 days and 60-90 days
buckets of distinct originData.city and destinationData.city combinations under each of the above
Under each of the above, buckets of data for each unique carrierId and the corresponding record list / count
Basically I was trying to achieve something like the below
{
"aggregations": {
"aggr": {
"buckets": [
{
"key": "0-30 days",
"doc_count": 10,
"aggr": {
"buckets": [
{
"key": "(originCity)Menasha, WI, US|Hanover, MD, US (DestinationCity)",
"aggr": {
"buckets": [
{
"key": "10183-carrierId",
"count": 10
}
]
}
}
]
}
},
{
"key": "30-60 days",
"doc_count": 11,
"aggr": {
"buckets": [
{
"key": "Dallas, TX, US|Houston, TX, US",
"aggr": {
"buckets": [
{
"key": "10183-carrierId",
"count": 10
},
{
"key": "10022-carrierId",
"count": 1
}
]
}
}
]
}
}
]
}
}
}
I've tried the following but I think I am not finding a way to filter it further using the sub aggregators.
{
"_source":["id", "effStartTime", "carrierId", "originData", "destinationData"],
"size": 100,
"query": {
"bool": {
"must": [
{
"bool": {
"must": [
{
"range": {
"startTime": {
"from": "2021-08-27T23:59:59.000Z",
"to": "2022-09-01T00:00:00.000Z",
"include_lower": true,
"include_upper": true,
"boost": 1
}
}
}
],
"adjust_pure_negative": true,
"boost": 1
}
}
],
"must_not": [
{
"term": {
"tenderStatus": {
"value": "REMOVED",
"boost": 1
}
}
}
],
"filter" : {
"exists" : {
"field" : "carrierId"
}
},
"adjust_pure_negative": true,
"boost": 1
}
},
"aggregations": {
"aggr": {
"terms": {
"script": "doc['originData'].values[0] + '|' + doc['destinationData'].values[0]"
}
}
}
}
I started beginning to think if this is even possible OR should I shift to issuing multiple queries for the same
I was able to achieve the same using the following sub-aggregations:
"aggregations": {
"aggr":{
"date_range": {
"field": "startTime",
"format": "MM-yyyy",
"ranges": [
{"to": "now-1M/M", "from": "now"}, --> now to 30 days back
{"to": "now-1M/M", "from": "now-2M/M"}, from 30 days back to 60 days back
{"to": "now-2M/M", "from": "now-3M/M"}, from 60 days back to 90 days back
{"to": "now-3M/M", "from": "now-12M/M"}
]
},
"aggregations": {
"aggr":{
"terms": {
"script": "doc['originData'].values[0] + '|' + doc['destinationData'].values[0]" --> concatenated origin and destination address as a key
},
"aggregations": {
"aggr": {
"terms": {
"field": "carrierId" --> nested carrier count
}
}
}
}
}
}
}
Following is the response template that I receive.
"aggregations": {
"aggr": {
"buckets": [
{
"key": "09-2021-06-2022",
"from": 1630454400000,
"from_as_string": "09-2021",
"to": 1654041600000,
"to_as_string": "06-2022",
"doc_count": 1,
"aggr": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "Dallas, TX, US|Houston, TX, US",
"doc_count": 14,
"aggr": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": 10022,
"doc_count": 14
}
]
}
}
]
}
}
]
}
}
Thank you to all of you for your efforts and time. Do let me know if you discover any better way.

ElasticSearc - unable to get nested object from multilevel nested object

I have multilevel nested object. the first nested object is category and inside category there is one more nested object is group.
So I want to get distinct category along with nested groups using aggregation query.
am successful to get distinct category but unable to get group detail with it.
Mapping:
"mappings": {
"doc": {
"properties": {
"categories": {
"type": "nested",
"properties": {
"cat_id": {
"type": "integer"
},
"cat_name": {
"type": "keyword"
},
"cat_slug": {
"type": "keyword"
},
"cat_type": {
"type": "long"
},
"groups": {
"type": "nested",
"properties": {
"group": {
"type": "keyword"
},
"group_name": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"id": {
"type": "long"
}
}
},
"ordering": {
"type": "integer"
},
"parent_id": {
"type": "integer"
},
"parent_name": {
"type": "keyword"
},
"parent_slug": {
"type": "keyword"
},
"parent_type": {
"type": "long"
}
}
}
}
}
}
Sample data:
{
"_index": "product",
"_type": "doc",
"_id": "18556",
"_score": 1,
"_source": {
"sku": "GR0005P08",
"product_id": 18556,
"slug": "blue-garter-with-sexy-laces",
"categories": [
{
"ordering": 10,
"cat_id": 343,
"cat_type": 1,
"cat_slug": "t-thisr",
"cat_name": "cat1"
},
{
"ordering": 9999999,
"cat_id": 2,
"cat_type": 3,
"cat_slug": "pajams",
"cat_name": "pajams"
},
{
"ordering": 5,
"cat_id": 77,
"cat_type": 3,
"cat_slug": "accessories",
"cat_name": "Accessories"
},
{
"parent_name": "Pajams",
"cat_name": "Night",
"ordering": 1,
"cat_id": 139,
"parent_type": 3,
"cat_slug": "night",
"parent_id": 2,
"groups": [
{
"id": 146,
"group_name": "Shop By Style"
},
{
"id": 481,
"group_name": "Shop By Offer "
}
],
"parent_slug": "pajams",
"cat_type": 1
}
],
"name": "love for pajams"
}
}
This is my aggregation query:
GET product/_search
{
"_source": [
"product_id"
],
"query": {
"nested": {
"path": "categories",
"query": {
"bool": {
"must": [
{
"match": {
"categories.cat_slug": "xyz"
}
}
]
}
}
}
},
"aggs": {
"categories": {
"nested": {
"path": "categories"
},
"aggs": {
"distinct_categories.cat_name": {
"terms": {
"field": "categories.cat_name"
},
"aggs": {
"categories.groups.group_name": {
"terms": {
"field": "categories.groups.group_name.keyword"
}
}
}
}
}
}
}
}
And this is my Response:
{
"took": 11,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"skipped": 0,
"failed": 0
},
"hits": {
"total": 8,
"max_score": 3.232121,
"hits": [
{
"_index": "product",
"_type": "doc",
"_id": "15621",
"_score": 3.232121,
"_source": {
"product_id": 15621
}
},
{
"_index": "product",
"_type": "doc",
"_id": "18556",
"_score": 2.5758784,
"_source": {
"product_id": 18556
}
}
]
},
"aggregations": {
"categories": {
"doc_count": 98,
"distinct_categories.cat_name": {
"doc_count_error_upper_bound": 2,
"sum_other_doc_count": 50,
"buckets": [
{
"key": "Accessories",
"doc_count": 8,
"categories.groups.group_name": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": []
}
},
{
"key": "T-shirt",
"doc_count": 8,
"categories.groups.group_name": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": []
}
},
{
"key": "Sexy",
"doc_count": 7,
"categories.groups.group_name": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": []
}
},
{
"key": "clothing",
"doc_count": 6,
"categories.groups.group_name": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": []
}
},
{
"key": "Pants",
"doc_count": 6,
"categories.groups.group_name": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": []
}
},
{
"key": "Colour Me",
"doc_count": 4,
"categories.groups.group_name": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": []
}
},
{
"key": "Pajamas",
"doc_count": 3,
"categories.groups.group_name": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": []
}
}
]
}
}
}
}
Probably try "field": "categories.groups.group_name" instead of "field": "categories.groups.group_name.keyword" in you deepest aggregation?
Edit:
Using "categories.groups.group_name.keyword" is the correct way to achieve this. The problem should be that you would need to add nested for each level of your nested structure for your query.
"aggs": {
"categories": {
"nested": {
"path": "categories"
},
"aggs": {
"distinct_categories.cat_name": {
"terms": {
"field": "categories.cat_name"
},
"aggs": {
"deeper_nested_agg": {
"nested": {
"path": "categories.groups"
},
"aggs": {
"categories.groups.group_name": {
"terms": {
"field": "categories.groups.group_name.keyword"
}
}
}
}
}
}
}
}
Please give it a try. Hope this helps!
I found issue with mapping definition of groups after changing query work fine.
{
"type": "nested",
"properties": {
"group_name": {
"type": "keyword"
},
"id": {
"type": "long"
}
}
}

How to group documents by hour without day in elasticsearch?

i have a application who a survey is asked every day by users, and i want to have average answers hours. I tried some request but i can't group all documents by hours, it's grouped by hour by day..
I do this :
{
"aggs": {
"byHour": {
"date_histogram": {
"field": "date",
"interval": "hour",
"format" : "H"
}
}
}
}
}
It's wrapped by hour but also by date, and i want day as ignored.
[
{
"key_as_string": "0",
"key": 1533945600000,
"doc_count": 40,
"group_by_state": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": []
}
},
{
"key_as_string": "1",
"key": 1533949200000,
"doc_count": 345,
"group_by_state": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": []
}
},
{
"key_as_string": "23",
"key": 1534028400000,
"doc_count": 15,
"group_by_state": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": []
}
},
{
"key_as_string": "0",
"key": 1534032000000,
"doc_count": 0,
"group_by_state": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": []
}
},
{
"key_as_string": "1",
"key": 1534035600000,
"doc_count": 2,
"group_by_state": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": []
}
},
{
"key_as_string": "2",
"key": 1534039200000,
"doc_count": 3,
"group_by_state": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": []
}
}
]
Mapping of type
{
"myIndex": {
"mappings": {
"answer": {
"properties": {
"date": {
"type": "date"
},
"lang": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"level": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"offset": {
"type": "long"
},
"patientCaretrackId": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"protocolId": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"query": {
"properties": {
"constant_score": {
"properties": {
"filter": {
"properties": {
"bool": {
"properties": {
"must": {
"properties": {
"term": {
"properties": {
"questionId": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"questionnaireId": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
}
}
}
}
}
}
}
}
}
}
}
}
},
"questionId": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"questionnaireId": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"surgeonId": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"value": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
},
"fielddata": true
}
}
}
}
}
}
Example of documents :
[
{
"date": "2018-09-11T00:00:00.000Z",
"lang": "fr",
"level": "red",
"offset": 21,
"patientCaretrackId": "5b894b10a9f7afec73762113",
"protocolId": "ptg-koos-long-v1",
"questionnaireId": "j21",
"surgeonId": "699362de-f040-4799-b1ea-53f5b4a2fe03",
"value": "permanentes",
"questionId": "frequence-douleur-2"
},
{
"date": "2018-09-11T00:00:00.000Z",
"lang": "fr",
"level": "red",
"offset": 21,
"patientCaretrackId": "5b894b10a9f7afec73762113",
"protocolId": "ptg-koos-long-v1",
"questionnaireId": "j21",
"surgeonId": "699362de-f040-4799-b1ea-53f5b4a2fe03",
"value": "permanentes",
"questionId": "frequence-douleur-2"
}
]
It's possible to do this with an query Elasticsearch ?
Thank you,
You can use a terms aggregation with a script:
https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-bucket-terms-aggregation.html#search-aggregations-bucket-terms-aggregation-script
GET /_search
{
"aggs" : {
"hours" : {
"terms" : {
"script" : {
"source": "doc['date'].getHour()",
"lang": "painless"
}
}
}
}
}
(Just to give you an idea, not sure at all about the script itself ..)
I found thx
{
"size": 0,
"aggs": {
"byHour": {
"date_histogram": {
"field": "date",
"interval": "hour",
"format" : "H",
"keyed": true,
"time_zone": "+02:00"
}
}
}
}
The response :
{
"took": 9,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"skipped": 0,
"failed": 0
},
"hits": {
"total": 2796,
"max_score": 0,
"hits": []
},
"aggregations": {
"byHour": {
"buckets": {
"17": {
"key_as_string": "17",
"key": 1536159600000,
"doc_count": 2006
},
"18": {
"key_as_string": "18",
"key": 1536163200000,
"doc_count": 790
}
}
}
}
}

how do I implement a single-word auto complete using Elasticsearch 6

I would like to implement an single word autocomplete using elasticsearch 6. I have seen a fair amount of posts on how to do this using lesser versions however, it seems that autocomplete has changed significantly in the last version.
I am using the standard mapping for autocomplete:
PUT advertising_tins
{
"settings": {
"analysis": {
"analyzer": {
"completion_analyzer": {
"type": "custom",
"filter": [
"lowercase",
"completion_filter"
],
"tokenizer": "keyword"
}
},
"filter": {
"completion_filter": {
"type": "edge_ngram",
"min_gram": 1,
"max_gram": 24
}
}
}
},
"mappings": {
"item": {
"properties": {
"date": {
"type": "long"
},
"id": {
"type": "text"
},
"title": {
"type": "text"
},
"suggest": {
"type": "text",
"fields": {
"raw": {
"type": "keyword"
},
"completion": {
"type": "text",
"analyzer": "completion_analyzer",
"search_analyzer": "standard"
}
}
}
}
}
}
}
I am indexing like this:
POST advertising_tins/item/_bulk
{"index":{}}
{"date": 20180217, "title": "Vintage Spice Cardboard Tin of Mace Dainty Brand St. Paul, MN 1 oz.","id": "305232814","suggest": [ "spice","cardboard","tin","mace","dainty","brand","st","paul","mn","oz"]}
And querying like this:
POST advertising_tins/_search?pretty
{
"size": 0,
"query": {
"term": {
"suggest.completion": "car"
}
},
"aggs": {
"suggestions": {
"terms": {
"field": "suggest.raw"
}
}
}
}
However my results return all terms in the suggest field instead of just single term "cardboard".
{
"took": 4,
"hits": {
"total": 1,
"max_score": 0,
"hits": []
},
"aggregations": {
"suggestions": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "brand",
"doc_count": 1
},
{
"key": "cardboard",
"doc_count": 1
},
{
"key": "dainty",
"doc_count": 1
},
{
"key": "mace",
"doc_count": 1
},
{
"key": "mn",
"doc_count": 1
},
{
"key": "oz",
"doc_count": 1
},
{
"key": "paul",
"doc_count": 1
},
{
"key": "spice",
"doc_count": 1
},
{
"key": "st",
"doc_count": 1
},
{
"key": "tin",
"doc_count": 1
}
]
}
}
}
And idea how I fix this and get just a single term match?
You are almost there. It can be achieved with the default Completion Suggester, you only need to change the type of your completion field to "completion":
"mappings": {
"item": {
"properties": {
"date": {
"type": "long"
},
"id": {
"type": "text"
},
"title": {
"type": "text"
},
"suggest": {
"type": "text",
"fields": {
"raw": {
"type": "keyword"
},
"completion": {
"type" : "completion", <--- here
"analyzer": "completion_analyzer",
"search_analyzer": "standard"
}
}
}
}
}
}
And add a "suggest" part into your query:
POST advertising_tins/_search
{
"size": 0,
"query": {
"term": {
"suggest.completion": "car"
}
},
"suggest" : { <--- Here goes he suggest query
"my-suggestion" : {
"text" : "car",
"completion" : {
"field" : "suggest.completion"
}
}
},
"aggs": {
"suggestions": {
"terms": {
"field": "suggest.raw"
}
}
}
}
The response will look like this:
{
// ...
"hits": //... ,
"aggregations": // ...,
"suggest": {
"my-suggestion": [
{
"text": "car",
"offset": 0,
"length": 3,
"options": [
{
"text": "cardboard", <--- here is the suggestion
"_index": "advertising_tins",
"_type": "item",
"_id": "GLeUqGEBVrFe7u7pR5uA",
"_score": 1,
"_source": {
"date": 20180217,
"title": "Vintage Spice Cardboard Tin of Mace Dainty Brand St. Paul, MN 1 oz.",
"id": "305232814",
"suggest": [
"spice",
"cardboard",
"tin",
"mace",
"dainty",
"brand",
"st",
"paul",
"mn",
"oz"
]
}
}
]
}
]
}
}
The response also includes the _source of the suggested document, so you might not even need to use "query" and "aggs" parts.
Hope that helps!

Further filtering of aggregations

I have a question regarding aggregation in elastic search. I have a document like the following:
{
"_index": "products",
"_type": "product",
"_id": "ID-12345",
"_score": 1,
"_source": {
"created_at": "2017-08-04T17:56:44.592Z",
"updated_at": "2017-08-04T17:56:44.592Z",
"product_information": {
"sku": "12345",
"name": "Product Name",
"price": 25,
"brand": "Brand Name",
"url": "URL"
},
"product_detail": {
"description": "Product description text here.",
"string_facets": [
{
"facet_name": "Colour",
"facet_value": "Grey"
},
{
"facet_name": "Category",
"facet_value": "Linen"
},
{
"facet_name": "Category",
"facet_value": "Throws & Blanket"
},
{
"facet_name": "Keyword",
"facet_value": "Contemporary"
},
{
"facet_name": "Keyword",
"facet_value": "Sophisticated"
}
]
}
}
}
I am storing product information such as Colour, Material, Category and Keywords within the product_detail.string_facets field. I'd like to use this for aggregation to get Colour/Material/Category/Keyword suggestions but as separate buckets. I.e, there is a separate bucket for each of those string_facet types as defined in product_detail.string_facets.facet_name.
This is the query I have at the moment which is returning data, but not as I expect. First the query (this was just to try and get Colours):
{
"from": 0,
"size": 12,
"query": {
"bool": {
"should": [
{
"multi_match": {
"query": "Rug",
"fields": ["product_information.name", "product_detail.string_facets.facet_value"]
}
},
{
"multi_match": {
"query": "Blue",
"fields": ["product_information.name", "product_detail.string_facets.facet_name"]
}
}
],
"minimum_should_match": "100%"
}
},
"aggs": {
"suggestions": {
"filter": { "term": { "product_detail.string_facets.facet_name.keyword": "Colour" }},
"aggs": {
"colours": {
"terms": {
"field": "product_detail.string_facets.facet_value.keyword",
"size": 10
}
}
}
}
}
}
This is giving me output like the following:
"aggregations": {
"suggestions": {
"doc_count": 21,
"colours": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 23,
"buckets": [
{
"key": "Rug",
"doc_count": 21
},
{
"key": "Blue",
"doc_count": 18
},
{
"key": "Bold",
"doc_count": 7
},
{
"key": "Modern",
"doc_count": 6
},
{
"key": "Multi-Coloured",
"doc_count": 5
},
{
"key": "Contemporary",
"doc_count": 4
},
{
"key": "Traditional",
"doc_count": 4
},
{
"key": "White",
"doc_count": 4
},
{
"key": "Luxurious",
"doc_count": 3
},
{
"key": "Minimal",
"doc_count": 3
}
]
}
}
}
It has given me the results of all facet_name rather those of facet_type Colour as I thought it would.
Any help would be greatly appreciated. Elasticsearch seems very powerful but the documentation is quite daunting!
You did not show how the mapping looks like, but I suppose that product_detail.string_facets field is just an inner object field and that is the reason why you get this kind of result. With this type of mapping Elasticsearch flattens the array into a simple list of field names and values. In your case it becomes:
{
"product_detail.string_facets.facet_name": ["Colour", "Category", "Keyword"],
"product_detail.string_facets.facet_value": ["Grey", "Linen", "Throws & Blanket", "Contemporary", "Sophisticated"]
}
As you can see, based on this structure, Elasticsearch cannot know how to aggregate the data.
To make it work product_detail.string_facets field should be of type nested. Mapping for string_facets should be similar to this (note "type": "nested"):
"string_facets": {
"type": "nested",
"properties": {
"facet_name": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"facet_value": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
}
}
}
Now I index following document:
{
"created_at": "2017-08-04T17:56:44.592Z",
"updated_at": "2017-08-04T17:56:44.592Z",
"product_information": {
"sku": "12345",
"name": "Rug",
"price": 25,
"brand": "Brand Name",
"url": "URL"
},
"product_detail": {
"description": "Product description text here.",
"string_facets": [
{
"facet_name": "Colour",
"facet_value": "Blue"
},
{
"facet_name": "Colour",
"facet_value": "Red"
},
{
"facet_name": "Category",
"facet_value": "Throws & Blanket"
},
{
"facet_name": "Keyword",
"facet_value": "Contemporary"
}
]
}
}
Now, to get aggregation of colour suggestions as separate buckets, you can try this query (I simplified the bool query for the need of my document):
{
"from": 0,
"size": 12,
"query": {
"bool": {
"should": [
{
"multi_match": {
"query": "Rug",
"fields": ["product_information.name", "product_detail.string_facets.facet_value"]
}
}
]
}
},
"aggs": {
"facets": {
"nested" : {
"path" : "product_detail.string_facets"
},
"aggs": {
"suggestions": {
"filter": { "term": { "product_detail.string_facets.facet_name.keyword": "Colour" }},
"aggs": {
"colours": {
"terms": {
"field": "product_detail.string_facets.facet_value.keyword",
"size": 10
}
}
}
}
}
}
}
}
And result:
{
...,
"hits": {
...
},
"aggregations": {
"facets": {
"doc_count": 5,
"suggestions": {
"doc_count": 2,
"colours": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "Blue",
"doc_count": 1
},
{
"key": "Red",
"doc_count": 1
}
]
}
}
}
}
}

Resources