Sample documents:
{
"id": "62655",
"attributes": [
{
"name": "genre",
"value": "comedy"
},
{
"name": "year",
"value": "2016"
}
]
}
{
"id": "62656",
"attributes": [
{
"name": "genre",
"value": "horror"
},
{
"name": "year",
"value": "2016"
}
]
}
{
"id": "62657",
"attributes": [
{
"name": "language",
"value": "english"
},
{
"name": "year",
"value": "2015"
}
]
}
Expected Output:
{
"hits" : {
"total": 3,
"hits": []
},
"aggregations": {
"attribCount": {
"language": 1,
"genre": 2,
"year": 3
},
"attribVals": {
"language": {
"english": 1
},
"genre": {
"comedy": 1,
"horror": 1
},
"year": {
"2016": 2,
"2015": 1
}
}
}
}
My Query:
I could get the "attribCount" aggregation using the query below, but I don't know how to get the count of each attribute value.
{
"query": {
"filtered": {
"query": {
"match_all": {}
}
}
},
"aggs": {
"attribCount": {
"terms": {
"field": "attributes.name",
"size": 0
}
}
},
"size": 0
}
When I aggregate on attributes.value, it gives the overall count, but I need the counts listed under each attribute name, as shown in the expected output.
As you say, the attributes field is nested.
Try this; it will work:
{
"size": 0,
"aggs": {
"count": {
"nested": {
"path": "attributes"
},
"aggs": {
"attribCount": {
"terms": {
"field": "attributes.name"
}
},
"attribVal": {
"terms": {
"field": "attributes.name"
},
"aggs": {
"attribval2": {
"terms": {
"field": "attributes.value"
}
}
}
}
}
}
}
}
Related
I have an index of stores at various locations. Each store has a nested list of discount coupons.
Now I need a query that returns the list of all unique coupons within a radius of x km, sorted by the distance of the nearest applicable coupon from the given location.
Database :: Elasticsearch
Index Mapping ::
{
"mappings": {
"car_stores": {
"properties": {
"location": {
"type": "geo_point"
},
"discount_coupons": {
"type": "nested",
"properties": {
"name": {
"type": "keyword"
}
}
}
}
}
}
}
Sample Doc ::
{
"_index": "stores",
"_type": "car_stores",
"_id": "1258c81d-b6f2-400f-a448-bd728f524b55",
"_score": 1.0,
"_source": {
"location": {
"lat": 36.053757,
"lon": 139.525482
},
"discount_coupons": [
{
"name": "c1"
},
{
"name": "c2"
}
]
}
}
Old query to get the unique discount coupon names within an x km area of a given location ::
{
"size": 0,
"query": {
"bool": {
"must": {
"match_all": {}
},
"filter": {
"geo_distance": {
"distance": "100km",
"location": {
"lat": 40,
"lon": -70
}
}
}
}
},
"aggs": {
"coupon": {
"nested": {
"path": "discount_coupons"
},
"aggs": {
"name": {
"terms": {
"field": "discount_coupons.name",
"order": {
"_key": "asc"
},
"size": 200
}
}
}
}
}
}
Updated Response ::
{
"took": 60,
"timed_out": false,
"_shards": {
"total": 3,
"successful": 3,
"skipped": 0,
"failed": 0
},
"hits": {
"total": 245328,
"max_score": 0.0,
"hits": []
},
"aggregations": {
"coupon": {
"doc_count": 657442,
"name": {
"doc_count_error_upper_bound": -1,
"sum_other_doc_count": 641189,
"buckets": [
{
"key": "local20210211",
"doc_count": 1611,
"back_to_base": {
"doc_count": 1611,
"distance_script": {
"value": 160.61034409639765
}
}
},
{
"key": "local20210117",
"doc_count": 1621,
"back_to_base": {
"doc_count": 1621,
"distance_script": {
"value": 77.51459886447356
}
}
},
{
"key": "local20201220",
"doc_count": 1622,
"back_to_base": {
"doc_count": 1622,
"distance_script": {
"value": 84.15734462544432
}
}
},
{
"key": "kisekae1",
"doc_count": 1626,
"back_to_base": {
"doc_count": 1626,
"distance_script": {
"value": 88.23770888201268
}
}
},
{
"key": "local20210206",
"doc_count": 1626,
"back_to_base": {
"doc_count": 1626,
"distance_script": {
"value": 86.78376012847237
}
}
},
{
"key": "local20210106",
"doc_count": 1628,
"back_to_base": {
"doc_count": 1628,
"distance_script": {
"value": 384.12156408078397
}
}
},
{
"key": "local20210113",
"doc_count": 1628,
"back_to_base": {
"doc_count": 1628,
"distance_script": {
"value": 153.61681676703674
}
}
},
{
"key": "local20",
"doc_count": 1629,
"back_to_base": {
"doc_count": 1629,
"distance_script": {
"value": 168.74132991524073
}
}
},
{
"key": "local20210213",
"doc_count": 1630,
"back_to_base": {
"doc_count": 1630,
"distance_script": {
"value": 155.8335679860034
}
}
},
{
"key": "local20210208",
"doc_count": 1632,
"back_to_base": {
"doc_count": 1632,
"distance_script": {
"value": 99.58790590445102
}
}
}
]
}
}
}
}
Now, the above query returns the first 200 discount coupons, sorted by count by default, but I want the coupons sorted by distance from the given location, i.e. the nearest applicable coupon should come first.
Is there any way to sort nested aggregations based on a parent key or can I solve this use case using a different data model?
Update Query ::
{
"size": 0,
"query": {
"bool": {
"filter": [
{
"geo_distance": {
"distance": "100km",
"location": {
"lat": 35.699104,
"lon": 139.825211
}
}
},
{
"nested": {
"path": "discount_coupons",
"query": {
"bool": {
"filter": {
"exists": {
"field": "discount_coupons"
}
}
}
}
}
}
]
}
},
"aggs": {
"coupon": {
"nested": {
"path": "discount_coupons"
},
"aggs": {
"name": {
"terms": {
"field": "discount_coupons.name",
"order": {
"back_to_base": "asc"
},
"size": 10
},
"aggs": {
"back_to_base": {
"reverse_nested": {},
"aggs": {
"distance_script": {
"min": {
"script": {
"source": "doc['location'].arcDistance(35.699104, 139.825211)"
}
}
}
}
}
}
}
}
}
}
}
Interesting question. You can always order a terms aggregation by the result of a numeric sub-aggregation. The trick here is to escape the nested context via a reverse_nested aggregation and then calculate the distance from the pivot using a script:
{
"size": 0,
"query": {
"bool": {
"must": {
"match_all": {}
},
"filter": {
"geo_distance": {
"distance": "100km",
"location": {
"lat": 40,
"lon": -70
}
}
}
}
},
"aggs": {
"coupon": {
"nested": {
"path": "discount_coupons"
},
"aggs": {
"name": {
"terms": {
"field": "discount_coupons.name",
"order": {
"back_to_base": "asc"
},
"size": 200
},
"aggs": {
"back_to_base": {
"reverse_nested": {},
"aggs": {
"distance_script": {
"min": {
"script": {
"source": "doc['location'].arcDistance(40, -70)"
}
}
}
}
}
}
}
}
}
}
}
This is the query to get the top 10 records. There is a field named answer, and some records have the value "UNHANDLED" in it. I want to exclude the records whose answer field is UNHANDLED.
How do I write a query that both gets the top 10 and excludes UNHANDLED?
GET /logstash-sdc-mongo-abcsearch/_search?size=0
{
"aggs": {
"top_tags": {
"terms": {
"field": "question.keyword"
},
"aggs": {
"top_faq_hits": {
"top_hits": {
"_source": {
"includes": [
"answer"
]
},
"size": 1
}
}
}
}
}
}
You can use the must_not clause to exclude the documents that contain UNHANDLED in the answer field. Try out the query below:
Index Mapping:
{
"mappings": {
"properties": {
"question": {
"type": "keyword"
},
"answer": {
"type": "keyword"
}
}
}
}
Index Data:
{
"question": "a",
"answer": "b"
}
{
"question": "c",
"answer": "UNHANDLED"
}
Search Query:
{
"query": {
"bool": {
"must_not": {
"term": {
"answer": "UNHANDLED"
}
}
}
},
"aggs": {
"top_tags": {
"terms": {
"field": "question"
},
"aggs": {
"top_faq_hits": {
"top_hits": {
"_source": {
"includes": [
"answer"
]
},
"size": 1
}
}
}
}
}
}
Search Result:
"aggregations": {
"top_tags": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "a",
"doc_count": 1,
"top_faq_hits": {
"hits": {
"total": {
"value": 1,
"relation": "eq"
},
"max_score": 0.0,
"hits": [
{
"_index": "65563925",
"_type": "_doc",
"_id": "1",
"_score": 0.0,
"_source": {
"answer": "b"
}
}
]
}
}
}
]
}
}
Update 1:
Based on the comments below, try out the below query:
{
"query": {
"bool": {
"must_not": {
"term": {
"answer": "UNHANDLED"
}
},
"must": {
"term": {
"source": "sonax"
}
}
}
},
"aggs": {
"top_tags": {
"terms": {
"field": "question"
},
"aggs": {
"top_faq_hits": {
"top_hits": {
"_source": {
"includes": [
"answer"
]
},
"size": 1
}
}
}
}
}
}
I'm trying to use the total hit count in a bucket_script aggregation that is a sub-aggregation of a terms aggregation. Any suggestions are welcome.
I also tried value_count and terms aggregations on the package id field, but that didn't work either.
ES version: 6.7.1
Mapping:
PUT /packages/_mapping/_doc
{
"properties": {
"items": {
"type": "nested"
}
}
}
Example document:
{
"id": 1,
"name": "Lorem",
"items": [
{
"infos": {
"available": true
},
"item": {
"id": 1,
"meta": false,
"name": "Ipsum"
}
},
{
"infos": {
"available": false
},
"item": {
"id": 2,
"meta": false,
"name": "Ipsum 2"
}
},
{
"infos": {
"available": false
},
"item": {
"id": 3,
"meta": false,
"name": "Ipsum 3"
}
}
]
}
My aggregation query:
GET /packages/_search
{
"query": { "match_all": {} },
"size": 0,
"aggs": {
"items": {
"nested": {
"path": "items"
},
"aggs": {
"non_meta": {
"filter": {
"term": {
"items.item.meta": false
}
},
"aggs": {
"item": {
"terms": {
"field": "items.item.id"
},
"aggs": {
"total_count": { <--------------------- **I want to get total document count here, but I can only reach `doc_count` for docs that includes items**
"reverse_nested": {}
},
"available_count": {
"sum": {
"script": {
"source": "doc['items.infos.available'].value == true ? 1 : 0"
}
}
},
"penetration": {
"bucket_script": {
"buckets_path": {
"available": "available_count",
"total": "total_count>_count"
},
"script": "params.available / params.total * 100"
}
}
}
}
}
}
}
}
}
}
This is a sample of the data that I have in my index:
[{
"filters": [
{
"group": "color",
"attribute": "red"
},
{
"group": "category",
"attribute": "office"
},
{
"group": "vendor",
"attribute": "some vendor"
},
{
"group": "sub category",
"attribute": "tables"
},
{
"group": "material",
"attribute": "wood"
}
],
"image": "img",
"itemId": "id"
},
{
"filters": [
{
"group": "color",
"attribute": "green"
},
{
"group": "category",
"attribute": "office"
},
{
"group": "vendor",
"attribute": "some vendor"
},
{
"group": "sub category",
"attribute": "tables"
}
],
"image": "img",
"itemId": "id"
},
{
"filters": [
{
"group": "color",
"attribute": "brown"
},
{
"group": "category",
"attribute": "office"
},
{
"group": "vendor",
"attribute": "some vendor"
},
{
"group": "sub category",
"attribute": "chairs"
},
{
"group": "style",
"attribute": "modern"
}
],
"image": "img",
"itemId": "id"
}]
Example of my query:
{
"size": 48,
"sort": [
{
"sequence": {
"order": "asc"
}
}
],
"aggs": {
"price": {
"range": {
"field": "salePrice",
"ranges": [
{
"to": 50.0
},
{
"from": 50.0,
"to": 100.0
},
{
"from": 100.0,
"to": 250.0
},
{
"from": 250.0,
"to": 500.0
},
{
"from": 500.0,
"to": 750.0
},
{
"from": 750.0,
"to": 1000.0
},
{
"from": 1000.0,
"to": 1500.0
},
{
"from": 1500.0,
"to": 2000.0
},
{
"from": 2000.0,
"to": 2500.0
},
{
"from": 2500.0,
"to": 3000.0
},
{
"from": 3000.0,
"to": 3500.0
},
{
"from": 3500.0
}
]
}
},
"filters": {
"nested": {
"path": "filters"
},
"aggs": {
"groups": {
"terms": {
"field": "filters.group"
},
"aggs": {
"attributes": {
"terms": {
"field": "filters.attribute"
}
}
}
}
}
}
},
"query": {
"bool": {
"must": [
{
"match": {
"searchPattern": {
"query": "chairs",
"operator": "and"
}
}
}
],
"filter": [
{
"nested": {
"query": {
"bool": {
"must": [
{
"term": {
"filters.group": {
"value": "sub category"
}
}
},
{
"term": {
"filters.attribute": {
"value": "tables"
}
}
}
]
}
},
"path": "filters",
"_name": "filters"
}
}
]
}
}
}
As you can see, here I do a double aggregation on the nested "filters" array, then apply the filter, and I receive data like this:
category:
office(3)
color:
red(1)
green(1)
brown(1)
sub category:
tables(2)
........
And that's logical, because the filtering is applied first and only then the aggregation. But I want to get the other sub categories and show their counts. So, if the user selects several attributes in one group, I want OR filtering; if the user selects an attribute in another group, I want AND filtering. Like so:
{
"size": 48,
"sort": [
{
"sequence": {
"order": "asc"
}
}
],
"aggs": {
"price": {
"range": {
"field": "salePrice",
"ranges": [
{
"to": 50.0
},
{
"from": 50.0,
"to": 100.0
},
{
"from": 100.0,
"to": 250.0
},
{
"from": 250.0,
"to": 500.0
},
{
"from": 500.0,
"to": 750.0
},
{
"from": 750.0,
"to": 1000.0
},
{
"from": 1000.0,
"to": 1500.0
},
{
"from": 1500.0,
"to": 2000.0
},
{
"from": 2000.0,
"to": 2500.0
},
{
"from": 2500.0,
"to": 3000.0
},
{
"from": 3000.0,
"to": 3500.0
},
{
"from": 3500.0
}
]
}
},
"filters": {
"nested": {
"path": "filters"
},
"aggs": {
"groups": {
"terms": {
"field": "filters.group"
},
"aggs": {
"attributes": {
"terms": {
"field": "filters.attribute"
}
}
}
}
}
}
},
"query": {
"bool": {
"must": [
{
"match": {
"searchPattern": {
"query": "chairs",
"operator": "and"
}
}
}
],
"filter": [
{
"nested": {
"query": {
"bool": {
"must": [
{
"term": {
"filters.group": {
"value": "Category"
}
}
},
{
"bool": {
"should": [
{
"term": {
"filters.attribute": {
"value": "Decor"
}
}
},
{
"term": {
"filters.attribute": {
"value": "Patio Dining"
}
}
}
]
}
}
]
}
},
"path": "filters",
"_name": "filters"
}
},
{
"nested": {
"query": {
"bool": {
"must": [
{
"term": {
"filters.group": {
"value": "Type"
}
}
},
{
"term": {
"filters.attribute": {
"value": "Coverlets"
}
}
}
]
}
},
"path": "filters",
"_name": "filters"
}
}
]
}
}
}
But, of course, this query has the same problem. So, is it possible to solve this problem in Elasticsearch? I found a few mentions of post_filter, but I have no idea how to use it or how it can help me, because I need to recalculate the group attributes each time. Is it possible, or do I need to "store" the group attributes somewhere and show them after each aggregation?
I have a nested object mapping, the sample data:
{
"_index": "simpleindex",
"_type": "games",
"_id": "AU_eC-Uzt6KxlUliF68N",
"_score": 1,
"_source": {
"continents": [
{
"name": "Asia",
"countries": [
{
"name": "India",
"states": [
{
"name": "TN",
"game": "soccor",
"wins": 1
}
]
},
{
"name": "India",
"states": [
{
"name": "KA",
"game": "soccor",
"wins": 1
}
]
}
]
}
]
}
},
{
"_index": "simpleindex",
"_type": "games",
"_id": "AU_eCf5dt6KxlUliF637",
"_score": 1,
"_source": {
"continents": [
{
"name": "Asia",
"countries": [
{
"name": "India",
"states": [
{
"name": "TN",
"game": "soccor",
"wins": 1
}
]
}
]
}
]
}
},
{
"_index": "simpleindex",
"_type": "games",
"_id": "AU_eDIdXt6KxlUliF69i",
"_score": 1,
"_source": {
"continents": [
{
"name": "Asia",
"countries": [
{
"name": "India",
"states": [
{
"name": "TN",
"game": "soccor",
"wins": 1
}
]
},
{
"name": "India",
"states": [
{
"name": "KA",
"game": "soccor",
"wins": 1
}
]
},
{
"name": "Pak",
"states": [
{
"name": "NA",
"game": "soccor",
"wins": 1
}
]
}
]
}
]
}
}
Here is my filtered aggregation, which returns the documents that match the filter criteria (i.e. continent should be 'Asia' AND country should be 'India'):
{
"aggs": {
"DocumentSet": {
"filter": {
"and": {
"filters": [
{
"nested": {
"path": "continents",
"query": {
"match": {
"continents.name": "asia"
}
}
}
},
{
"nested": {
"path": "continents.countries",
"query": {
"match": {
"continents.countries.name": "india"
}
}
}
}
]
}
},
"aggs": {
"continents": {
"nested": {
"path": "continents"
},
"aggs": {
"countries": {
"nested": {
"path": "continents.countries"
},
"aggs": {
"states": {
"nested": {
"path": "continents.countries.states"
},
"aggs": {
"count": {
"value_count": {
"field": "continents.countries.states.wins"
}
}
}
}
}
}
}
}
}
}}}
And here is the result (copy pasted only the aggregation here):
"aggregations": {
"DocumentSet": {
"doc_count": 3,
"continents": {
"doc_count": 3,
"countries": {
"doc_count": 6,
"states": {
"doc_count": 6,
"count": {
"value": 6
}
}
}
}
}
}
My intention is to get "wins" only from continents.name=asia AND countries.name=india. The filter works as expected, but I need to narrow the aggregation scope down to countries.name=india only (essentially another level of scoping on the docs returned by the filter aggregation), so that the leaf aggregation count is 5 instead of 6.
Try this aggregation:
{
"aggs": {
"continents": {
"nested": {
"path": "continents"
},
"aggs": {
"asia_continent": {
"filter": {
"query": {
"match": {
"continents.name": "asia"
}
}
},
"aggs": {
"countries": {
"nested": {
"path": "continents.countries"
},
"aggs": {
"india_country": {
"filter": {
"query": {
"match": {
"continents.countries.name": "india"
}
}
},
"aggs": {
"states": {
"nested": {
"path": "continents.countries.states"
},
"aggs": {
"count": {
"value_count": {
"field": "continents.countries.states.wins"
}
}
}
}
}
}
}
}
}
}
}
}
}
}