elasticsearch aggregations sort on buckets keys - sorting

How do I sort Elasticsearch aggregation buckets on keys? I have nested aggregations and want to sort on the bucket results of my 2nd aggregation.
Like I have:
"result": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": 20309,
"doc_count": 752,
"Events": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "impression",
"doc_count": 30
},
{
"key": "page_view",
"doc_count": 10
},
...
]
}
},
{
"key": 20771,
"doc_count": 46,
"Events": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "impression",
"doc_count": 32
},
{
"key": "page_view",
"doc_count": 9
},
...
]
}
},
I want my Events aggregation buckets to be sorted in desc/asc order on the key impression or on page_view.
How do I achieve such results set?
Here is my query
GET someindex/useractivity/_search?search_type=count
{
"size": 1000000,
"query": {
"filtered": {
"filter": {
"bool": {
"must": [
{
"range": {
"created_on": {
"from": "2015-01-12",
"to": "2016-05-12"
}
}
},
{
"term": {
"group_id": 1
}
}
]
}
}
}
},
"aggs": {
"result": {
"terms": {
"field": "entity_id",
"size": 1000000
},
"aggs": {
"Events": {
"terms": {
"field": "event_type",
"min_doc_count": 0,
"size": 10
}
}
}
}
}
}
I have tried using _key, but it sorts within each bucket. I want to sort by looking across all buckets. For example, I have a key impression, and I want my bucket results to be sorted by this key, not within the bucket.
For example, if I sort on impression in descending order, my result set should be
"buckets": [
{
"key": 20771,
"doc_count": 46,
"Events": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "impression",
"doc_count": 32
},
{
"key": "page_view",
"doc_count": 9
},
...
]
}
},
{
"key": 20309,
"doc_count": 752,
"Events": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "impression",
"doc_count": 30
},
{
"key": "page_view",
"doc_count": 10
},
...
]
}
},
i.e. the bucket with the maximum impression count should be on top (order buckets by impression in descending order).

Try this aggregation:
{
"size": 0,
"aggs": {
"result": {
"terms": {
"field": "entity_id",
"size": 10,
"order": {
"impression_Events": "desc"
}
},
"aggs": {
"Events": {
"terms": {
"field": "event_type",
"min_doc_count": 0,
"size": 10
}
},
"impression_Events": {
"filter": {
"term": {
"event_type": "impression"
}
}
}
}
}
}
}

Related

Elasticsearch aggregation to get difference in date per bucket

I would like to find the difference between the min date of a series of buckets and the date of each bucket. For instance, I have an Elasticsearch aggregation similar to the one below
"id": {
"terms": {
"field": "data.id"
},
"aggs": {
"min_date": {
"min": {
"field": "data.dateSold"
}
},
"date": {
"date_histogram": {
"field": "data.dateSold",
"calendar_interval": "year"
},
"aggs": {
"sales": {
"sum": {
"field": "data.sales"
}
}
}
}
}
}
}
This produces a result similar to
{
"uwi": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 8559203,
"buckets": [
{
"key": "Tshirts",
"doc_count": 1826,
"date_histogram#date": {
"buckets": [
{
"key_as_string": "2021-01-01T00:00:00.000Z",
"key": 1609459200000,
"doc_count": 364,
"sum#sales": {
"value": 31438.67796
}
},
{
"key_as_string": "2022-01-01T00:00:00.000Z",
"key": 1640995200000,
"doc_count": 365,
"sum#sales": {
"value": 16095.7913
}
}
]
},
"min#min_date": {
"value": 1609459200000,
"value_as_string": "2021-01-01T00:00:00.000Z"
}
...
...
I would like to add an extra value per bucket that is the difference between the date (key) and the min date, e.g. I would get a result similar to the one below, with an extra 'difference_with_min_date' value per bucket, which is the difference between the 'min#min_date' agg and that bucket's 'key'
{
"uwi": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 8559203,
"buckets": [
{
"key": "Tshirts",
"doc_count": 1826,
"date_histogram#date": {
"buckets": [
{
"key_as_string": "2021-01-01T00:00:00.000Z",
"key": 1609459200000,
"doc_count": 364,
"sum#sales": {
"value": 31438.67796
},
"difference_with_min_date": {
"value": 0
}
},
{
"key_as_string": "2022-01-01T00:00:00.000Z",
"key": 1640995200000,
"doc_count": 365,
"sum#sales": {
"value": 16095.7913
},
"difference_with_min_date": {
"value": 31536000000
}
}
]
},
"min#min_date": {
"value": 1609459200000,
"value_as_string": "2021-01-01T00:00:00.000Z"
}
...
...
Any ideas would be helpful. I have tried to do this with a script, with little success: you need to supply the bucket_script path within the 'sales' aggs (i.e. as a sibling) to do it per bucket value, but then you can't reference the uncle min aggregation.
Thanks

ElasticSearch filtering results according to bucket length (number of unique keys in a bucket)

I want to write an Elasticsearch aggregation which returns a key only if its inner bucket's length is greater than 1.
"aggs": {
"product_definitions": {
"terms": {
"field": "definition_name",
"size": 200,
"exclude": "NO_MATCH",
"min_doc_count": 5
},
"aggs": {
"product_instances": {
"terms": {
"field": "data_source_name",
"size": 100
}
}
}
}
}
This is my aggregation and it returns:
"aggregations": {
"product_definitions": {
"doc_count_error_upper_bound": 10,
"sum_other_doc_count": 29281,
"buckets": [
{
"key": "DANA ANTRİKOT KG",
"doc_count": 13,
"product_instances": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "SariyerMarketCom",
"doc_count": 13
}
]
}
},
{
"key": "Keskinoğlu Piliç Salam 700G",
"doc_count": 10,
"product_instances": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "HappyCenterComTr",
"doc_count": 9
},
{
"key": "SanalMarketComTr",
"doc_count": 1
}
]
}
},
{
"key": "Doğuş Filiz Çayı 1000 G",
"doc_count": 9,
"product_instances": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "HappyCenterComTr",
"doc_count": 7
},
{
"key": "SanalMarketComTr",
"doc_count": 2
}
]
}
}
]
}
}
I want keys in product_definitions only if their product_instances buckets have at least two keys. In this example it should only return the 2nd and 3rd keys and not the 1st, because the bucket of the 1st key only contains one key, which is
"buckets": [
{
"key": "SariyerMarketCom",
"doc_count": 13
}
]
You can leverage the bucket_selector pipeline aggregation to achieve that, like this:
"aggs": {
"product_definitions": {
"terms": {
"field": "definition_name",
"size": 200,
"exclude": "NO_MATCH",
"min_doc_count": 5
},
"aggs": {
"product_instances": {
"terms": {
"field": "data_source_name",
"size": 100
}
},
"minimum_2": {
"bucket_selector": {
"buckets_path": {
"count": "product_instances._bucket_count"
},
"script": "params.count >= 2"
}
}
}
}
}

Elasticsearch - Get aggregation key sort as number

I wrote a query that aggregates some data, and its aggregation key is a number. I tried to sort the aggregation result by key, but Elasticsearch treated the key as a string.
Since the number of result buckets is currently pretty large, it's not feasible to re-sort them on the client side. Any ideas?
Here is my query.
"aggregations" : {
"startcount" : {
"terms" : {
"script" : "round(doc['startat'].value/1000)",
"size" : 1000,
"order" : { "_term" : "asc" }
}
}
}
and current result bucket.
"buckets": [
{
"key": "0",
"doc_count": 68
},
{
"key": "1",
"doc_count": 21
},
{
"key": "10",
"doc_count": 6
},
{
"key": "11",
"doc_count": 16
},
This is my expected result.
"buckets": [
{
"key": "0",
"doc_count": 68
},
{
"key": "1",
"doc_count": 21
},
{
"key": "2", // not '10'
"doc_count": 6
},
{
"key": "3", // not '11'
"doc_count": 16
},
Using the value_script approach should fix the alphabetical sort issue:
Example:
{
"size": 0,
"aggregations": {
"startcount": {
"terms": {
"field": "startat",
"script": "round(_value/1000)",
"size": 1000,
"order": {
"_term": "asc"
}
}
}
}
}
This is a multiple group-by scenario where the data are sorted by key in descending order.
{
"size": 0,
"aggs": {
"categories": {
"filter": {
"exists": {
"field": "organization_industries"
}
},
"aggs": {
"names": {
"terms": {
"field": "organization_revenue_in_thousands_int.keyword",
"size": 200,
"order": {
"_key": "desc"
}
},
"aggs": {
"industry_stats": {
"terms": {
"field": "organization_industries.keyword"
}
}
}
}
}
}
}
}
Output
"aggregations": {
"categories": {
"doc_count": 195161605,
"names": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 19226983,
"buckets": [
{
"key": "99900",
"doc_count": 1742,
"industry_stats": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "internet",
"doc_count": 1605
},
{
"key": "investment management",
"doc_count": 81
},
{
"key": "biotechnology",
"doc_count": 54
},
{
"key": "computer & network security",
"doc_count": 2
}
]
}
},
{
"key": "998000",
"doc_count": 71,
"industry_stats": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "finance",
"doc_count": 48
},
{
"key": "information technology & services",
"doc_count": 23
}
]
}
}
}
]
}
}
enter code here

elasticsearch sort aggregation on categorical value

In elasticsearch, I can aggregate and sort the aggregation on a second aggregation's numeric field.
e.g.
GET myindex/_search
{
"size":0,
"aggs": {
"a1": {
"terms": {
"field": "FIELD1",
"size":0,
"order": {"a2": "desc"}
},
"aggs":{
"a2":{
"sum":{
"field":"FIELD2"
}
}
}
}
}
}
However, I want to sort the aggregation on a categorical field value, i.e. let's say the value of FIELD2 was one of ("a", "b", "c") -- I want to sort a1 first by all documents with FIELD2: "a", then FIELD2: "b", then FIELD2: "c".
In my case, every FIELD1 has a unique FIELD2. So I really just want a way to sort the a1 results by FIELD2.
I am not sure what exactly you want but I tried following.
I created index with mapping
PUT your_index
{
"mappings": {
"your_type": {
"properties": {
"name": {
"type": "string"
},
"fruit" : {"type" : "string", "index": "not_analyzed"}
}
}
}
}
Then I indexed a few documents like this
PUT your_index/your_type/1
{
"name" : "federer",
"fruit" : "orange"
}
Then I sorted all players with fruits with following aggregation
{
"size": 0,
"aggs": {
"a1": {
"terms": {
"field": "name",
"order": {
"_term": "asc"
}
},
"aggs": {
"a2": {
"terms": {
"field": "fruit",
"order": {
"_term": "asc"
}
}
}
}
}
}
}
The result I got is
"aggregations": {
"a1": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "federer",
"doc_count": 3,
"a2": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "green apple",
"doc_count": 1
},
{
"key": "orange",
"doc_count": 2
}
]
}
},
{
"key": "messi",
"doc_count": 2,
"a2": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "apple",
"doc_count": 1
},
{
"key": "banana",
"doc_count": 1
}
]
}
},
{
"key": "nadal",
"doc_count": 2,
"a2": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "blueberry",
"doc_count": 1
},
{
"key": "watermelon",
"doc_count": 1
}
]
}
},
{
"key": "ronaldo",
"doc_count": 2,
"a2": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "banana",
"doc_count": 1
},
{
"key": "watermelon",
"doc_count": 1
}
]
}
}
]
}
}
Make sure your FIELD2 is not_analyzed or you will get unexpected results.
Does this help?
I found a way that works. You must first aggregate on FIELD2, then on FIELD1.
{
"size": 0,
"aggs": {
"a2": {
"terms": {
"size": 0,
"field": "FIELD2",
"order": {
"_term": "asc"
}
},
"aggs": {
"a1": {
"terms": {
"size": 0,
"field": "FIELD1",
"order": {
"_term": "asc"
}
}
}
}
}
}
}

elasticsearch - additional field in aggregation results

I have the following aggregation for Categories
{
"aggs": {
"category": {
"terms": { "field": "category.name" }
}
}
}
// results
"category": {
"buckets": [
{
"key": "computer & office",
"doc_count": 365
},
{
"key": "home & garden",
"doc_count": 171
},
{
"key": "consumer electronics",
"doc_count": 49
},
]
}
How can I pass an additional field, like category.id, to the category buckets, so I could query by category.id if a certain aggregation is clicked by a user? I'm not really clear on how to query aggregations, whether there's a direct way or whether you have to make a new query and pass the bucket key to the query filters.
Use a sub-aggregation on the category.id, you will do a bit more work when looking at the results, but I think it's better than changing the mapping:
{
"aggs": {
"name": {
"terms": {
"field": "name"
},
"aggs": {
"id": {
"terms": {
"field": "id"
}
}
}
}
}
}
And the results will look like the following:
"aggregations": {
"name": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "consumer electronics",
"doc_count": 2,
"id": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": 2,
"doc_count": 2
}
]
}
},
{
"key": "computer & office",
"doc_count": 1,
"id": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": 5,
"doc_count": 1
}
]
}
},
{
"key": "home & garden",
"doc_count": 1,
"id": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": 1,
"doc_count": 1
}
]
}
},
{
"key": "whatever",
"doc_count": 1,
"id": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": 3,
"doc_count": 1
}
]
}
}
]
}
}
You will still have the category name, but now you, also, have the id from the second aggregation as a sub-bucket in the root bucket:
"key": "consumer electronics",
...
"id": {
...
"buckets": [
{
"key": 2,
"doc_count": 2
You could add a sub aggregation:
{
"aggs": {
"category": {
"terms": {
"field": "category.name"
},
"aggs": {
"id": {
"terms": { "field": "category.id" }
}
}
}
}
}
This way each category.name bucket will contain a single bucket containing the id for that category.

Resources