Elasticsearch sum_bucket aggregation to sum the values contained in resulting buckets

I have a query as follows:
{
"size": 0,
"query": {
"filtered": {
"query": {
"bool": {
"must": [
{
"match": {
"_type": "grx-ipx"
}
},
{
"range": {
"#timestamp": {
"gte": "2015-09-08T15:00:00.000Z",
"lte": "2015-09-08T15:10:00.000Z"
}
}
}
]
}
},
"filter": {
"and": [
{
"terms": {
"inSightCustID": [
"ASD001",
"ZXC049"
]
}
},
{
"terms": {
"reportFamily": [
"GRXoIPX",
"LTEoIPX"
]
}
}
]
}
}
},
"_source": [
"inSightCustID",
"fiveMinuteIn",
"reportFamily",
"#timestamp"
],
"aggs": {
"timestamp": {
"terms": {
"field": "#timestamp",
"size": 5
},
"aggs": {
"reportFamily": {
"terms": {
"field": "reportFamily"
},
"aggs": {
"averageFiveMinute": {
"avg": {
"field": "fiveMinuteIn"
}
}
}
}
}
},
"distinct_timestamps": {
"cardinality": {
"field": "#timestamp"
}
}
}
}
The result of this query looks like:
...
"aggregations": {
"distinct_timestamps": {
"value": 3,
"value_as_string": "1970-01-01T00:00:00.003Z"
},
"timestamp": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": 1441724700000,
"key_as_string": "2015-09-08T15:05:00.000Z",
"doc_count": 10,
"reportFamily": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "GRXoIPX",
"doc_count": 5,
"averageFiveMinute": {
"value": 1687.6
}
},
{
"key": "LTEoIPX",
"doc_count": 5,
"averageFiveMinute": {
"value": 56710.6
}
}
]
}
},
...
What I want to do is for each bucket in the reportFamily aggregation, I want to show the sum of the averageFiveMinute values. So for instance, in the example above, I would also like to show the sum of 1687.6 and 56710.6. I want to do this for all reportFamily aggregations.
Here is what I have tried:
{
"size": 0,
"query": {
"filtered": {
"query": {
"bool": {
"must": [
{
"match": {
"_type": "grx-ipx"
}
},
{
"range": {
"#timestamp": {
"gte": "2015-09-08T15:00:00.000Z",
"lte": "2015-09-08T15:10:00.000Z"
}
}
}
]
}
},
"filter": {
"and": [
{
"terms": {
"inSightCustID": [
"ASD001",
"ZXC049"
]
}
},
{
"terms": {
"reportFamily": [
"GRXoIPX",
"LTEoIPX"
]
}
}
]
}
}
},
"_source": [
"inSightCustID",
"fiveMinuteIn",
"reportFamily",
"#timestamp"
],
"aggs": {
"timestamp": {
"terms": {
"field": "#timestamp",
"size": 5
},
"aggs": {
"reportFamily": {
"terms": {
"field": "reportFamily"
},
"aggs": {
"averageFiveMinute": {
"avg": {
"field": "fiveMinuteIn"
}
}
}
},
"sum_AvgFiveMinute": {
"sum_bucket": {
"buckets_path": "reportFamily>averageFiveMinute"
}
}
}
},
"distinct_timestamps": {
"cardinality": {
"field": "#timestamp"
}
}
}
}
I have added:
"sum_AvgFiveMinute": {
"sum_bucket": {
"buckets_path": "reportFamily>averageFiveMinute"
}
}
But unfortunately, this triggers an exception: Parse Failure [Could not find aggregator type [sum_bucket] in [sum_AvgFiveMinute]].
I expected the results to be something like:
...
"aggregations": {
"distinct_timestamps": {
"value": 3,
"value_as_string": "1970-01-01T00:00:00.003Z"
},
"timestamp": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": 1441724700000,
"key_as_string": "2015-09-08T15:05:00.000Z",
"doc_count": 10,
"reportFamily": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "GRXoIPX",
"doc_count": 5,
"averageFiveMinute": {
"value": 1687.6
}
},
{
"key": "LTEoIPX",
"doc_count": 5,
"averageFiveMinute": {
"value": 56710.6
}
}
]
},
"sum_AvgFiveMinute": {
"value": 58398.2
}
},
...
What is wrong with this query and how can I achieve the expected result?
Here is a link to the sum bucket aggregation docs.
Many thanks for the help.
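Note: sibling pipeline aggregations such as sum_bucket only exist from Elasticsearch 2.0 onward, and the filtered query above is 1.x-era syntax, which would explain the parse failure. In the meantime the sum can be computed client-side from the response that is already returned. A minimal Python sketch, assuming the response shape shown above (the sample data is copied from the question):
def sum_family_averages(response):
    """Sum the averageFiveMinute values across reportFamily buckets, per timestamp."""
    sums = {}
    for ts_bucket in response["aggregations"]["timestamp"]["buckets"]:
        total = 0.0
        for family_bucket in ts_bucket["reportFamily"]["buckets"]:
            value = family_bucket["averageFiveMinute"]["value"]
            if value is not None:  # avg is null for buckets with no values
                total += value
        sums[ts_bucket["key_as_string"]] = total
    return sums

# Figures from the response above: 1687.6 + 56710.6 -> 58398.2
sample = {"aggregations": {"timestamp": {"buckets": [
    {"key_as_string": "2015-09-08T15:05:00.000Z",
     "reportFamily": {"buckets": [
         {"key": "GRXoIPX", "averageFiveMinute": {"value": 1687.6}},
         {"key": "LTEoIPX", "averageFiveMinute": {"value": 56710.6}}]}}]}}}
print(sum_family_averages(sample))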

Related

Terms aggregation not returning other buckets

I'm not able to get the other buckets from a terms aggregation when combining it with a filter aggregation. Is there any way to do this in Elasticsearch?
The mapping is a customer document with a nested address, and each address has nested properties.
I've tried the following:
{
"size": 0,
"aggs": {
"address": {
"nested": {
"path": "address"
},
"aggs": {
"shipping_to_address": {
"aggs": {
"city": {
"terms": {
"field": "address.city.name.keyword",
"size": 10,
"missing": "others"
}
}
},
"filter": {
"bool": {
"must": [
{
"nested": {
"path": "address.properties",
"query": {
"bool": {
"filter": [
{
"term": {
"address.properties.type": "shipping_to"
}
}
]
}
}
}
}
]
}
}
}
}
}
}
}
The above only returns the buckets matching the filter.
{
"hits": {
"total": 3,
"max_score": 0,
"hits": []
},
"aggregations": {
"address": {
"doc_count": 3,
"shipping_to_address": {
"doc_count": 1,
"city": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "new york",
"doc_count": 1
}
]
}
}
}
}
}
I would like to see the other buckets as below:
"buckets": [
{
"key": "new york",
"doc_count": 1
},
{
"key": "others",
"doc_count": 2
}
]
You need to add "min_doc_count": 0 to the terms aggregation; it will then return empty buckets as well.
Link for reference
{
"size": 0,
"aggs": {
"address": {
"nested": {
"path": "address"
},
"aggs": {
"shipping_to_address": {
"aggs": {
"city": {
"terms": {
"field": "address.city.name.keyword",
"size": 10,
"min_doc_count":0,
"missing": "others"
}
}
},
"filter": {
"bool": {
"must": [
{
"nested": {
"path": "address.properties",
"query": {
"bool": {
"filter": [
{
"term": {
"address.properties.type": "shipping_to"
}
}
]
}
}
}
}
]
}
}
}
}
}
}
}

ElasticSearch Filtering By Keys of term aggregation

I have an ES query which returns data in the following format:
"by_group": {
"doc_count_error_upper_bound": 55,
"sum_other_doc_count": 1094497,
"buckets": [{
"key": "a838c7df-1ea9-48f1-aa71-69936b54f47d",
"doc_count": 69,
"by_subGroup": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [{
"key": "k1",
"doc_count": 45
},
{
"key": "k2",
"doc_count": 7
},
{
"key": "k3",
"doc_count": 6
},
{
"key": "k6",
"doc_count": 6
}
]
}
}]
}
I would like to filter my result (by_group) using the keys of my subgroup.
For example, I only want the by_group buckets which have the keys k1 and k2 but not k3.
Is it possible to filter in this way?
My current query looks like:
{
"size": 0,
"query": {},
"aggs": {
"by_group": {
"terms": {
"field": "field1",
"size": 10
},
"aggs": {
"by_subGroup": {
"terms": {
"field": "field2",
"size": 1000
}
}
}
}
}
}
Use a filter aggregation wrapping the terms aggregation (the inner filtered_subGroup name below is arbitrary):
{
"size": 0,
"query": {},
"aggs": {
"by_group": {
"terms": {
"field": "field1",
"size": 10
},
"aggs": {
"by_subGroup": {
"filter": {
"terms": {
"field2": ["k1", "k2"]
}
},
"aggs": {
"filtered_subGroup": {
"terms": {
"field": "field2",
"size": 1000
}
}
}
}
}
}
}
}
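If the goal is really to keep only those by_group buckets whose sub-keys include k1 and k2 and exclude k3, note that the filter above only narrows the sub-buckets; it does not drop the parent groups. That selection can be done client-side on the response instead. A minimal Python sketch, assuming the response shape shown at the top of this question (select_groups and its arguments are hypothetical names):
def select_groups(by_group, required={"k1", "k2"}, forbidden={"k3"}):
    """Keep by_group buckets containing all required sub-keys and no forbidden ones."""
    kept = []
    for bucket in by_group["buckets"]:
        keys = {sub["key"] for sub in bucket["by_subGroup"]["buckets"]}
        if required <= keys and not (forbidden & keys):
            kept.append(bucket)
    return kept

sample = {"buckets": [{
    "key": "a838c7df-1ea9-48f1-aa71-69936b54f47d",
    "by_subGroup": {"buckets": [{"key": "k1"}, {"key": "k2"}, {"key": "k3"}, {"key": "k6"}]}}]}
print(select_groups(sample))  # prints [] because the sample group contains k3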

Exclude results based on key in elasticsearch

I have the below mapping for a type in Elasticsearch:
"properties": {
"userid": {
"type": "integer"
},
"engid": {
"type": "short"
},
"score": {
"type": "short",
},
"name": {
"type": "string",
"index": "not_analyzed"
},
"submitTime": {
"type": "date",
"format": "yyyy-MM-dd HH:mm:ss"
}
}
And my search query is:
{
"size": 10,
"query": {
"filtered": {
"query": {
"match_all": {}
},
"filter": {
"range": {
"submitTime": {
"gt": "now-18d"
}
}
}
}
},
"aggs": {
"name": {
"terms": {
"field": "name",
"order": {
"_term": "asc"
}
},
"aggs": {
"score": {
"terms": {
"field": "score"
}
}
}
}
}
}
This gives my expected result:
"aggregations": {
"name": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "---",
"doc_count": 169529,
"score": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": 0,
"doc_count": 160133
},
{
"key": 5,
"doc_count": 9395
},
{
"key": 4,
"doc_count": 1
}
]
}
},
{
"key": "John",
"doc_count": 1,
"score": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": 5,
"doc_count": 1
}
]
}
}
Now I want to remove the bucket from my results where name='---'. I tried using 'not', but it didn't work. Any hint will be appreciated.
PS: I am new to Elasticsearch and just trying to expand my knowledge.
You need to exclude the --- value in your query
{
"size": 10,
"query": {
"filtered": {
"query": {
"match_all": {}
},
"filter": {
"bool": {
"must": [
{
"range": {
"submitTime": {
"gt": "now-18d"
}
}
}
],
"must_not": [
{
"term": {
"name": "---"
}
}
]
}
}
}
},
"aggs": {
"name": {
"terms": {
"field": "name",
"order": {
"_term": "asc"
}
},
"aggs": {
"score": {
"terms": {
"field": "score"
}
}
}
}
}
}
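Depending on the Elasticsearch version, the terms aggregation can also take an exclude list of exact values (supported on 5.x and later; older versions may only accept a regex pattern), which drops the unwanted key from the buckets without touching the query. A sketch of just the aggs section, written here as a Python dict and untested, only to illustrate the option:
# Hypothetical aggs body using the terms aggregation's "exclude" option.
aggs = {
    "name": {
        "terms": {
            "field": "name",
            "order": {"_term": "asc"},
            "exclude": ["---"]  # drop the "---" bucket from the name buckets
        },
        "aggs": {"score": {"terms": {"field": "score"}}}
    }
}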

Elasticsearch aggregations, get additional field in bucket

I query an ES index to filter results and get aggregations by selected terms. A sample query looks like this:
GET buyer_requests/vehicle_requests/_search
{
"query": {
"filtered": {
"filter": {
"and": [
{
"terms": {
"vehicle.make.raw": [
"Audi",
"BMW",
"Chevrolet"
]
}
},
{
"range": {
"style.price": {
"gte": 15000,
"lte": 20000
}
}
},
{
"geo_distance": {
"distance": "20000km",
"info.pin": {
"lat": 42,
"lon": 21
}
}
}
]
}
}
},
"aggs": {
"makes": {
"filter": {
"range": {
"style.price": {
"gte": 5000,
"lte": 40000
}
}
},
"aggs": {
"makes": {
"terms": {
"field": "vehicle.make.raw",
"order": {
"_term": "asc"
}
}
}
}
},
"model": {
"filter": {
"and": [
{
"terms": {
"vehicle.make.raw": [
"Audi",
"BMW",
"Chevrolet"
]
}
}
]
},
"aggs": {
"models": {
"terms": {
"field": "vehicle.model.raw",
"size": 10,
"order": {
"_term": "asc"
}
}
}
}
}
}
}
The result I get is the standard buckets output for the makes and model aggregations above.
How can I get another field from the documents into the "buckets" section of the "models" terms aggregation? I want a reference to the make, so the result would look like this:
"model": {
"doc_count": 7,
"models": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "3 Series",
"make": "bmw", <----------- this key
"doc_count": 3
},
{
"key": "4 Series",
"make": "bmw", <----------- this key
"doc_count": 4
},
{
"key": "Camaro",
"make": "chevrolet", <----------- this key
"doc_count": 2
}
]
}
}
You need to make your models aggregation a sub-aggregation of the makes aggregation and rearrange the filter aggregation a bit. The result won't look syntactically like what you expect, but semantically you'll get the data you need.
GET buyer_requests/vehicle_requests/_search
{
"query": {
"filtered": {
"filter": {
"and": [
{
"terms": {
"vehicle.make.raw": [
"Audi",
"BMW",
"Chevrolet"
]
}
},
{
"range": {
"style.price": {
"gte": 15000,
"lte": 20000
}
}
},
{
"geo_distance": {
"distance": "20000km",
"info.pin": {
"lat": 42,
"lon": 21
}
}
}
]
}
}
},
"aggs": {
"makes": {
"filter": {
"and": [
{
"terms": {
"vehicle.make.raw": [
"Audi",
"BMW",
"Chevrolet"
]
}
},
{
"range": {
"style.price": {
"gte": 5000,
"lte": 40000
}
}
}
]
},
"aggs": {
"makes": {
"terms": {
"field": "vehicle.make.raw",
"order": {
"_term": "asc"
}
},
"aggs": {
"models": {
"terms": {
"field": "vehicle.model.raw",
"size": 10,
"order": {
"_term": "asc"
}
}
}
}
}
}
}
}
}

How to calculate difference between metrics in different aggregations in elasticsearch

I want to calculate the difference of nested aggregations between two dates.
To be more concrete, is it possible to calculate the difference between date_1.buckets.field_1.buckets.field_2.buckets.field_3.value - date_2.buckets.field_1.buckets.field_2.buckets.field_3.value given the request/response below? Is that possible with Elasticsearch v1.0.1?
The aggregation query request looks like this:
{
"query": {
"filtered": {
"query": {
"match_all": {}
},
"filter": {
"bool": {
"must": [
{
"terms": {
"date": [
"2014-08-18 00:00:00.0",
"2014-08-15 00:00:00.0"
]
}
}
]
}
}
}
},
"aggs": {
"date_1": {
"filter": {
"terms": {
"date": [
"2014-08-18 00:00:00.0"
]
}
},
"aggs": {
"my_agg_1": {
"terms": {
"field": "field_1",
"size": 2147483647,
"order": {
"_term": "desc"
}
},
"aggs": {
"my_agg_2": {
"terms": {
"field": "field_2",
"size": 2147483647,
"order": {
"_term": "desc"
}
},
"aggs": {
"my_agg_3": {
"sum": {
"field": "field_3"
}
}
}
}
}
}
}
},
"date_2": {
"filter": {
"terms": {
"date": [
"2014-08-15 00:00:00.0"
]
}
},
"aggs": {
"my_agg_1": {
"terms": {
"field": "field_1",
"size": 2147483647,
"order": {
"_term": "desc"
}
},
"aggs": {
"my_agg_1": {
"terms": {
"field": "field_2",
"size": 2147483647,
"order": {
"_term": "desc"
}
},
"aggs": {
"my_agg_3": {
"sum": {
"field": "field_3"
}
}
}
}
}
}
}
}
}
}
And the response looks like this:
{
"took": 236,
"timed_out": false,
"_shards": {
"total": 1,
"successful": 1,
"failed": 0
},
"hits": {
"total": 1646,
"max_score": 0,
"hits": []
},
"aggregations": {
"date_1": {
"doc_count": 823,
"field_1": {
"buckets": [
{
"key": "field_1_key_1",
"doc_count": 719,
"field_2": {
"buckets": [
{
"key": "key_1",
"doc_count": 275,
"field_3": {
"value": 100
}
}
]
}
}
]
}
},
"date_2": {
"doc_count": 823,
"field_1": {
"buckets": [
{
"key": "field_1_key_1",
"doc_count": 719,
"field_2": {
"buckets": [
{
"key": "key_1",
"doc_count": 275,
"field_3": {
"value": 80
}
}
]
}
}
]
}
}
}
}
Thank you.
With a newer Elasticsearch version (e.g. 5.6.9) it is possible:
{
"size": 0,
"query": {
"constant_score": {
"filter": {
"bool": {
"filter": [
{
"range": {
"date_created": {
"gte": "2018-06-16T00:00:00+02:00",
"lte": "2018-06-16T23:59:59+02:00"
}
}
}
]
}
}
}
},
"aggs": {
"by_millisec": {
"range" : {
"script" : {
"lang": "painless",
"source": "doc['date_delivered'][0] - doc['date_created'][0]"
},
"ranges" : [
{ "key": "<1sec", "to": 1000.0 },
{ "key": "1-5sec", "from": 1000.0, "to": 5000.0 },
{ "key": "5-30sec", "from": 5000.0, "to": 30000.0 },
{ "key": "30-60sec", "from": 30000.0, "to": 60000.0 },
{ "key": "1-2min", "from": 60000.0, "to": 120000.0 },
{ "key": "2-5min", "from": 120000.0, "to": 300000.0 },
{ "key": "5-10min", "from": 300000.0, "to": 600000.0 },
{ "key": ">10min", "from": 600000.0 }
]
}
}
}
}
No arithmetic operations are allowed between the results of two aggregations in the Elasticsearch DSL, not even using scripts (up to version 1.1.1, as far as I know).
Such operations need to be handled on the client side after processing the aggs result.
Reference:
elasticsearch aggregation to sort by ratio of aggregations
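A minimal Python sketch of that client-side step, using the aggregation names from the sample response above (date_1/date_2 with nested field_1 > field_2 > field_3 buckets); the helper names are hypothetical:
# Hypothetical client-side computation of date_1 - date_2 per (field_1, field_2) key,
# following the aggregation names in the sample response above.

def leaf_values(date_agg):
    """Flatten one date aggregation into {(field_1 key, field_2 key): field_3 value}."""
    values = {}
    for b1 in date_agg["field_1"]["buckets"]:
        for b2 in b1["field_2"]["buckets"]:
            values[(b1["key"], b2["key"])] = b2["field_3"]["value"]
    return values

def differences(aggregations):
    d1 = leaf_values(aggregations["date_1"])
    d2 = leaf_values(aggregations["date_2"])
    # only key pairs present on both dates can be subtracted
    return {k: d1[k] - d2[k] for k in d1.keys() & d2.keys()}

# Values taken from the sample response above: 100 - 80 -> 20
sample_aggs = {
    "date_1": {"field_1": {"buckets": [{"key": "field_1_key_1",
        "field_2": {"buckets": [{"key": "key_1", "field_3": {"value": 100}}]}}]}},
    "date_2": {"field_1": {"buckets": [{"key": "field_1_key_1",
        "field_2": {"buckets": [{"key": "key_1", "field_3": {"value": 80}}]}}]}},
}
print(differences(sample_aggs))  # {('field_1_key_1', 'key_1'): 20}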
In 1.0.1 I couldn't find anything, but in 1.4.2 you could try the scripted_metric aggregation (still experimental).
Here is the scripted_metric documentation page.
I am not great with the Elasticsearch syntax, but I think your metric inputs would be:
init_script - initialize an accumulator for each date:
"init_script": "_agg.d1Val = 0; _agg.d2Val = 0;"
map_script - test the date of the document and add to the right accumulator:
"map_script": "if (doc.date == firstDate) { _agg.d1Val += doc.field_3; } else { _agg.d2Val += doc.field_3; };",
reduce_script - accumulate the intermediate data from the shards and return the final result:
"reduce_script": "totalD1 = 0; totalD2 = 0; for (agg in _aggs) { totalD1 += agg.d1Val; totalD2 += agg.d2Val; }; return totalD1 - totalD2"
I don't think that in this case you need a combine_script.
Of course, if you can't use 1.4.2 then this is no help :-)
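For completeness, here is how those pieces might fit together in one scripted_metric request body (1.4.x-era syntax, written as a Python dict; the date_diff aggregation name and the firstDate param are hypothetical, and the sketch is untested):
# Hypothetical assembly of the scripted_metric pieces from the answer above
# into a single request body (Elasticsearch 1.4.x-era syntax, untested sketch).
body = {
    "size": 0,
    "aggs": {
        "date_diff": {
            "scripted_metric": {
                "params": {"firstDate": "2014-08-18 00:00:00.0"},
                "init_script": "_agg.d1Val = 0; _agg.d2Val = 0;",
                "map_script": (
                    "if (doc.date == firstDate) "
                    "{ _agg.d1Val += doc.field_3; } "
                    "else { _agg.d2Val += doc.field_3; };"
                ),
                "reduce_script": (
                    "totalD1 = 0; totalD2 = 0; "
                    "for (agg in _aggs) { totalD1 += agg.d1Val; totalD2 += agg.d2Val; }; "
                    "return totalD1 - totalD2"
                ),
            }
        }
    },
}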
