How to properly do sorting under aggregation? - elasticsearch

I am still new to Elasticsearch and would like some assistance. I am getting an error when trying to sort within an aggregation. Please advise. Thank you.
{
"size": 20,
"query": {
"bool": {
"filter": [
{
"range": {
"ts": {
"gt": "2016-08-22T00:00:00.000Z",
"lt": "2016-08-23T13:41:09.000Z"
}
}
}
]
}
},
"aggs": {
"group_by_ip": {
"terms": {
"field": "id_orig_h"
},
"aggs": {
"sum_volume": {
"sum": {
"field": "resp_bytes",
"sort": [
{
"resp_bytes": {
"order": "asc"
}
}
]
}
}
}
}
}
}

You can do it with the order setting in your terms aggregation referencing the sum_volume sub-aggregation:
{
"size": 20,
"query": {
"bool": {
"filter": [
{
"range": {
"ts": {
"gt": "2016-08-22T00:00:00.000Z",
"lt": "2016-08-23T13:41:09.000Z"
}
}
}
]
}
},
"aggs": {
"group_by_ip": {
"terms": {
"field": "id_orig_h",
"order": {
"sum_volume": "asc"
}
},
"aggs": {
"sum_volume": {
"sum": {
"field": "resp_bytes"
}
}
}
}
}
}

Related

How to order serial_diff aggregation result in Elasticsearch?

I have built a query based on the serial_diff aggregation, and I am trying to sort the results by the value that the serial_diff aggregation produces. I am struggling to get the results in order; my query is below.
GET db/_search
{
"size": 0,
"query": {
"bool": {
"must": [
{
"terms": {
"Name": [
"q"
]
}
}
],
"filter": [
{
"range": {
"ts": {
"gte": "2020-03-09T09:00:00.000Z",
"lte": "2020-03-09T12:40:00.000Z",
"format": "date_optional_time"
}
}
}
]
}
},
"aggs": {
"sourceNameCount": {
"cardinality": {
"field": "sourceName"
}
},
"sourceName": {
"terms": {
"size": 100,
"field": "sourceName"
},
"aggs": {
"timeseries": {
"date_histogram": {
"field": "ts",
"min_doc_count": 1,
"interval": "15m",
"order": {
"_key": "asc"
}
},
"aggs": {
"the_sum":{
"avg":{
"field": "libVal"
}
},
"ts_diff":{
"serial_diff": {
"buckets_path": "the_sum",
"lag": 1
}
}
}
}
}
}
}
}

Filter is not being applied to aggregation

I'm trying to get the billing of a product sold by a specific user, but it seems that the query is not being applied to the sum aggregation.
Could someone help me, please?
{
"query": {
"bool": {
"filter": [
{ "term": { "seller": 1 } },
{"term": { "product": 2 } }
]
}
},
"size": 0,
"aggs": {
"product": {
"terms": {
"field": "product"
},
"aggregations": {
"billing": {
"sum": {
"field": "price"
}
},
"aggregation": {
"bucket_sort": {
"sort": [
{
"billing": {
"order": "desc"
}
}
]
}
}
}
}
}
}
Try nesting your existing aggregations within another terms aggregation on "seller".
{
"query": {
"bool": {
"filter": [
{
"term": {
"seller": 1
}
},
{
"term": {
"product": 2
}
}
]
}
},
"size": 0,
"aggs": {
"seller": {
"terms": {
"field": "seller",
"size": 1
},
"aggs": {
"product": {
"terms": {
"field": "product",
"size": 1
},
"aggregations": {
"billing": {
"sum": {
"field": "price"
}
},
"aggregation": {
"bucket_sort": {
"sort": [
{
"billing": {
"order": "desc"
}
}
]
}
}
}
}
}
}
}
}

ElasticSearch extended_bounds over range with no data/hitdocs

I have a range for which no hitdocs exist. When a date_histogram aggregation-based query is run with extended_bounds over this no-data range, nothing is returned.
However, for a range which has at least 1 hitdoc, buckets data is returned for the range as specified using extended_bounds.
How can I achieve similar results over a range with no hitdocs?
Sample query -
{
"size": 0,
"query": {
"filtered": {
"filter": {
"bool": {
"must": [
{
"term": {
"kind": "hit-search"
}
},
{
"range": {
"startTime": {
"gte": 1506429661000,
"lte": 1506516061000
}
}
}
]
}
}
}
},
"aggs": {
"perHost": {
"terms": {
"field": "user"
},
"aggs": {
"ts": {
"date_histogram": {
"field": "startTime",
"interval": "30m",
"min_doc_count": 0,
"extended_bounds": {
"min": 1506429661000,
"max": 1506516061000
}
},
"aggs": {
"numQuery": {
"cardinality": {
"field": "queryId"
}
}
}
}
}
}
},
"from": 0
}
One can use the missing aggregation for this. After the update, the above query looks like this:
{
"size": 0,
"query": {
"filtered": {
"filter": {
"bool": {
"must": [
{
"term": {
"kind": "hit-search"
}
},
{
"range": {
"startTime": {
"gte": 1506429661000,
"lte": 1506516061000
}
}
}
]
}
}
}
},
"aggs": {
"perHost": {
"missing": {
"field": "user"
},
"aggs": {
"ts": {
"date_histogram": {
"field": "startTime",
"interval": "30m",
"min_doc_count": 0,
"extended_bounds": {
"min": 1506429661000,
"max": 1506516061000
}
},
"aggs": {
"numQuery": {
"cardinality": {
"field": "queryId"
}
}
}
}
}
}
},
"from": 0
}
An observation - extended_bounds doesn't seem to be working for missing.

How to filter subindex for aggregation in Elasticsearch?

I query an index with wildcard (interactive*) to get all documents for the two indices interactive-foo* & interactive-bar*.
For some of my aggregations all of the indices are relevant but for others only interactive-foo* OR interactive-bar*. So I just want to filter for these 'subindices' in the aggregation.
GET _search
{
"query":{
"bool": {
"must": [
{
"range": {
"timestamp": {
"gte": "2017-08-01 00:00:00",
"lte": "2017-08-31 23:59:59"
}
}
},
{
"match": {
"key": "SOME_KEY"
}
}
]
}
},
"size":0,
"aggs": {
// This one should be filtered and just count for interactive-bar*
"bar_count": {
"value_count": {
"field": "SOME_FIELD"
}
},
// This one should be filtered and just count for interactive-foo*
"foo_count": {
"value_count": {
"field": "SOME_FIELD"
}
}
}
}
You can use a filter aggregation like this:
{
"query": {
"bool": {
"must": [
{
"range": {
"timestamp": {
"gte": "2017-08-01 00:00:00",
"lte": "2017-08-31 23:59:59"
}
}
},
{
"match": {
"key": "SOME_KEY"
}
}
]
}
},
"size": 0,
"aggs": {
"bar_count": {
"filter": {
"indices": {
"indices": ["interactive-bar-*"]
}
},
"aggs": {
"bar_count": {
"value_count": {
"field": "SOME_FIELD"
}
}
}
},
"foo_count": {
"filter": {
"indices": {
"indices": ["interactive-foo-*"]
}
},
"aggs": {
"foo_count": {
"value_count": {
"field": "SOME_FIELD"
}
}
}
}
}
}
Note though that the indices query has been deprecated in ES 5.0. What you should do instead is to use a terms query on the _index field and list all the indices you want to include in your aggregation, like this:
"size": 0,
"aggs": {
"bar_count": {
"filter": {
"terms": {
"_index": ["interactive-foo-2017.08.14", "interactive-foo-2017.08.15"]
}
},
"aggs": {
"bar_count": {
"value_count": {
"field": "SOME_FIELD"
}
}
}
},
"foo_count": {
"filter": {
"terms": {
"_index": ["interactive-bar-2017.08.14", "interactive-bar-2017.08.15"]
}
},
"aggs": {
"foo_count": {
"value_count": {
"field": "SOME_FIELD"
}
}
}
}
}
}

How to use Aggregation by range time and terms

Idea: search for the top events within a specific date range and order them by start_time, like this:
{
"from": 0,
"size": 7,
"query": {
"filtered": {
"query": { "match_all": {} },
"filter": {
"and": [
{ "bool": { "must_not": { "term": { "status": "OK" } } } },
{ "bool": { "must": { "term": { "is_blocked": false } } } }, {
"range": {
"start_time": {
"gte": "2016-01-01",
"lte": "2016-03-01"
}
}
}, {
"bool": {
"must": {
"geo_distance": {
"distance": "150km",
"coordinates": "xx.xxx, zz.zz "
}
}
}
}
]
}
}
},
"sort": [{ "start_time": "asc" },
{ "attending": "desc" }
]
}
I am quite new to the concept of aggregations, so I still have basic problems understanding them.
I want 7 results: the top events for the next 2 months. So I have two attributes to look at. The maximum number of people attending (`attending`) is the definition of "top", but I also want to order the results by time (start_time: asc).
What I have started to write, which is wrong:
{
"aggs": {
"aggs": {
"event_interval": {
"date_histogram": {
"field": "start_time",
"interval": "2M",
"format": "dateOptionalTime"
}
},
"max_attending": { "max": { "field": "attending" } },
"_source": {
"include": [
"name"
]
}
}
}
}
I'm not sure you need to use an aggregation to get what you are looking for. I think a simple query can yield the results you would like to see. Try this:
{
"size": 7,
"sort": {
"attending": {
"order": "desc"
}
},
"query": {
"bool": {
"filter": [
{
"range": {
"start_time": {
"gte": "now-2M",
"lte": "now"
}
}
}
]
}
}
}

Resources