Elasticsearch - Calculate sub range aggregation - elasticsearch

I have the following ES query to calculate a host's average CPU usage over the last 30 days.
# Search body: hourly CPU averages for one host over the last 30 days,
# plus a pipeline aggregation that averages those hourly values.
es_query = dict(
    query={
        "constant_score": {
            "filter": {
                "bool": {
                    "must": [
                        # Only documents whose timestamp is within the last 30 days.
                        {"range": {"#timestamp": {"gte": "now-30d"}}},
                        # Only documents for the host of interest.
                        {
                            "query_string": {
                                "query": "hostname: myhost",
                                "analyze_wildcard": True,
                            }
                        },
                    ],
                    "should": [
                        {"match": {"metricset.name": "cpu"}},
                    ],
                }
            }
        }
    },
    aggs={
        # One bucket per hour; empty hours are dropped (min_doc_count = 1).
        "group_by_time_interval": {
            "date_histogram": {
                "field": "#timestamp",
                "interval": "1h",
                "time_zone": "PST8PDT",
                "min_doc_count": 1,
            },
            "aggs": {
                "cpu_used_avg_pct": {
                    "avg": {"field": "system.cpu.total.pct"},
                },
            },
        },
        # Average of the per-hour averages computed above.
        "avg_monthly_cpu_pct": {
            "avg_bucket": {
                "buckets_path": "group_by_time_interval>cpu_used_avg_pct",
            },
        },
    },
)
After executing it, it returns the average CPU of the last 30 days, as expected.
The question is: how can I also compute the average CPU of the last 7 days by just extending the above query?
Currently, my naive solution is to copy it into another query, replace "gte: now-30d" with "gte: now-7d", and run it again, which is very time-consuming.
Thank you.
Alex

The easiest thing you can do is simply add another aggregation that is filtered on the last 7 days:
{
"query": {
"constant_score": {
"filter": {
"bool": {
"must": [
{
"range": {
"#timestamp": {
"gte": "now-30d"
}
}
},
{
"query_string": {
"query": "hostname: myhost",
"analyze_wildcard": true
}
}
],
"should": [
{
"match": {
"metricset.name": "cpu"
}
}
]
}
}
}
},
"aggs": {
"group_by_time_interval": {
"date_histogram": {
"field": "#timestamp",
"interval": "1h",
"time_zone": "PST8PDT",
"min_doc_count": 1
},
"aggs": {
"cpu_used_avg_pct": {
"avg": {
"field": "system.cpu.total.pct"
}
}
}
},
"avg_monthly_cpu_pct": {
"avg_bucket": {
"buckets_path": "group_by_time_interval>cpu_used_avg_pct"
}
},
"last_7_days": {
"filter": {
"range": {
"#timestamp": {
"gte": "now-7d"
}
}
},
"aggs": {
"last_7_days_interval": {
"date_histogram": {
"field": "#timestamp",
"interval": "1h",
"time_zone": "PST8PDT",
"min_doc_count": 1
},
"aggs": {
"cpu_used_avg_pct": {
"avg": {
"field": "system.cpu.total.pct"
}
}
}
},
"avg_monthly_cpu_pct": {
"avg_bucket": {
"buckets_path": "last_7_days_interval>cpu_used_avg_pct"
}
}
}
}
}
}

Related

Elasticsearch add range filter to aggregation

I'm not experienced with Elasticsearch and I have to add a range filter on the field "data.elements.id_element" to the following query:
{
"aggs": {
"2": {
"date_histogram": {
"field": "#timestamp",
"calendar_interval": "1d",
"min_doc_count": 1
},
"aggs": {
"elementId": {
"terms": {
"field": "data.elements.id_element",
"order": {
"_count": "desc"
},
"size": 1000
},
"aggs": {
"Device": {
"filters": {
},
"aggs": {
}
}
}
}
}
}
},
"size": 0,
"docvalue_fields": [
{
"field": "#timestamp",
"format": "date_time"
}
],
"query": {
"bool": {
"filter": [
{
"range": {
"#timestamp": {
"gte": "startDate",
"lte": "endDate",
"format": "strict_date_optional_time"
}
}
}
]
}
}
}
I've tried adding the range to the filter part like this, but it is ignored:
{
"aggs": {
"2": {
"date_histogram": {
"field": "#timestamp",
"calendar_interval": "1d",
"min_doc_count": 1
},
"aggs": {
"elementId": {
"terms": {
"field": "data.elements.id_element",
"order": {
"_count": "desc"
},
"size": 1000
},
"aggs": {
"Device": {
"filters": {
},
"aggs": {
}
}
}
}
}
}
},
"size": 0,
"docvalue_fields": [
{
"field": "#timestamp",
"format": "date_time"
}
],
"query": {
"bool": {
"filter": [
{
"range": {
"#timestamp": {
"gte": "startDate",
"lte": "endDate",
"format": "strict_date_optional_time"
}
}
},
{
"range": {
"data.elements.id_element": {
"gte": 1,
"lte": 1001
}
}
}
]
}
}
}
I've tried this too:
{
"aggs": {
"2": {
"date_histogram": {
"field": "#timestamp",
"calendar_interval": "1d",
"min_doc_count": 1
},
"aggs": {
"elementId": {
"terms": {
"field": "data.elements.id_element",
"order": {
"_count": "desc"
},
"size": 1000
},
"aggs": {
"Device": {
"filters": {
},
"aggs": {
}
}
}
}
}
}
},
"size": 0,
"docvalue_fields": [
{
"field": "#timestamp",
"format": "date_time"
}
],
"query": {
"bool": {
"must": [
{
"query_string": {
"query": "data.elements.id_element:[1 TO 1001]",
"analyze_wildcard": true
}
}
],
"filter": [
{
"range": {
"#timestamp": {
"gte": "startDate",
"lte": "endDate",
"format": "strict_date_optional_time"
}
}
}
]
}
}
}
Same result: random element IDs are returned and the range filter/condition is not respected.
Any ideas, please?
Thanks.
For others who may face the same problem: I used partitions, splitting my query into several queries following this doc:
https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-bucket-terms-aggregation.html#_filtering_values_with_partitions
Maybe there is a better solution, but this is what worked for me in my context.
If you want to apply a filter to a particular aggregation only, this can be done with a filter aggregation, as shown below:
{
  "aggs": {
    "elementId": {
      "filter": {
        "bool": {
          "filter": [
            {
              "range": {
                "#timestamp": {
                  "gte": "startDate",
                  "lte": "endDate",
                  "format": "strict_date_optional_time"
                }
              }
            }
          ]
        }
      },
      "aggs": {
        "elementId": {
          "terms": {
            "field": "data.elements.id_element",
            "order": {
              "_count": "desc"
            },
            "size": 1000
          }
        }
      }
    }
  }
}

How to order serial_diff aggregation result in Elasticsearch?

I have built a query based on the serial_diff aggregation. I am trying to sort the results by the value of the serial_diff aggregation, but I am struggling to get them in order. The query is below.
GET db/_search
{
  "size": 0,
  "query": {
    "bool": {
      "must": [
        {
          "terms": {
            "Name": ["q"]
          }
        }
      ],
      "filter": [
        {
          "range": {
            "ts": {
              "gte": "2020-03-09T09:00:00.000Z",
              "lte": "2020-03-09T12:40:00.000Z",
              "format": "date_optional_time"
            }
          }
        }
      ]
    }
  },
  "aggs": {
    "sourceNameCount": {
      "cardinality": {
        "field": "sourceName"
      }
    },
    "sourceName": {
      "terms": {
        "size": 100,
        "field": "sourceName"
      },
      "aggs": {
        "timeseries": {
          "date_histogram": {
            "field": "ts",
            "min_doc_count": 1,
            "interval": "15m",
            "order": {
              "_key": "asc"
            }
          },
          "aggs": {
            "the_sum": {
              "avg": {
                "field": "libVal"
              }
            },
            "ts_diff": {
              "serial_diff": {
                "buckets_path": "the_sum",
                "lag": 1
              }
            }
          }
        }
      }
    }
  }
}

How to aggregate minutely data to hourly after 90 days?

I would like to average minutely data into hourly data once it is older than a certain time period. What query would do that?
The query structure is -
GET ml_test_meters-2019_6/_search
{
"query": {
"bool": {
"must": [
{
"query_string": {
"query": "*",
"analyze_wildcard": true
}
},
{
"range": {
"host-status.meta.current-time": {
"gte": 1549611907552,
"lte": 1549654551498,
"format": "epoch_millis"
}
}
}
],
"must_not": []
}
},
"size": 0,
"_source": {
"excludes": []
},
"aggs": {
"2": {
"date_histogram": {
"field": "host-status.meta.current-time",
"interval": "1h",
"time_zone": "US/Central",
"min_doc_count": 1
},
"aggs": {
"3": {
"terms": {
"field": "host-status.name.keyword",
"size": 500,
"order": {
"1": "desc"
}
},
"aggs": {
"1": {
"avg": {
"field": "host-status.status-properties.status-detail.total-cpu-stat-iowait"
}
}
}
}
}
}
}
}
What would be a possible solution? Later on, I would like to insert the new hourly data into the same index and delete the minutely data.

ElasticSearch extended_bounds over range with no data/hitdocs

I have a range for which no matching documents (hits) exist. When a date_histogram aggregation query is run with extended_bounds over this no-data range, nothing is returned.
However, for a range that has at least one hit, bucket data is returned for the whole range specified by extended_bounds.
How can I achieve similar results over a range with no hits?
Sample query -
{
"size": 0,
"query": {
"filtered": {
"filter": {
"bool": {
"must": [
{
"term": {
"kind": "hit-search"
}
},
{
"range": {
"startTime": {
"gte": 1506429661000,
"lte": 1506516061000
}
}
}
]
}
}
}
},
"aggs": {
"perHost": {
"terms": {
"field": "user"
},
"aggs": {
"ts": {
"date_histogram": {
"field": "startTime",
"interval": "30m",
"min_doc_count": 0,
"extended_bounds": {
"min": 1506429661000,
"max": 1506516061000
}
},
"aggs": {
"numQuery": {
"cardinality": {
"field": "queryId"
}
}
}
}
}
}
},
"from": 0
}
One can use the missing aggregation for this. After the update, the above query looks like this:
{
"size": 0,
"query": {
"filtered": {
"filter": {
"bool": {
"must": [
{
"term": {
"kind": "hit-search"
}
},
{
"range": {
"startTime": {
"gte": 1506429661000,
"lte": 1506516061000
}
}
}
]
}
}
}
},
"aggs": {
"perHost": {
"missing": {
"field": "user"
},
"aggs": {
"ts": {
"date_histogram": {
"field": "startTime",
"interval": "30m",
"min_doc_count": 0,
"extended_bounds": {
"min": 1506429661000,
"max": 1506516061000
}
},
"aggs": {
"numQuery": {
"cardinality": {
"field": "queryId"
}
}
}
}
}
}
},
"from": 0
}
An observation - extended_bounds doesn't seem to be working for missing.

Using minimum_should_match in filtered elasticSearch query

I have a filtered elasticsearch query that works, but I want to use minimum_should_match to instruct ES to return only results that have at least 3 should matches. But I can't seem to figure out where to put minimum_should_match. Where should I put it?
{
"size": 100,
"sort": {
"price_monthly": "asc"
},
"query": {
"filtered": {
"query": {
"match_all": []
},
"filter": {
"bool": {
"must": [],
"should": [
[
{
"range": {
"mb.untouched": {
"gte": "0",
"lt": "500"
}
}
},
{
"range": {
"mb.untouched": {
"gte": "500",
"lt": "1000"
}
}
}
],
[
{
"range": {
"minutes.untouched": {
"gte": "0",
"lt": "100"
}
}
},
{
"range": {
"minutes.untouched": {
"gte": "200",
"lt": "300"
}
}
}
],
[
{
"range": {
"sms.untouched": {
"gte": "750",
"lt": "1000"
}
}
}
]
],
"must_not": {
"missing": {
"field": "provider.untouched"
}
}
}
},
"strategy": "query_first"
}
},
"aggs": {
"provider.untouched": {
"terms": {
"field": "provider.untouched"
}
},
"prolong.untouched": {
"terms": {
"field": "prolong.untouched"
}
},
"duration.untouched": {
"terms": {
"field": "duration.untouched"
}
},
"mb.untouched": {
"histogram": {
"field": "mb.untouched",
"interval": 500,
"min_doc_count": 1
}
},
"sms.untouched": {
"histogram": {
"field": "sms.untouched",
"interval": 250,
"min_doc_count": 1
}
},
"minutes.untouched": {
"histogram": {
"field": "minutes.untouched",
"interval": 100,
"min_doc_count": 1
}
},
"price_monthly.untouched": {
"histogram": {
"field": "price_monthly.untouched",
"interval": 5,
"min_doc_count": 1
}
}
}
}
In order to use minimum_should_match, you need to rewrite your filtered query a little bit, i.e. you need to move your should clause to the query part of the filtered query and just keep must_not in the filter part (because missing is a filter). Then you can add minimum_should_match: 3 in the bool query part as shown below:
{
"size": 100,
"sort": {
"price_monthly": "asc"
},
"query": {
"filtered": {
"query": {
"bool": {
"minimum_should_match": 3,
"must": [],
"should": [
[
{
"range": {
"mb.untouched": {
"gte": "0",
"lt": "500"
}
}
},
{
"range": {
"mb.untouched": {
"gte": "500",
"lt": "1000"
}
}
}
],
[
{
"range": {
"minutes.untouched": {
"gte": "0",
"lt": "100"
}
}
},
{
"range": {
"minutes.untouched": {
"gte": "200",
"lt": "300"
}
}
}
],
[
{
"range": {
"sms.untouched": {
"gte": "750",
"lt": "1000"
}
}
}
]
]
}
},
"filter": {
"bool": {
"must_not": {
"missing": {
"field": "provider.untouched"
}
}
}
},
"strategy": "query_first"
}
},
"aggs": {
"provider.untouched": {
"terms": {
"field": "provider.untouched"
}
},
"prolong.untouched": {
"terms": {
"field": "prolong.untouched"
}
},
"duration.untouched": {
"terms": {
"field": "duration.untouched"
}
},
"mb.untouched": {
"histogram": {
"field": "mb.untouched",
"interval": 500,
"min_doc_count": 1
}
},
"sms.untouched": {
"histogram": {
"field": "sms.untouched",
"interval": 250,
"min_doc_count": 1
}
},
"minutes.untouched": {
"histogram": {
"field": "minutes.untouched",
"interval": 100,
"min_doc_count": 1
}
},
"price_monthly.untouched": {
"histogram": {
"field": "price_monthly.untouched",
"interval": 5,
"min_doc_count": 1
}
}
}
}

Resources