Elasticsearch add range filter to aggregation - elasticsearch

I'm not experienced with Elasticsearch, and I have to add a range filter on the field "data.elements.id_element" to the following query:
{
"aggs": {
"2": {
"date_histogram": {
"field": "#timestamp",
"calendar_interval": "1d",
"min_doc_count": 1
},
"aggs": {
"elementId": {
"terms": {
"field": "data.elements.id_element",
"order": {
"_count": "desc"
},
"size": 1000
},
"aggs": {
"Device": {
"filters": {
},
"aggs": {
}
}
}
}
}
}
},
"size": 0,
"docvalue_fields": [
{
"field": "#timestamp",
"format": "date_time"
}
],
"query": {
"bool": {
"filter": [
{
"range": {
"#timestamp": {
"gte": "startDate",
"lte": "endDate",
"format": "strict_date_optional_time"
}
}
}
]
}
}
}
I've tried adding it as a second range clause in the filter, like this, but it's ignored:
{
"aggs": {
"2": {
"date_histogram": {
"field": "#timestamp",
"calendar_interval": "1d",
"min_doc_count": 1
},
"aggs": {
"elementId": {
"terms": {
"field": "data.elements.id_element",
"order": {
"_count": "desc"
},
"size": 1000
},
"aggs": {
"Device": {
"filters": {
},
"aggs": {
}
}
}
}
}
}
},
"size": 0,
"docvalue_fields": [
{
"field": "#timestamp",
"format": "date_time"
}
],
"query": {
"bool": {
"filter": [
{
"range": {
"#timestamp": {
"gte": "startDate",
"lte": "endDate",
"format": "strict_date_optional_time"
}
}
},
{
"range": {
"data.elements.id_element": {
"gte": 1,
"lte": 1001
}
}
}
]
}
}
}
I've tried this too:
{
"aggs": {
"2": {
"date_histogram": {
"field": "#timestamp",
"calendar_interval": "1d",
"min_doc_count": 1
},
"aggs": {
"elementId": {
"terms": {
"field": "data.elements.id_element",
"order": {
"_count": "desc"
},
"size": 1000
},
"aggs": {
"Device": {
"filters": {
},
"aggs": {
}
}
}
}
}
}
},
"size": 0,
"docvalue_fields": [
{
"field": "#timestamp",
"format": "date_time"
}
],
"query": {
"bool": {
"must": [
{
"query_string": {
"query": "data.elements.id_element:[1 TO 1001]",
"analyze_wildcard": true
}
}
],
"filter": [
{
"range": {
"#timestamp": {
"gte": "startDate",
"lte": "endDate",
"format": "strict_date_optional_time"
}
}
}
]
}
}
}
Same result: random element ids are returned, and the range filter/condition is not respected.
Any ideas, please?
Thanks.

For others who may face the same problem: I used partitions, splitting my query into several queries following this doc:
https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-bucket-terms-aggregation.html#_filtering_values_with_partitions
Maybe there is a better solution, but this is what worked for me in my context.

Assuming that you want to apply a filter to a particular aggregation, this can be done as shown below:
{
"aggs": {
"elementId": {
"aggs": {
"elementId": {
"terms": {
"field": "data.elements.id_element",
"order": {
"_count": "desc"
},
"size": 1000
}
}
},
"filter": {
"bool": {
"filter": [
{
"range": {
"#timestamp": {
"gte": "startDate",
"lte": "endDate",
"format": "strict_date_optional_time"
}
}
}
]
}
}
}
}
}

Related

Elasticsearch query : how to use terms query with Range?

I'm new to Elasticsearch. How can I use a terms query together with a range? Or how should I modify the query if this is not possible?
Here is my query:
{
"size": 0,
"query": {
"terms": {
"action": [
"created",
"updated",
"deleted"
]
}
},
"aggs": {
"2": {
"terms": {
"field": "action",
"order": {
"_count": "desc"
},
"size": 100
},
"aggs": {
"3": {
"date_histogram": {
"field": "timestamp",
"fixed_interval": "30m",
"min_doc_count": 1
}
}
}
}
}
}
Here is the time range that I want to add to it:
{
"range": {
"timestamp": {
"gte": "now-5y",
"lte": "now",
"format": "epoch_millis"
}
}
}
You need to combine both terms and range constraints using a bool/filter query, like this:
{
"size": 0,
"query": {
"bool": {
"filter": [
{
"terms": {
"action": [
"created",
"updated",
"deleted"
]
}
},
{
"range": {
"timestamp": {
"gte": "now-5y",
"lte": "now",
"format": "epoch_millis"
}
}
}
]
}
},
"aggs": {
"2": {
"terms": {
"field": "action",
"order": {
"_count": "desc"
},
"size": 100
},
"aggs": {
"3": {
"date_histogram": {
"field": "timestamp",
"fixed_interval": "30m",
"min_doc_count": 1
}
}
}
}
}
}

How to order serial_diff aggregation result in Elasticsearch?

I have built a query based on the serial_diff aggregation. I am trying to sort the results by the value produced by the serial_diff aggregation, but I am struggling to get them in order. The query is below.
GET db/_search
{
"size": 0,
"query": {
"bool": {
"must": [
{
"terms": {
"Name": [
"q"
]
}
}
],
"filter": [
{
"range": {
"ts": {
"gte": "2020-03-09T09:00:00.000Z",
"lte": "2020-03-09T12:40:00.000Z",
"format": "date_optional_time"
}
}
}
]
}
},
"aggs": {
"sourceNameCount": {
"cardinality": {
"field": "sourceName"
}
},
"sourceName": {
"terms": {
"size": 100,
"field": "sourceName"
},
"aggs": {
"timeseries": {
"date_histogram": {
"field": "ts",
"min_doc_count": 1,
"interval": "15m",
"order": {
"_key": "asc"
}
},
"aggs": {
"the_sum":{
"avg":{
"field": "libVal"
}
},
"ts_diff":{
"serial_diff": {
"buckets_path": "the_sum",
"lag": 1
}
}
}
}
}
}
}
}

How to aggregate minutely data to hourly after 90 days?

I would like to average minutely data into hourly data after a certain time period. What would the query for that be?
The query structure is -
GET ml_test_meters-2019_6/_search
{
"query": {
"bool": {
"must": [
{
"query_string": {
"query": "*",
"analyze_wildcard": true
}
},
{
"range": {
"host-status.meta.current-time": {
"gte": 1549611907552,
"lte": 1549654551498,
"format": "epoch_millis"
}
}
}
],
"must_not": []
}
},
"size": 0,
"_source": {
"excludes": []
},
"aggs": {
"2": {
"date_histogram": {
"field": "host-status.meta.current-time",
"interval": "1h",
"time_zone": "US/Central",
"min_doc_count": 1
},
"aggs": {
"3": {
"terms": {
"field": "host-status.name.keyword",
"size": 500,
"order": {
"1": "desc"
}
},
"aggs": {
"1": {
"avg": {
"field": "host-status.status-properties.status-detail.total-cpu-stat-iowait"
}
}
}
}
}
}
}
}
What could be a possible solution? I would like to insert the new data into the same index later on and delete the minutely data.

ElasticSearch extended_bounds over range with no data/hitdocs

I have a range for which no hitdocs exist. When a date_histogram aggregation-based query is run with extended_bounds over this no-data range, nothing is returned.
However, for a range which has at least 1 hitdoc, bucket data is returned for the range as specified using extended_bounds.
How can I achieve similar results over a range with no hitdocs?
Sample query -
{
"size": 0,
"query": {
"filtered": {
"filter": {
"bool": {
"must": [
{
"term": {
"kind": "hit-search"
}
},
{
"range": {
"startTime": {
"gte": 1506429661000,
"lte": 1506516061000
}
}
}
]
}
}
}
},
"aggs": {
"perHost": {
"terms": {
"field": "user"
},
"aggs": {
"ts": {
"date_histogram": {
"field": "startTime",
"interval": "30m",
"min_doc_count": 0,
"extended_bounds": {
"min": 1506429661000,
"max": 1506516061000
}
},
"aggs": {
"numQuery": {
"cardinality": {
"field": "queryId"
}
}
}
}
}
}
},
"from": 0
}
One can use the missing aggregation for this. The query above looks like this after the update:
{
"size": 0,
"query": {
"filtered": {
"filter": {
"bool": {
"must": [
{
"term": {
"kind": "hit-search"
}
},
{
"range": {
"startTime": {
"gte": 1506429661000,
"lte": 1506516061000
}
}
}
]
}
}
}
},
"aggs": {
"perHost": {
"missing": {
"field": "user"
},
"aggs": {
"ts": {
"date_histogram": {
"field": "startTime",
"interval": "30m",
"min_doc_count": 0,
"extended_bounds": {
"min": 1506429661000,
"max": 1506516061000
}
},
"aggs": {
"numQuery": {
"cardinality": {
"field": "queryId"
}
}
}
}
}
}
},
"from": 0
}
An observation - extended_bounds doesn't seem to be working for missing.

Using minimum_should_match in filtered elasticSearch query

I have a filtered elasticsearch query that works, but I want to use minimum_should_match to instruct ES to return only results that have at least 3 should matches. But I can't seem to figure out where to put minimum_should_match. Where should I put it?
{
"size": 100,
"sort": {
"price_monthly": "asc"
},
"query": {
"filtered": {
"query": {
"match_all": []
},
"filter": {
"bool": {
"must": [],
"should": [
[
{
"range": {
"mb.untouched": {
"gte": "0",
"lt": "500"
}
}
},
{
"range": {
"mb.untouched": {
"gte": "500",
"lt": "1000"
}
}
}
],
[
{
"range": {
"minutes.untouched": {
"gte": "0",
"lt": "100"
}
}
},
{
"range": {
"minutes.untouched": {
"gte": "200",
"lt": "300"
}
}
}
],
[
{
"range": {
"sms.untouched": {
"gte": "750",
"lt": "1000"
}
}
}
]
],
"must_not": {
"missing": {
"field": "provider.untouched"
}
}
}
},
"strategy": "query_first"
}
},
"aggs": {
"provider.untouched": {
"terms": {
"field": "provider.untouched"
}
},
"prolong.untouched": {
"terms": {
"field": "prolong.untouched"
}
},
"duration.untouched": {
"terms": {
"field": "duration.untouched"
}
},
"mb.untouched": {
"histogram": {
"field": "mb.untouched",
"interval": 500,
"min_doc_count": 1
}
},
"sms.untouched": {
"histogram": {
"field": "sms.untouched",
"interval": 250,
"min_doc_count": 1
}
},
"minutes.untouched": {
"histogram": {
"field": "minutes.untouched",
"interval": 100,
"min_doc_count": 1
}
},
"price_monthly.untouched": {
"histogram": {
"field": "price_monthly.untouched",
"interval": 5,
"min_doc_count": 1
}
}
}
}
In order to use minimum_should_match, you need to rewrite your filtered query a little bit, i.e. you need to move your should clause to the query part of the filtered query and just keep must_not in the filter part (because missing is a filter). Then you can add minimum_should_match: 3 in the bool query part as shown below:
{
"size": 100,
"sort": {
"price_monthly": "asc"
},
"query": {
"filtered": {
"query": {
"bool": {
"minimum_should_match": 3,
"must": [],
"should": [
[
{
"range": {
"mb.untouched": {
"gte": "0",
"lt": "500"
}
}
},
{
"range": {
"mb.untouched": {
"gte": "500",
"lt": "1000"
}
}
}
],
[
{
"range": {
"minutes.untouched": {
"gte": "0",
"lt": "100"
}
}
},
{
"range": {
"minutes.untouched": {
"gte": "200",
"lt": "300"
}
}
}
],
[
{
"range": {
"sms.untouched": {
"gte": "750",
"lt": "1000"
}
}
}
]
]
}
},
"filter": {
"bool": {
"must_not": {
"missing": {
"field": "provider.untouched"
}
}
}
},
"strategy": "query_first"
}
},
"aggs": {
"provider.untouched": {
"terms": {
"field": "provider.untouched"
}
},
"prolong.untouched": {
"terms": {
"field": "prolong.untouched"
}
},
"duration.untouched": {
"terms": {
"field": "duration.untouched"
}
},
"mb.untouched": {
"histogram": {
"field": "mb.untouched",
"interval": 500,
"min_doc_count": 1
}
},
"sms.untouched": {
"histogram": {
"field": "sms.untouched",
"interval": 250,
"min_doc_count": 1
}
},
"minutes.untouched": {
"histogram": {
"field": "minutes.untouched",
"interval": 100,
"min_doc_count": 1
}
},
"price_monthly.untouched": {
"histogram": {
"field": "price_monthly.untouched",
"interval": 5,
"min_doc_count": 1
}
}
}
}

Resources