ElasticSearch extended_bounds over range with no data/hitdocs - elasticsearch

I've a range for which no hitdocs exist. When a date_histogram aggregation based query is run with extended_bounds over this no-data range, nothing is returned.
However, for a range which has at least 1 hitdoc, buckets data is returned for the range as specified using extended_bounds.
How can I achieved similar results over a range with no hitdocs?
Sample query -
{
"size": 0,
"query": {
"filtered": {
"filter": {
"bool": {
"must": [
{
"term": {
"kind": "hit-search"
}
},
{
"range": {
"startTime": {
"gte": 1506429661000,
"lte": 1506516061000
}
}
}
]
}
}
}
},
"aggs": {
"perHost": {
"terms": {
"field": "user"
},
"aggs": {
"ts": {
"date_histogram": {
"field": "startTime",
"interval": "30m",
"min_doc_count": 0,
"extended_bounds": {
"min": 1506429661000,
"max": 1506516061000
}
},
"aggs": {
"numQuery": {
"cardinality": {
"field": "queryId"
}
}
}
}
}
}
},
"from": 0
}

One can use missing aggregation for the same. Above query looks like this after update -
{
"size": 0,
"query": {
"filtered": {
"filter": {
"bool": {
"must": [
{
"term": {
"kind": "hit-search"
}
},
{
"range": {
"startTime": {
"gte": 1506429661000,
"lte": 1506516061000
}
}
}
]
}
}
}
},
"aggs": {
"perHost": {
"missing": {
"field": "user"
},
"aggs": {
"ts": {
"date_histogram": {
"field": "startTime",
"interval": "30m",
"min_doc_count": 0,
"extended_bounds": {
"min": 1506429661000,
"max": 1506516061000
}
},
"aggs": {
"numQuery": {
"cardinality": {
"field": "queryId"
}
}
}
}
}
}
},
"from": 0
}
An observation - extended_bounds doesn't seem to be working for missing.

Related

Elasticsearch add range filter to aggregation

I'm not experimented in elasticsearch and I have to add a range filter for the field "data.elements.id_element" to the next query:
{
"aggs": {
"2": {
"date_histogram": {
"field": "#timestamp",
"calendar_interval": "1d",
"min_doc_count": 1
},
"aggs": {
"elementId": {
"terms": {
"field": "data.elements.id_element",
"order": {
"_count": "desc"
},
"size": 1000
},
"aggs": {
"Device": {
"filters": {
},
"aggs": {
}
}
}
}
}
}
},
"size": 0,
"docvalue_fields": [
{
"field": "#timestamp",
"format": "date_time"
}
],
"query": {
"bool": {
"filter": [
{
"range": {
"#timestamp": {
"gte": "startDate",
"lte": "endDate",
"format": "strict_date_optional_time"
}
}
}
]
}
}
}
I've tried to add to the range part like this, but it's ignored :
{
"aggs": {
"2": {
"date_histogram": {
"field": "#timestamp",
"calendar_interval": "1d",
"min_doc_count": 1
},
"aggs": {
"elementId": {
"terms": {
"field": "data.elements.id_element",
"order": {
"_count": "desc"
},
"size": 1000
},
"aggs": {
"Device": {
"filters": {
},
"aggs": {
}
}
}
}
}
}
},
"size": 0,
"docvalue_fields": [
{
"field": "#timestamp",
"format": "date_time"
}
],
"query": {
"bool": {
"filter": [
{
"range": {
"#timestamp": {
"gte": "startDate",
"lte": "endDate",
"format": "strict_date_optional_time"
}
}
},
{
"range": {
"data.elements.id_element": {
"gte": 1,
"lte": 1001
}
}
}
]
}
}
}
I've tried this too:
{
"aggs": {
"2": {
"date_histogram": {
"field": "#timestamp",
"calendar_interval": "1d",
"min_doc_count": 1
},
"aggs": {
"elementId": {
"terms": {
"field": "data.elements.id_element",
"order": {
"_count": "desc"
},
"size": 1000
},
"aggs": {
"Device": {
"filters": {
},
"aggs": {
}
}
}
}
}
}
},
"size": 0,
"docvalue_fields": [
{
"field": "#timestamp",
"format": "date_time"
}
],
"query": {
"bool": {
"must": [
{
"query_string": {
"query": "data.elements.id_element:[1 TO 1001]",
"analyze_wildcard": true,
}
}
],
"filter": [
{
"range": {
"#timestamp": {
"gte": "startDate",
"lte": "endDate",
"format": "strict_date_optional_time"
}
}
}
]
}
}
}
Same result, aleatoire elements id and does not respect the range filter/condition.
plz any idea.
Thanks.
For others who can face the same problem, I used partition so I've dispatched my query into many queries following this doc:
https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-bucket-terms-aggregation.html#_filtering_values_with_partitions
Maybe there is better solution, but this what worked for me in my context.
Considering, that you want to apply filter on a particular aggregation, this can be done as below:
{
"aggs": {
"elementId": {
"aggs": {
"elementId": {
"terms": {
"field": "data.elements.id_element",
"order": {
"_count": "desc"
},
"size": 1000
}
}
},
"filter": {
"bool": {
"filter": [
{
"range": {
"#timestamp": {
"gte": "startDate",
"lte": "endDate",
"format": "strict_date_optional_time"
}
}
}
]
}
}
}
}
}

How to order serial_diff aggregation result in Elasticsearch?

I have build a query based on serial_diff aggregation. I am trying to sort the result based on the result of the serial_diff agg. I am struggling to get the result in order, below.
GET db/_search
{
"size": 0,
"query": {
"bool": {
"must": [
{
"terms": {
"Name": [
"q"
]
}
}
],
"filter": [
{
"range": {
"ts": {
"gte": "2020-03-09T09:00:00.000Z",
"lte": "2020-03-09T12:40:00.000Z",
"format": "date_optional_time"
}
}
}
]
}
},
"aggs": {
"sourceNameCount": {
"cardinality": {
"field": "sourceName"
}
},
"sourceName": {
"terms": {
"size": 100,
"field": "sourceName"
},
"aggs": {
"timeseries": {
"date_histogram": {
"field": "ts",
"min_doc_count": 1,
"interval": "15m",
"order": {
"_key": "asc"
}
},
"aggs": {
"the_sum":{
"avg":{
"field": "libVal"
}
},
"ts_diff":{
"serial_diff": {
"buckets_path": "the_sum",
"lag": 1
}
}
}
}
}
}
}
}

Combine two elastic queries into 1. How?

I have two queries which fetched results when performed a GET operation.
The 1st query is -
{
"query": {
"bool": {
"must": [
{
"query_string": {
"query": "*",
"analyze_wildcard": true
}
},
{
"range": {
"database-status.meta.current-time": {
"lte": "now-91d/d"
}
}
}
],
"must_not": []
}
},
"size": 0,
"_source": {
"excludes": []
},
"aggs": {
"2": {
"date_histogram": {
"field": "database-status.meta.current-time",
"interval": "1h",
"time_zone": "CST6CDT",
"min_doc_count": 1
},
"aggs": {
"3": {
"terms": {
"field": "database-status.name.keyword",
"size": 500,
"order": {
"1": "desc"
}
},
"aggs": {
"1": {
"sum": {
"field": "database-status.status-properties.rate-properties.cache-properties.compressed-tree-cache-hit-rate.value",
"script": "_value/60"
}
}
}
}
}
}
}
}
and the 2nd query is -
{
"query": {
"bool": {
"must": [
{
"query_string": {
"query": "*",
"analyze_wildcard": true
}
},
{
"range": {
"database-status.meta.current-time": {
"lte": "now-91d/d"
}
}
}
],
"must_not": []
}
},
"size": 0,
"_source": {
"excludes": []
},
"aggs": {
"2": {
"date_histogram": {
"field": "database-status.meta.current-time",
"interval": "1h",
"time_zone": "CST6CDT",
"min_doc_count": 1
},
"aggs": {
"3": {
"terms": {
"field": "database-status.name.keyword",
"size": 500,
"order": {
"1": "desc"
}
},
"aggs": {
"1": {
"sum": {
"field": "database-status.status-properties.rate-properties.cache-properties.compressed-tree-cache-miss-rate.value",
"script": "_value/60"
}
}
}
}
}
}
}
}
How do I combine two queries into 1 query and get both the results in the same result sets? Based on this I'll try to replicate the method with other queries and even try to combine 3 or more queries into 1.
There are two options to do that:
using multi search (msearch) will allow you to run one request to ES containing both queries. The response of the msearch will contain both queries responses separately, and you can then choose how to combine the answers.
combine the queries in a single bool:
so lets say you have:
Q1->bool->must->inner-q-1
and Q2->bool->must->inner-q-2
then you can combine them with should:
Q3->bool->should->[inner-q-1, inner-q-2], with minimum_should_match equals 1 (very important!)
I made use of nested aggregation.
Here is the combined code -
{
"query": {
"bool": {
"must": [
{
"query_string": {
"query": "*",
"analyze_wildcard": true
}
},
{
"range": {
"server-status.meta.current-time": {
"lte": "now-91d/d"
}
}
}
],
"must_not": []
}
},
"size": 0,
"_source": {
"excludes": []
},
"aggs": {
"time-interval": {
"date_histogram": {
"field": "server-status.meta.current-time",
"interval": "1h",
"time_zone": "CST6CDT",
"min_doc_count": 1
},
"aggs": {
"http-server": {
"terms": {
"field": "server-status.type.keyword",
"include": "http-server",
"size": 500,
"order": {
"1": "desc"
}
},
"aggs": {
"1": {
"sum": {
"field": "server-status.status-properties.expanded-tree-cache-hit-rate.value",
"script": "_value/60"
}
},
"2": {
"sum": {
"field": "server-status.status-properties.expanded-tree-cache-miss-rate.value",
"script": "_value/60"
}
},
"3": {
"terms": {
"field": "server-status.name.keyword",
"size": 500,
"order": {
"1": "desc"
}
},
"aggs": {
"1": {
"sum": {
"field": "server-status.status-properties.expanded-tree-cache-hit-rate.value",
"script": "_value/60"
}
},
"2": {
"sum": {
"field": "server-status.status-properties.expanded-tree-cache-miss-rate.value",
"script": "_value/60"
}
}
}
}
}
}
}
}
}
}

How to aggregate minutely data to hourly after 90 days?

I would like to average out minutely data to hourly after a certain time period. For that what will be the query.
The query structure is -
GET ml_test_meters-2019_6/_search
{
"query": {
"bool": {
"must": [
{
"query_string": {
"query": "*",
"analyze_wildcard": true
}
},
{
"range": {
"host-status.meta.current-time": {
"gte": 1549611907552,
"lte": 1549654551498,
"format": "epoch_millis"
}
}
}
],
"must_not": []
}
},
"size": 0,
"_source": {
"excludes": []
},
"aggs": {
"2": {
"date_histogram": {
"field": "host-status.meta.current-time",
"interval": "1h",
"time_zone": "US/Central",
"min_doc_count": 1
},
"aggs": {
"3": {
"terms": {
"field": "host-status.name.keyword",
"size": 500,
"order": {
"1": "desc"
}
},
"aggs": {
"1": {
"avg": {
"field": "host-status.status-properties.status-detail.total-cpu-stat-iowait"
}
}
}
}
}
}
}
}
What could be the possible solution ? I would like to insert the new data into the same index later on and delete minutely data.

Using minimum_should_match in filtered elasticSearch query

I have a filtered elasticsearch query that works, but I want to use minimum_should_match to instruct ES to return only results that have at least 3 should matches. But I can't seem to figure out where to put minimum_should_match. Where should I put it?
{
"size": 100,
"sort": {
"price_monthly": "asc"
},
"query": {
"filtered": {
"query": {
"match_all": []
},
"filter": {
"bool": {
"must": [],
"should": [
[
{
"range": {
"mb.untouched": {
"gte": "0",
"lt": "500"
}
}
},
{
"range": {
"mb.untouched": {
"gte": "500",
"lt": "1000"
}
}
}
],
[
{
"range": {
"minutes.untouched": {
"gte": "0",
"lt": "100"
}
}
},
{
"range": {
"minutes.untouched": {
"gte": "200",
"lt": "300"
}
}
}
],
[
{
"range": {
"sms.untouched": {
"gte": "750",
"lt": "1000"
}
}
}
]
],
"must_not": {
"missing": {
"field": "provider.untouched"
}
}
}
},
"strategy": "query_first"
}
},
"aggs": {
"provider.untouched": {
"terms": {
"field": "provider.untouched"
}
},
"prolong.untouched": {
"terms": {
"field": "prolong.untouched"
}
},
"duration.untouched": {
"terms": {
"field": "duration.untouched"
}
},
"mb.untouched": {
"histogram": {
"field": "mb.untouched",
"interval": 500,
"min_doc_count": 1
}
},
"sms.untouched": {
"histogram": {
"field": "sms.untouched",
"interval": 250,
"min_doc_count": 1
}
},
"minutes.untouched": {
"histogram": {
"field": "minutes.untouched",
"interval": 100,
"min_doc_count": 1
}
},
"price_monthly.untouched": {
"histogram": {
"field": "price_monthly.untouched",
"interval": 5,
"min_doc_count": 1
}
}
}
}
In order to use minimum_should_match, you need to rewrite your filtered query a little bit, i.e. you need to move your should clause to the query part of the filtered query and just keep must_not in the filter part (because missing is a filter). Then you can add minimum_should_match: 3 in the bool query part as shown below:
{
"size": 100,
"sort": {
"price_monthly": "asc"
},
"query": {
"filtered": {
"query": {
"bool": {
"minimum_should_match": 3,
"must": [],
"should": [
[
{
"range": {
"mb.untouched": {
"gte": "0",
"lt": "500"
}
}
},
{
"range": {
"mb.untouched": {
"gte": "500",
"lt": "1000"
}
}
}
],
[
{
"range": {
"minutes.untouched": {
"gte": "0",
"lt": "100"
}
}
},
{
"range": {
"minutes.untouched": {
"gte": "200",
"lt": "300"
}
}
}
],
[
{
"range": {
"sms.untouched": {
"gte": "750",
"lt": "1000"
}
}
}
]
]
}
},
"filter": {
"bool": {
"must_not": {
"missing": {
"field": "provider.untouched"
}
}
}
},
"strategy": "query_first"
}
},
"aggs": {
"provider.untouched": {
"terms": {
"field": "provider.untouched"
}
},
"prolong.untouched": {
"terms": {
"field": "prolong.untouched"
}
},
"duration.untouched": {
"terms": {
"field": "duration.untouched"
}
},
"mb.untouched": {
"histogram": {
"field": "mb.untouched",
"interval": 500,
"min_doc_count": 1
}
},
"sms.untouched": {
"histogram": {
"field": "sms.untouched",
"interval": 250,
"min_doc_count": 1
}
},
"minutes.untouched": {
"histogram": {
"field": "minutes.untouched",
"interval": 100,
"min_doc_count": 1
}
},
"price_monthly.untouched": {
"histogram": {
"field": "price_monthly.untouched",
"interval": 5,
"min_doc_count": 1
}
}
}
}

Resources