How to order serial_diff aggregation result in Elasticsearch? - elasticsearch

I have built a query based on the serial_diff aggregation. I am trying to sort the results by the value of the serial_diff aggregation, but I am struggling to get them in order. The query is below.
GET db/_search
{
"size": 0,
"query": {
"bool": {
"must": [
{
"terms": {
"Name": [
"q"
]
}
}
],
"filter": [
{
"range": {
"ts": {
"gte": "2020-03-09T09:00:00.000Z",
"lte": "2020-03-09T12:40:00.000Z",
"format": "date_optional_time"
}
}
}
]
}
},
"aggs": {
"sourceNameCount": {
"cardinality": {
"field": "sourceName"
}
},
"sourceName": {
"terms": {
"size": 100,
"field": "sourceName"
},
"aggs": {
"timeseries": {
"date_histogram": {
"field": "ts",
"min_doc_count": 1,
"interval": "15m",
"order": {
"_key": "asc"
}
},
"aggs": {
"the_sum":{
"avg":{
"field": "libVal"
}
},
"ts_diff":{
"serial_diff": {
"buckets_path": "the_sum",
"lag": 1
}
}
}
}
}
}
}
}

Related

Elasticsearch add range filter to aggregation

I'm not experienced with Elasticsearch and I have to add a range filter on the field "data.elements.id_element" to the following query:
{
"aggs": {
"2": {
"date_histogram": {
"field": "#timestamp",
"calendar_interval": "1d",
"min_doc_count": 1
},
"aggs": {
"elementId": {
"terms": {
"field": "data.elements.id_element",
"order": {
"_count": "desc"
},
"size": 1000
},
"aggs": {
"Device": {
"filters": {
},
"aggs": {
}
}
}
}
}
}
},
"size": 0,
"docvalue_fields": [
{
"field": "#timestamp",
"format": "date_time"
}
],
"query": {
"bool": {
"filter": [
{
"range": {
"#timestamp": {
"gte": "startDate",
"lte": "endDate",
"format": "strict_date_optional_time"
}
}
}
]
}
}
}
I've tried adding it to the range part like this, but it is ignored:
{
"aggs": {
"2": {
"date_histogram": {
"field": "#timestamp",
"calendar_interval": "1d",
"min_doc_count": 1
},
"aggs": {
"elementId": {
"terms": {
"field": "data.elements.id_element",
"order": {
"_count": "desc"
},
"size": 1000
},
"aggs": {
"Device": {
"filters": {
},
"aggs": {
}
}
}
}
}
}
},
"size": 0,
"docvalue_fields": [
{
"field": "#timestamp",
"format": "date_time"
}
],
"query": {
"bool": {
"filter": [
{
"range": {
"#timestamp": {
"gte": "startDate",
"lte": "endDate",
"format": "strict_date_optional_time"
}
}
},
{
"range": {
"data.elements.id_element": {
"gte": 1,
"lte": 1001
}
}
}
]
}
}
}
I've tried this too:
{
"aggs": {
"2": {
"date_histogram": {
"field": "#timestamp",
"calendar_interval": "1d",
"min_doc_count": 1
},
"aggs": {
"elementId": {
"terms": {
"field": "data.elements.id_element",
"order": {
"_count": "desc"
},
"size": 1000
},
"aggs": {
"Device": {
"filters": {
},
"aggs": {
}
}
}
}
}
}
},
"size": 0,
"docvalue_fields": [
{
"field": "#timestamp",
"format": "date_time"
}
],
"query": {
"bool": {
"must": [
{
"query_string": {
"query": "data.elements.id_element:[1 TO 1001]",
"analyze_wildcard": true
}
}
],
"filter": [
{
"range": {
"#timestamp": {
"gte": "startDate",
"lte": "endDate",
"format": "strict_date_optional_time"
}
}
}
]
}
}
}
Same result: I get random element ids, and the range filter/condition is not respected.
Any ideas, please?
Thanks.
For others who may face the same problem: I used partitions, splitting my query into several queries following this doc:
https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-bucket-terms-aggregation.html#_filtering_values_with_partitions
Maybe there is a better solution, but this is what worked for me in my context.
Assuming that you want to apply a filter to a particular aggregation, this can be done as below:
{
"aggs": {
"elementId": {
"aggs": {
"elementId": {
"terms": {
"field": "data.elements.id_element",
"order": {
"_count": "desc"
},
"size": 1000
}
}
},
"filter": {
"bool": {
"filter": [
{
"range": {
"#timestamp": {
"gte": "startDate",
"lte": "endDate",
"format": "strict_date_optional_time"
}
}
}
]
}
}
}
}
}

Elasticsearch query : how to use terms query with Range?

I'm new to Elasticsearch. How can I use a terms query together with a range? Or, if this is not possible, how should I modify the query?
Here is my query:
{
"size": 0,
"query": {
"terms": {
"action": [
"created",
"updated",
"deleted"
]
}
},
"aggs": {
"2": {
"terms": {
"field": "action",
"order": {
"_count": "desc"
},
"size": 100
},
"aggs": {
"3": {
"date_histogram": {
"field": "timestamp",
"fixed_interval": "30m",
"min_doc_count": 1
}
}
}
}
}
}
Here is the time range which I want to add to it:
{
"range": {
"timestamp": {
"gte": "now-5y",
"lte": "now",
"format": "epoch_millis"
}
}
}
You need to combine both terms and range constraints using a bool/filter query, like this:
{
"size": 0,
"query": {
"bool": {
"filter": [
{
"terms": {
"action": [
"created",
"updated",
"deleted"
]
}
},
{
"range": {
"timestamp": {
"gte": "now-5y",
"lte": "now",
"format": "epoch_millis"
}
}
}
]
}
},
"aggs": {
"2": {
"terms": {
"field": "action",
"order": {
"_count": "desc"
},
"size": 100
},
"aggs": {
"3": {
"date_histogram": {
"field": "timestamp",
"fixed_interval": "30m",
"min_doc_count": 1
}
}
}
}
}
}

Elastic search query Pagination help on Aggregations

I have tried the query below for pagination on aggregations, but it is not working properly.
I am getting the error "reason": "[40:7] [terms] unknown field [from], parser not found"
{
"size": 0,
"query": {
"bool": {
"must": [
{
"term": {
"answer.keyword": "UNHANDLED"
}
},
{
"term": {
"source.keyword": "QUAL2"
}
}
]
}
},
"aggs": {
"MyBuckets": {
"terms": {
"field": "question.keyword",
"order": {
"_count": "asc"
},
"size": "10"
},
"aggs": {
"MyBuckets": {
"terms": {
"field": "timestamp",
"order": {
"_count": "asc"
},
"size": "3",
"from": 8
}
}
}
}
}
}
Only size is supported; you have to remove the "from" param from the aggregation query.
You can try using partitions in the aggregation.
Try out the below query:
{
"size": 0,
"query": {
"bool": {
"must": [
{
"term": {
"answer.keyword": "UNHANDLED"
}
},
{
"term": {
"source.keyword": "QUAL2"
}
}
]
}
},
"aggs": {
"MyBuckets": {
"terms": {
"field": "question.keyword",
"order": {
"_count": "asc"
},
"size": "10"
},
"aggs": {
"MyBuckets": {
"terms": {
"field": "timestamp",
"order": {
"_count": "asc"
},
"size": "3",
"include": {
"partition": 1,
"num_partitions": 10
}
}
}
}
}
}
}

Combine two elastic queries into 1. How?

I have two queries, each of which fetches results when run as a GET operation.
The 1st query is -
{
"query": {
"bool": {
"must": [
{
"query_string": {
"query": "*",
"analyze_wildcard": true
}
},
{
"range": {
"database-status.meta.current-time": {
"lte": "now-91d/d"
}
}
}
],
"must_not": []
}
},
"size": 0,
"_source": {
"excludes": []
},
"aggs": {
"2": {
"date_histogram": {
"field": "database-status.meta.current-time",
"interval": "1h",
"time_zone": "CST6CDT",
"min_doc_count": 1
},
"aggs": {
"3": {
"terms": {
"field": "database-status.name.keyword",
"size": 500,
"order": {
"1": "desc"
}
},
"aggs": {
"1": {
"sum": {
"field": "database-status.status-properties.rate-properties.cache-properties.compressed-tree-cache-hit-rate.value",
"script": "_value/60"
}
}
}
}
}
}
}
}
and the 2nd query is -
{
"query": {
"bool": {
"must": [
{
"query_string": {
"query": "*",
"analyze_wildcard": true
}
},
{
"range": {
"database-status.meta.current-time": {
"lte": "now-91d/d"
}
}
}
],
"must_not": []
}
},
"size": 0,
"_source": {
"excludes": []
},
"aggs": {
"2": {
"date_histogram": {
"field": "database-status.meta.current-time",
"interval": "1h",
"time_zone": "CST6CDT",
"min_doc_count": 1
},
"aggs": {
"3": {
"terms": {
"field": "database-status.name.keyword",
"size": 500,
"order": {
"1": "desc"
}
},
"aggs": {
"1": {
"sum": {
"field": "database-status.status-properties.rate-properties.cache-properties.compressed-tree-cache-miss-rate.value",
"script": "_value/60"
}
}
}
}
}
}
}
}
How do I combine two queries into 1 query and get both the results in the same result sets? Based on this I'll try to replicate the method with other queries and even try to combine 3 or more queries into 1.
There are two options to do that:
Using multi search (msearch) will allow you to run one request to ES containing both queries. The msearch response will contain both queries' responses separately, and you can then choose how to combine the answers.
combine the queries in a single bool:
So let's say you have:
Q1->bool->must->inner-q-1
and Q2->bool->must->inner-q-2
then you can combine them with should:
Q3->bool->should->[inner-q-1, inner-q-2], with minimum_should_match set to 1 (very important!)
I made use of nested aggregation.
Here is the combined code -
{
"query": {
"bool": {
"must": [
{
"query_string": {
"query": "*",
"analyze_wildcard": true
}
},
{
"range": {
"server-status.meta.current-time": {
"lte": "now-91d/d"
}
}
}
],
"must_not": []
}
},
"size": 0,
"_source": {
"excludes": []
},
"aggs": {
"time-interval": {
"date_histogram": {
"field": "server-status.meta.current-time",
"interval": "1h",
"time_zone": "CST6CDT",
"min_doc_count": 1
},
"aggs": {
"http-server": {
"terms": {
"field": "server-status.type.keyword",
"include": "http-server",
"size": 500,
"order": {
"1": "desc"
}
},
"aggs": {
"1": {
"sum": {
"field": "server-status.status-properties.expanded-tree-cache-hit-rate.value",
"script": "_value/60"
}
},
"2": {
"sum": {
"field": "server-status.status-properties.expanded-tree-cache-miss-rate.value",
"script": "_value/60"
}
},
"3": {
"terms": {
"field": "server-status.name.keyword",
"size": 500,
"order": {
"1": "desc"
}
},
"aggs": {
"1": {
"sum": {
"field": "server-status.status-properties.expanded-tree-cache-hit-rate.value",
"script": "_value/60"
}
},
"2": {
"sum": {
"field": "server-status.status-properties.expanded-tree-cache-miss-rate.value",
"script": "_value/60"
}
}
}
}
}
}
}
}
}
}

ElasticSearch extended_bounds over range with no data/hitdocs

I've a range for which no hitdocs exist. When a date_histogram aggregation based query is run with extended_bounds over this no-data range, nothing is returned.
However, for a range which has at least 1 hitdoc, buckets data is returned for the range as specified using extended_bounds.
How can I achieve similar results over a range with no hitdocs?
Sample query -
{
"size": 0,
"query": {
"filtered": {
"filter": {
"bool": {
"must": [
{
"term": {
"kind": "hit-search"
}
},
{
"range": {
"startTime": {
"gte": 1506429661000,
"lte": 1506516061000
}
}
}
]
}
}
}
},
"aggs": {
"perHost": {
"terms": {
"field": "user"
},
"aggs": {
"ts": {
"date_histogram": {
"field": "startTime",
"interval": "30m",
"min_doc_count": 0,
"extended_bounds": {
"min": 1506429661000,
"max": 1506516061000
}
},
"aggs": {
"numQuery": {
"cardinality": {
"field": "queryId"
}
}
}
}
}
}
},
"from": 0
}
One can use the missing aggregation for this. The above query looks like this after the update -
{
"size": 0,
"query": {
"filtered": {
"filter": {
"bool": {
"must": [
{
"term": {
"kind": "hit-search"
}
},
{
"range": {
"startTime": {
"gte": 1506429661000,
"lte": 1506516061000
}
}
}
]
}
}
}
},
"aggs": {
"perHost": {
"missing": {
"field": "user"
},
"aggs": {
"ts": {
"date_histogram": {
"field": "startTime",
"interval": "30m",
"min_doc_count": 0,
"extended_bounds": {
"min": 1506429661000,
"max": 1506516061000
}
},
"aggs": {
"numQuery": {
"cardinality": {
"field": "queryId"
}
}
}
}
}
}
},
"from": 0
}
An observation - extended_bounds doesn't seem to be working for missing.

Resources