How to aggregate minutely data to hourly after 90 days? - elasticsearch

I would like to average out minutely data to hourly after a certain time period. For that what will be the query.
The query structure is -
GET ml_test_meters-2019_6/_search
{
"query": {
"bool": {
"must": [
{
"query_string": {
"query": "*",
"analyze_wildcard": true
}
},
{
"range": {
"host-status.meta.current-time": {
"gte": 1549611907552,
"lte": 1549654551498,
"format": "epoch_millis"
}
}
}
],
"must_not": []
}
},
"size": 0,
"_source": {
"excludes": []
},
"aggs": {
"2": {
"date_histogram": {
"field": "host-status.meta.current-time",
"interval": "1h",
"time_zone": "US/Central",
"min_doc_count": 1
},
"aggs": {
"3": {
"terms": {
"field": "host-status.name.keyword",
"size": 500,
"order": {
"1": "desc"
}
},
"aggs": {
"1": {
"avg": {
"field": "host-status.status-properties.status-detail.total-cpu-stat-iowait"
}
}
}
}
}
}
}
}
What could be the possible solution ? I would like to insert the new data into the same index later on and delete minutely data.

Related

Elasticsearch add range filter to aggregation

I'm not experimented in elasticsearch and I have to add a range filter for the field "data.elements.id_element" to the next query:
{
"aggs": {
"2": {
"date_histogram": {
"field": "#timestamp",
"calendar_interval": "1d",
"min_doc_count": 1
},
"aggs": {
"elementId": {
"terms": {
"field": "data.elements.id_element",
"order": {
"_count": "desc"
},
"size": 1000
},
"aggs": {
"Device": {
"filters": {
},
"aggs": {
}
}
}
}
}
}
},
"size": 0,
"docvalue_fields": [
{
"field": "#timestamp",
"format": "date_time"
}
],
"query": {
"bool": {
"filter": [
{
"range": {
"#timestamp": {
"gte": "startDate",
"lte": "endDate",
"format": "strict_date_optional_time"
}
}
}
]
}
}
}
I've tried to add to the range part like this, but it's ignored :
{
"aggs": {
"2": {
"date_histogram": {
"field": "#timestamp",
"calendar_interval": "1d",
"min_doc_count": 1
},
"aggs": {
"elementId": {
"terms": {
"field": "data.elements.id_element",
"order": {
"_count": "desc"
},
"size": 1000
},
"aggs": {
"Device": {
"filters": {
},
"aggs": {
}
}
}
}
}
}
},
"size": 0,
"docvalue_fields": [
{
"field": "#timestamp",
"format": "date_time"
}
],
"query": {
"bool": {
"filter": [
{
"range": {
"#timestamp": {
"gte": "startDate",
"lte": "endDate",
"format": "strict_date_optional_time"
}
}
},
{
"range": {
"data.elements.id_element": {
"gte": 1,
"lte": 1001
}
}
}
]
}
}
}
I've tried this too:
{
"aggs": {
"2": {
"date_histogram": {
"field": "#timestamp",
"calendar_interval": "1d",
"min_doc_count": 1
},
"aggs": {
"elementId": {
"terms": {
"field": "data.elements.id_element",
"order": {
"_count": "desc"
},
"size": 1000
},
"aggs": {
"Device": {
"filters": {
},
"aggs": {
}
}
}
}
}
}
},
"size": 0,
"docvalue_fields": [
{
"field": "#timestamp",
"format": "date_time"
}
],
"query": {
"bool": {
"must": [
{
"query_string": {
"query": "data.elements.id_element:[1 TO 1001]",
"analyze_wildcard": true,
}
}
],
"filter": [
{
"range": {
"#timestamp": {
"gte": "startDate",
"lte": "endDate",
"format": "strict_date_optional_time"
}
}
}
]
}
}
}
Same result, aleatoire elements id and does not respect the range filter/condition.
plz any idea.
Thanks.
For others who can face the same problem, I used partition so I've dispatched my query into many queries following this doc:
https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-bucket-terms-aggregation.html#_filtering_values_with_partitions
Maybe there is better solution, but this what worked for me in my context.
Considering, that you want to apply filter on a particular aggregation, this can be done as below:
{
"aggs": {
"elementId": {
"aggs": {
"elementId": {
"terms": {
"field": "data.elements.id_element",
"order": {
"_count": "desc"
},
"size": 1000
}
}
},
"filter": {
"bool": {
"filter": [
{
"range": {
"#timestamp": {
"gte": "startDate",
"lte": "endDate",
"format": "strict_date_optional_time"
}
}
}
]
}
}
}
}
}

Elasticsearch query : how to use terms query with Range?

im new to Elasticsearch, how to use terms query with range? Or how to modify if this is not possible
here is my query
{
"size": 0,
"query": {
"terms": {
"action": [
"created",
"updated",
"deleted"
]
}
},
"aggs": {
"2": {
"terms": {
"field": "action",
"order": {
"_count": "desc"
},
"size": 100
},
"aggs": {
"3": {
"date_histogram": {
"field": "timestamp",
"fixed_interval": "30m",,
"min_doc_count": 1
}
}
}
}
}
}
here is the time range which i want to add in it,
{
"range": {
"timestamp": {
"gte": "now-5y",
"lte": "now",
"format": "epoch_millis"
}
}
You need to combine both terms and range constraints using a bool/filter query, like this:
{
"size": 0,
"query": {
"bool": {
"filter": [
{
"terms": {
"action": [
"created",
"updated",
"deleted"
]
}
},
{
"range": {
"timestamp": {
"gte": "now-5y",
"lte": "now",
"format": "epoch_millis"
}
}
}
]
}
},
"aggs": {
"2": {
"terms": {
"field": "action",
"order": {
"_count": "desc"
},
"size": 100
},
"aggs": {
"3": {
"date_histogram": {
"field": "timestamp",
"fixed_interval": "30m",
"min_doc_count": 1
}
}
}
}
}
}

How to shift elastic graph by 1 hr?

I have a visualization on hourly basis. Data from 1 to 2 is displayed at 1 o'clock. I want it to be displayed at 2 o'clock. How can I shift the graph by 1 ?
This is the query that I'm using-
Query -
{
"query": {
"bool": {
"must": [
{
"query_string": {
"query": "*",
"analyze_wildcard": true
}
},
{
"match": {
"server-status.name.keyword": {
"query": "https-x509",
"type": "phrase"
}
}
},
{
"range": {
"server-status.meta.current-time": {
"gte": 1550660541174,
"lte": 1550674941175,
"format": "epoch_millis"
}
}
}
],
"must_not": []
}
},
"size": 0,
"_source": {
"excludes": []
},
"aggs": {
"2": {
"date_histogram": {
"field": "server-status.meta.current-time",
"interval": "1h",
"time_zone": "CST6CDT",
"min_doc_count": 1
},
"aggs": {
"4": {
"terms": {
"field": "server-status.type.keyword",
"include": "http-server",
"size": 500,
"order": {
"1": "desc"
}
},
"aggs": {
"1": {
"sum": {
"field": "server-status.status-properties.request-rate.value",
"script": "_value/60"
}
},
"3": {
"terms": {
"field": "server-status.name.keyword",
"size": 5,
"order": {
"1": "desc"
}
},
"aggs": {
"1": {
"sum": {
"field": "server-status.status-properties.request-rate.value",
"script": "_value/60"
}
}
}
}
}
}
}
}
}
}
I would like to shift the values by 1 hr. For example if the value is 2.0 at 2019-02-20T05:00:00.000-06:00 I want it to be displayed for 2019-02-20T06:00:00.000-06:00
Just a possible workaround:
Kibana display time based on browser timezone. You could set the timezone in Kibana configuration for a timezone of your interests.
Update:
You could use date_range aggregation and choose key for those buckets. You will need to generate the aggregation based on your time_range and interval.
For example:
"aggs": {
"range": {
"date_range": {
"field": "date",
"ranges": [
{
"key": "bucket1",
"to": "2016/02/01"
},
{
"key": "bucket2",
"from": "2016/02/01",
"to" : "now/d"
}
]
}
}
}
Reference: https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-bucket-daterange-aggregation.html

Combine two elastic queries into 1. How?

I have two queries which fetched results when performed a GET operation.
The 1st query is -
{
"query": {
"bool": {
"must": [
{
"query_string": {
"query": "*",
"analyze_wildcard": true
}
},
{
"range": {
"database-status.meta.current-time": {
"lte": "now-91d/d"
}
}
}
],
"must_not": []
}
},
"size": 0,
"_source": {
"excludes": []
},
"aggs": {
"2": {
"date_histogram": {
"field": "database-status.meta.current-time",
"interval": "1h",
"time_zone": "CST6CDT",
"min_doc_count": 1
},
"aggs": {
"3": {
"terms": {
"field": "database-status.name.keyword",
"size": 500,
"order": {
"1": "desc"
}
},
"aggs": {
"1": {
"sum": {
"field": "database-status.status-properties.rate-properties.cache-properties.compressed-tree-cache-hit-rate.value",
"script": "_value/60"
}
}
}
}
}
}
}
}
and the 2nd query is -
{
"query": {
"bool": {
"must": [
{
"query_string": {
"query": "*",
"analyze_wildcard": true
}
},
{
"range": {
"database-status.meta.current-time": {
"lte": "now-91d/d"
}
}
}
],
"must_not": []
}
},
"size": 0,
"_source": {
"excludes": []
},
"aggs": {
"2": {
"date_histogram": {
"field": "database-status.meta.current-time",
"interval": "1h",
"time_zone": "CST6CDT",
"min_doc_count": 1
},
"aggs": {
"3": {
"terms": {
"field": "database-status.name.keyword",
"size": 500,
"order": {
"1": "desc"
}
},
"aggs": {
"1": {
"sum": {
"field": "database-status.status-properties.rate-properties.cache-properties.compressed-tree-cache-miss-rate.value",
"script": "_value/60"
}
}
}
}
}
}
}
}
How do I combine two queries into 1 query and get both the results in the same result sets? Based on this I'll try to replicate the method with other queries and even try to combine 3 or more queries into 1.
There are two options to do that:
using multi search (msearch) will allow you to run one request to ES containing both queries. The response of the msearch will contain both queries responses separately, and you can then choose how to combine the answers.
combine the queries in a single bool:
so lets say you have:
Q1->bool->must->inner-q-1
and Q2->bool->must->inner-q-2
then you can combine them with should:
Q3->bool->should->[inner-q-1, inner-q-2], with minimum_should_match equals 1 (very important!)
I made use of nested aggregation.
Here is the combined code -
{
"query": {
"bool": {
"must": [
{
"query_string": {
"query": "*",
"analyze_wildcard": true
}
},
{
"range": {
"server-status.meta.current-time": {
"lte": "now-91d/d"
}
}
}
],
"must_not": []
}
},
"size": 0,
"_source": {
"excludes": []
},
"aggs": {
"time-interval": {
"date_histogram": {
"field": "server-status.meta.current-time",
"interval": "1h",
"time_zone": "CST6CDT",
"min_doc_count": 1
},
"aggs": {
"http-server": {
"terms": {
"field": "server-status.type.keyword",
"include": "http-server",
"size": 500,
"order": {
"1": "desc"
}
},
"aggs": {
"1": {
"sum": {
"field": "server-status.status-properties.expanded-tree-cache-hit-rate.value",
"script": "_value/60"
}
},
"2": {
"sum": {
"field": "server-status.status-properties.expanded-tree-cache-miss-rate.value",
"script": "_value/60"
}
},
"3": {
"terms": {
"field": "server-status.name.keyword",
"size": 500,
"order": {
"1": "desc"
}
},
"aggs": {
"1": {
"sum": {
"field": "server-status.status-properties.expanded-tree-cache-hit-rate.value",
"script": "_value/60"
}
},
"2": {
"sum": {
"field": "server-status.status-properties.expanded-tree-cache-miss-rate.value",
"script": "_value/60"
}
}
}
}
}
}
}
}
}
}

ElasticSearch extended_bounds over range with no data/hitdocs

I've a range for which no hitdocs exist. When a date_histogram aggregation based query is run with extended_bounds over this no-data range, nothing is returned.
However, for a range which has at least 1 hitdoc, buckets data is returned for the range as specified using extended_bounds.
How can I achieved similar results over a range with no hitdocs?
Sample query -
{
"size": 0,
"query": {
"filtered": {
"filter": {
"bool": {
"must": [
{
"term": {
"kind": "hit-search"
}
},
{
"range": {
"startTime": {
"gte": 1506429661000,
"lte": 1506516061000
}
}
}
]
}
}
}
},
"aggs": {
"perHost": {
"terms": {
"field": "user"
},
"aggs": {
"ts": {
"date_histogram": {
"field": "startTime",
"interval": "30m",
"min_doc_count": 0,
"extended_bounds": {
"min": 1506429661000,
"max": 1506516061000
}
},
"aggs": {
"numQuery": {
"cardinality": {
"field": "queryId"
}
}
}
}
}
}
},
"from": 0
}
One can use missing aggregation for the same. Above query looks like this after update -
{
"size": 0,
"query": {
"filtered": {
"filter": {
"bool": {
"must": [
{
"term": {
"kind": "hit-search"
}
},
{
"range": {
"startTime": {
"gte": 1506429661000,
"lte": 1506516061000
}
}
}
]
}
}
}
},
"aggs": {
"perHost": {
"missing": {
"field": "user"
},
"aggs": {
"ts": {
"date_histogram": {
"field": "startTime",
"interval": "30m",
"min_doc_count": 0,
"extended_bounds": {
"min": 1506429661000,
"max": 1506516061000
}
},
"aggs": {
"numQuery": {
"cardinality": {
"field": "queryId"
}
}
}
}
}
}
},
"from": 0
}
An observation - extended_bounds doesn't seem to be working for missing.

Resources