Elasticsearch date histogram returning only maximum of 13 buckets - elasticsearch

I'm trying to run the following query, and expecting 24 buckets to be present but 13 buckets are returned by elasticsearch.
{
"query": {
"bool": {
"must": [
{
"range": {
"timestamp": {
"gte": start_time,
"lte": start_time + 86400
}
}
},
{
"term": {
"some_field": "some_value"
}
}
]
}
},
"aggs": {
"hourly_data": {
"date_histogram": {
"field": "timestamp",
"fixed_interval": "60m",
"min_doc_count": 0
},
"aggs": {
"unique_some_agg_name": {
"cardinality": {
"field": "some_other_field"
}
}
}
}
},
"size": 0
}
I'm not able to figure out how to set the bucket count of the date_histogram.

Related

Filter an elasticsearch result after an aggregation

I have this elasticsearch query that get every x-locations for which the number of documents (with timestamp gte 1 month ago) is greater than 5000. I'm also able to get the most recent data timestamp for each of these x-locations.
Is it possible to add an additional filter at the end of the query, in order to ignore all x-locations for which the most recent timestamp is older than 2 days ago?
The query:
GET /mypattern-*/_search
{
"query": {
"bool": {
"must": [
{"match": {"method": "GET"}},
{
"range": {
"timestamp": {
"gte": "now-1M"
}
}
}
]
}
},
"aggs": {
"location_terms": {
"terms": {
"field": "x-location.keyword",
"min_doc_count": 500,
"size": 1000,
"order": {
"recent_timestamp": "desc"
}
},
"aggs": {
"recent_timestamp": {
"max": {
"field": "timestamp"
}
}
}
}
}
}

ElasticSearch: Nested buckets aggregation

I'm new to ElasticSearch, so this question could be quite trivial for you, but here I go:
I'm using kibana_sample_data_ecommerce, which documents have a mapping like this
{
...
"order_date" : <datetime>
"taxful_total_price" : <double>
...
}
I want to get a basic daily behavior of the data:
Expecting documents like this:
[
{
"qtime" : "00:00",
"mean" : 20,
"std" : 40
},
{
"qtime" : "01:00",
"mean" : 150,
"std" : 64
},
...
]
So, the process I think that I need to do is:
Group by day all records ->
Group by time window for each day ->
Sum all record in each time window ->
Cumulative Sum for each sum by time window, thus, I get behavior of a day ->
Extended_stats by the same time window across all days
And that can be expressed like this:
But I can't unwrap those buckets to process those statistics. May you give me some advice to do that operation and get that result?
Here is my current query(kibana developer tools):
POST kibana_sample_data_ecommerce/_search
{
"size": 0,
"query": {
"bool": {
"must": [
{
"range": {
"order_date": {
"gt": "now-1M",
"lte": "now"
}
}
}
]
}
},
"aggs": {
"day_histo": {
"date_histogram": {
"field": "order_date",
"calendar_interval": "day"
},
"aggs": {
"qmin_histo": {
"date_histogram": {
"field": "order_date",
"calendar_interval": "hour"
},
"aggs": {
"qminute_sum": {
"sum": {
"field": "taxful_total_price"
}
},
"cumulative_qminute_sum": {
"cumulative_sum": {
"buckets_path": "qminute_sum"
}
}
}
}
}
}
}
}
Here's how you pull off the extended stats:
{
"size": 0,
"query": {
"bool": {
"must": [
{
"range": {
"order_date": {
"gt": "now-4M",
"lte": "now"
}
}
}
]
}
},
"aggs": {
"by_day": {
"date_histogram": {
"field": "order_date",
"calendar_interval": "day"
},
"aggs": {
"by_hour": {
"date_histogram": {
"field": "order_date",
"calendar_interval": "hour"
},
"aggs": {
"by_taxful_total_price": {
"extended_stats": {
"field": "taxful_total_price"
}
}
}
}
}
}
}
}
yielding

Elasticsearch : How get result buckets size

Here is my query result
GET _search
{
"size": 0,
"query": {
"bool": {
"must": [
{
"match": {
"serviceName.keyword": "directory-view-service"
}
},
{
"match": {
"path": "thewall"
}
},
{
"range": {
"#timestamp": {
"from": "now-31d",
"to": "now"
}
}
}
]
}
},
"aggs": {
"by_day": {
"date_histogram": {
"field": "date",
"interval": "7d"
},
"aggs": {
"byUserUid": {
"terms": {
"field": "token_userId.keyword",
"size": 150000
},
"aggs": {
"filterByCallNumber": {
"bucket_selector": {
"buckets_path": {
"doc_count": "_count"
},
"script": {
"inline": "params.doc_count <= 1"
}
}
}
}
}
}
}
}
}
I want my query to return every user who called my endpoint at least once within a 1-month range, bucketed into 7-day intervals — up to that point everything works fine.
But my result is a bucket array with 370 elements, and I just need to know the array's size...
Is there a keyword for this, or how else can I handle it?
Thanks

Need aggregation on document inner array object - ElasticSearch

I am trying to do aggregation over the following document
{
"pid": 900000,
"mid": 9000,
"cid": 90,
"bid": 1000,
"gmv": 1000000,
"vol": 200,
"data": [
{
"date": "25-11-2018",
"gmv": 100000,
"vol": 20
},
{
"date": "24-11-2018",
"gmv": 100000,
"vol": 20
},
{
"date": "23-11-2018",
"gmv": 100000,
"vol": 20
}
]
}
The analysis which needs to be done here is:
Filter on mid or/and cid on all documents
Filter range on data.date for last 7 days and sum data.vol over that range for each pid
sort the documents over the sum obtained in previous step in desc order
Group these results by pid.
This means we are trying to get top products by sum of the volume (quantity sold) within a date range for specific cid/mid.
PID here refers product ID,
MID refers here merchant ID,
CID refers here category ID
Firstly you need to change your mapping to run the query on nested fields.
change the type for field 'data' as 'nested'.
Then you can use the range query in filter along with the terms filter on mid/cid to filter on the data. Once you get the correct data set, then you can aggregate on the pid following the sub aggregation on sum of vol.
Here is the below query.
{
"query": {
"bool": {
"filter": [
{
"bool": {
"must": [
{
"range": {
"data.date": {
"gte": "28-11-2018",
"lte": "25-11-2018"
}
}
},
{
"must": [
{
"terms": {
"mid": [
"9000"
]
}
}
]
}
]
}
}
]
}
},
"aggs": {
"AGG_PID": {
"terms": {
"field": "pid",
"size": 0,
"order": {
"TOTAL_SUM": "desc"
},
"min_doc_count": 1
},
"aggs": {
"TOTAL_SUM": {
"sum": {
"field": "data.vol"
}
}
}
}
}
}
You can modify the query accordingly. Hope this will be helpful.
Please find nested aggregation query which sorts by "vol" for each bucket of "pid". You can add any number of filters in the query part.
{
"size": 0,
"query": {
"bool": {
"must": [
{
"term": {
"mid": "2"
}
}
]
}
},
"aggs": {
"top_products_sorted_by_order_volume": {
"terms": {
"field": "pid",
"order": {
"nested_data_object>order_volume_by_range>order_volume_sum": "desc"
}
},
"aggs": {
"nested_data_object": {
"nested": {
"path": "data"
},
"aggs": {
"order_volume_by_range": {
"filter": {
"range": {
"data.date": {
"gte": "2018-11-26",
"lte": "2018-11-27"
}
}
},
"aggs": {
"order_volume_sum": {
"sum": {
"field": "data.ord_vol"
}
}
}
}
}
}
}
}
}
}

Query elasticsearch with multiple numeric ranges

{
"query": {
"filtered": {
"query": {
"match": {
"log_path": "message_notification.log"
}
},
"filter": {
"numeric_range": {
"time_taken": {
"gte": 10
}
}
}
}
},
"aggs": {
"distinct_user_ids": {
"cardinality": {
"field": "user_id"
}
}
}
}
I have to run this query 20 times, as I want to know the notification times above each of the following thresholds: [10, 30, 60, 120, 240, 300, 600, 1200, ...]. Right now, I am running a loop and making 20 queries to fetch this.
Is there a more sane way to query elasticsearch once and get ranges that fall into these thresholds respectively?
What you probably want is a "range aggregation".
Here is the possible query where you can add more range or alter them -
{
"size": 0,
"query": {
"match": {
"log_path": "message_notification.log"
}
},
"aggs": {
"intervals": {
"range": {
"field": "time_taken",
"ranges": [
{
"to": 50
},
{
"from": 50,
"to": 100
},
{
"from": 100
}
]
},
"aggs": {
"distinct_user_ids": {
"cardinality": {
"field": "user_id"
}
}
}
}
}
}

Resources