Elasticsearch aggregation similar to group_concat - elasticsearch

I am new to Elasticsearch and would like to create a group_concat-style aggregation, but I don't know how. Can someone help me, please?
The example data:
POST /example_measures/_bulk
{"index":{"_id":1}}
{"id":"1","datapoint_id":"1","datetime":"1577833200000","value":"5"}
{"index":{"_id":2}}
{"id":"2","datapoint_id":"2","datetime":"1577833210000","value":"51"}
{"index":{"_id":3}}
{"id":"3","datapoint_id":"2","datetime":"1577833220000","value":"77"}
What I want, expressed in SQL:
select
datapoint_id,
group_concat(`datetime` order by `datetime` SEPARATOR ',' limit 5) as dt,
group_concat(`value` order by `datetime` SEPARATOR ',' limit 5) as val
from example_measures
group by datapoint_id;
I would like to have 2 arrays per data point. One with the timestamps and one with the values.
I had no success with the sql syntax because group_concat is not supported in the sql input:
POST /_sql?format=txt
{
"query":"..."
}
I use Kibana and the Dev Tools for input.

You can achieve your use case by using a Terms Aggregation on the datapoint_id field. This will create buckets - one per unique value of datapoint_id. You can then embed further buckets inside these unique buckets using sub-aggregations.
Search Query:
{
"size": 0,
"aggs": {
"id": {
"terms": {
"field": "datapoint_id.keyword"
},
"aggs": {
"dt": {
"terms": {
"field": "datetime.keyword",
"order": { "_key" : "asc" },
"size": 5
}
},
"val": {
"terms": {
"field": "value.keyword",
"size": 5
}
}
}
}
}
}
Search Result:
"aggregations": {
"id": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "2",
"doc_count": 2,
"val": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "51",
"doc_count": 1
},
{
"key": "77",
"doc_count": 1
}
]
},
"dt": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "1577833210000",
"doc_count": 1
},
{
"key": "1577833220000",
"doc_count": 1
}
]
}
},
{
"key": "1",
"doc_count": 1,
"val": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "5",
"doc_count": 1
}
]
},
"dt": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "1577833200000",
"doc_count": 1
}
]
}
}
]
}
}

Related

ElasticSearch filtering results according buckets length ( number of unique keys in a bucket)

I want to write an Elasticsearch aggregation that returns a key only if its inner bucket's length is greater than 1.
"aggs": {
"product_definitions": {
"terms": {
"field": "definition_name",
"size": 200,
"exclude": "NO_MATCH",
"min_doc_count": 5
},
"aggs": {
"product_instances": {
"terms": {
"field": "data_source_name",
"size": 100
}
}
}
}
}
This is my aggregation, and it returns:
"aggregations": {
"product_definitions": {
"doc_count_error_upper_bound": 10,
"sum_other_doc_count": 29281,
"buckets": [
{
"key": "DANA ANTRİKOT KG",
"doc_count": 13,
"product_instances": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "SariyerMarketCom",
"doc_count": 13
}
]
}
},
{
"key": "Keskinoğlu Piliç Salam 700G",
"doc_count": 10,
"product_instances": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "HappyCenterComTr",
"doc_count": 9
},
{
"key": "SanalMarketComTr",
"doc_count": 1
}
]
}
},
{
"key": "Doğuş Filiz Çayı 1000 G",
"doc_count": 9,
"product_instances": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "HappyCenterComTr",
"doc_count": 7
},
{
"key": "SanalMarketComTr",
"doc_count": 2
}
]
}
}
]
}
}
I want keys in product_definitions only if their product_instances buckets have two or more keys. In this example it should return only the 2nd and 3rd keys and not the 1st, because the bucket of the 1st key contains only one key, which is
"buckets": [
{
"key": "SariyerMarketCom",
"doc_count": 13
}
]
You can leverage the bucket_selector pipeline aggregation to achieve that, like this:
"aggs": {
"product_definitions": {
"terms": {
"field": "definition_name",
"size": 200,
"exclude": "NO_MATCH",
"min_doc_count": 5
},
"aggs": {
"product_instances": {
"terms": {
"field": "data_source_name",
"size": 100
}
},
"minimum_2": {
"bucket_selector": {
"buckets_path": {
"count": "product_instances._bucket_count"
},
"script": "params.count >= 2"
}
}
}
}
}

how can I filter the continuous days in elasticsearch?

Here is my scenario: I want to find the users who have logged in to our website on 3 or more consecutive days.
For example:
{"login_time":"2018-01-01T18:19:07.982Z", "user_id":123}
{"login_time":"2018-01-01T08:30:07.982Z", "user_id":456}
{"login_time":"2018-01-02T09:39:07.982Z", "user_id":123}
{"login_time":"2018-01-03T08:20:07.982Z", "user_id":123}
{"login_time":"2018-01-03T08:20:07.982Z", "user_id":456}
So user_id:123 has logged in on 3 consecutive days, whereas user_id:456 has never logged in on more than 1 consecutive day. I would like user_id:456 to be dropped from what Elasticsearch returns.
This is my ES JSON:
GET event-tracking/_search
{
"aggs": {
"login_by_day": {
"date_histogram": {
"field": "login_time",
"interval": "day"
},
"aggs": {
"user_id": {
"terms": {
"field": "user_id",
"size": 10
}
}
}
}
}
}
And response:
"aggregations": {
"login_by_day": {
"buckets": [
{
"key_as_string": "2018-01-01T00:00:00.000Z",
"key": 1514764800000,
"doc_count": 2,
"user_id": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": 123,
"doc_count": 1
},
{
"key": 456,
"doc_count": 1
}
]
}
},
{
"key_as_string": "2018-01-02T00:00:00.000Z",
"key": 1514851200000,
"doc_count": 1,
"user_id": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": 123,
"doc_count": 1
}
]
}
},
{
"key_as_string": "2018-01-03T00:00:00.000Z",
"key": 1514937600000,
"doc_count": 2,
"user_id": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": 123,
"doc_count": 1
},
{
"key": 456,
"doc_count": 1
}
]
}
}
]
}
}
Then I have to write some code to filter the result. My question is: how can I achieve this with an Elasticsearch JSON query alone, without any additional code?
Thanks in advance.

elasticsearch aggregations sort on buckets keys

How do I sort Elasticsearch aggregation buckets on keys? I have nested aggregations and want to sort on the bucket results of my 2nd aggregation.
Like I have:
"result": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": 20309,
"doc_count": 752,
"Events": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "impression",
"doc_count": 30
},
{
"key": "page_view",
"doc_count": 10
},
...
]
}
},
{
"key": 20771,
"doc_count": 46,
"Events": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "impression",
"doc_count": 32
},
{
"key": "page_view",
"doc_count": 9
},
...
]
}
},
I want my Events aggregate buckets to sort by desc/asc on key impression or on page_view.
How do I achieve such results set?
Here is my query
GET someindex/useractivity/_search?search_type=count
{
"size": 1000000,
"query": {
"filtered": {
"filter": {
"bool": {
"must": [
{
"range": {
"created_on": {
"from": "2015-01-12",
"to": "2016-05-12"
}
}
},
{
"term": {
"group_id": 1
}
}
]
}
}
}
},
"aggs": {
"result": {
"terms": {
"field": "entity_id",
"size": 1000000
},
"aggs": {
"Events": {
"terms": {
"field": "event_type",
"min_doc_count": 0,
"size": 10
}
}
}
}
}
}
I have tried using _key, but it sorts within the bucket. I want to sort across all buckets. For example, I have a key impression, and I want my bucket results to be sorted by this key, not sorted within the bucket.
If I sort on impression in descending order, my result set should look like this:
"buckets": [
{
"key": 20771,
"doc_count": 46,
"Events": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "impression",
"doc_count": 32
},
{
"key": "page_view",
"doc_count": 9
},
...
]
}
},
{
"key": 20309,
"doc_count": 752,
"Events": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "impression",
"doc_count": 30
},
{
"key": "page_view",
"doc_count": 10
},
...
]
}
},
i.e., the bucket with the maximum impression count should be on top (order buckets by impression in descending order).
Try this aggregation:
{
"size": 0,
"aggs": {
"result": {
"terms": {
"field": "entity_id",
"size": 10,
"order": {
"impression_Events": "desc"
}
},
"aggs": {
"Events": {
"terms": {
"field": "event_type",
"min_doc_count": 0,
"size": 10
}
},
"impression_Events": {
"filter": {
"term": {
"event_type": "impression"
}
}
}
}
}
}
}

How to use elasticsearch facet query to groupby the result

I have a json data in the below format
{
"ID": { "Color": "Black", "Product": "Car" },
"ID": { "Color": "Black", "Product": "Car" },
"ID": { "Color": "Black", "Product": "Van" },
"ID": { "Color": "Black", "Product": "Van" },
"ID": { "Color": "Ash", "Product": "Bike" }
}
I want to calculate the count of each product and its corresponding color. I am using Elasticsearch facets to do this.
My query
$http.post('http://localhost:9200/product/productinfoinfo/_search?size=5', { "aggregations": { "ProductInfo": { "terms": { "field": "product" } } }, "facets": { "ProductColor": { "terms": { "field": "Color", "size": 10 } } } })
I am getting the output like below
"facets": { "ProductColor": { "_type": "terms", "missing": 0, "total": 7115, "other": 1448, "terms": [ { "term": "Black", "count": 4 }, { "term": "Ash","count":1} },
"aggregations": { "ProductInfo": { "doc_count_error_upper_bound": 94, "sum_other_doc_count": 11414, "buckets": [ { "key": "Car", "doc_count": 2 }, { "key": "Van", "doc_count": 2 }, { "key": "Bike", "doc_count": 1 } ] } } }
What I actually want is,
[ { "key": "Car", "doc_count": 2, "Color":"Black", "count":2 }, { "key": "Van", "doc_count": 2,"Color":"Black", "count":2 }, { "key": "Bike", "doc_count": 1,"Color":"Ash", "count":1 } ]
I would like to group the result. Is it possible to do this in an Elasticsearch query?
Thanks in advance
This is because you're using both aggregations and facets, which, although similar, are not meant to be used together.
Facets are deprecated and will soon be removed from Elasticsearch.
Aggregations are the way to go to make "group by"-like queries.
You just have to nest another terms aggregation in the first one, like this:
{
"aggs": {
"By_type": {
"terms": {
"field": "Product"
},
"aggs": {
"By_color": {
"terms": {
"field": "Color"
}
}
}
}
}
}
And the result will be close to what you want :
"aggregations": {
"By_type": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "bike",
"doc_count": 2,
"By_color": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "ash",
"doc_count": 1
},
{
"key": "black",
"doc_count": 1
}
]
}
},
{
"key": "car",
"doc_count": 2,
"By_color": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "black",
"doc_count": 2
}
]
}
},
{
"key": "van",
"doc_count": 1,
"By_color": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "black",
"doc_count": 1
}
]
}
}
]
}
}

elasticsearch - additional field in aggregation results

I have the following aggregation for Categories
{
"aggs": {
"category": {
"terms": { "field": "category.name" }
}
}
}
// results
"category": {
"buckets": [
{
"key": "computer & office",
"doc_count": 365
},
{
"key": "home & garden",
"doc_count": 171
},
{
"key": "consumer electronics",
"doc_count": 49
},
]
}
How can I pass an additional field, like category.id, to the category buckets, so that I can query by category.id when a certain aggregation is clicked by a user? I'm not really clear on how to query aggregations: is there a direct way, or do you have to make a new query and pass the bucket key to the query filters?
Use a sub-aggregation on the category.id, you will do a bit more work when looking at the results, but I think it's better than changing the mapping:
{
"aggs": {
"name": {
"terms": {
"field": "name"
},
"aggs": {
"id": {
"terms": {
"field": "id"
}
}
}
}
}
}
And the results will look like the following:
"aggregations": {
"name": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "consumer electronics",
"doc_count": 2,
"id": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": 2,
"doc_count": 2
}
]
}
},
{
"key": "computer & office",
"doc_count": 1,
"id": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": 5,
"doc_count": 1
}
]
}
},
{
"key": "home & garden",
"doc_count": 1,
"id": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": 1,
"doc_count": 1
}
]
}
},
{
"key": "whatever",
"doc_count": 1,
"id": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": 3,
"doc_count": 1
}
]
}
}
]
}
}
You will still have the category name, but now you also have the id from the second aggregation as a sub-bucket in the root bucket:
"key": "consumer electronics",
...
"id": {
...
"buckets": [
{
"key": 2,
"doc_count": 2
You could add a sub aggregation:
{
"aggs": {
"category": {
"terms": {
"field": "category.name",
"aggs": {
"id": {
"terms": { "field": "category.id" }
}
}
}
}
}
}
This way each category.name bucket will contain a single bucket containing the id for that category.

Resources