How to aggregate data by a field in Elasticsearch? - elasticsearch

I'm using Kibana 4.4.1, and in Elasticsearch I store the status of each PC only when that status changes (open, closed, warnings, etc.).
My data in Elasticsearch looks like:
{ "status_id":1 , "pc":"lpt001" , "date":"2016-10-25T17:49:00Z" }
{ "status_id":3 , "pc":"lpt001" , "date":"2016-10-25T15:48:00Z" }
{ "status_id":4 , "pc":"lpt002" , "date":"2016-10-25T15:46:00Z" }
{ "status_id":1 , "pc":"lpt002" , "date":"2016-10-25T12:48:00Z" }
I want to get the newest record for each PC, so that at any time I know how many PCs are open, closed, or have some issues.
My query is like:
GET cb-2016.10.26/_search
{
"query": {
"match_all": { }
},
"sort": [
{
"date": {
"order": "desc"
}
}
],
"aggs": {
"max_date":{
"max": {
"field": "date"
}
}
}
}
And the result is:
"aggregations": {
"max_date": {
"value": 1477417680000,
"value_as_string": "2016-10-25T17:48:00.000Z"
}
}
But what I want is to have that max_date for each "pc" value ("lpt001", "lpt002").
Is there any way to split max_date by the "pc" field? I read something about bucket aggregations, but I could not get the result I wanted.
Thank you

Yes, you can do it by using a terms aggregation on the pc field and moving the max_date aggregation into a sub-aggregation of the terms one:
POST cb-2016.10.26/_search
{
"query": {
"match_all": { }
},
"sort": [
{
"date": {
"order": "desc"
}
}
],
"aggs": {
"pcs": {
"terms": {
"field": "pc"
},
"aggs": {
"max_date":{
"max": {
"field": "date"
}
}
}
}
}
}

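The final query looks like this (it uses a top_hits sub-aggregation, sorted by date descending with size 1, to return the newest document for each pc):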
{
"query": {
"match_all": { }
},
"aggs" : {
"pcstatus" : {
"terms" : {
"field" : "pc"
},
"aggs": {
"top_date_hit": {
"top_hits": {
"sort": [
{
"date": {
"order": "desc"
}
}
],
"size" : 1
}
}
}
}
}
}

Related

How to get the last Elasticsearch document for each unique value of a field?

I have a data structure in Elasticsearch that looks like:
{
"name": "abc",
"date": "2022-10-08T21:30:40.000Z",
"rank": 3
}
I want to get, for each unique name, the rank of the document (or the whole document) with the most recent date.
I currently have this:
"aggs": {
"group-by-name": {
"terms": {
"field": "name"
},
"aggs": {
"max-date": {
"max": {
"field": "date"
}
}
}
}
}
How can I get the rank (or the whole document) for each result, if possible in one request?
You can use either of the options below:
Collapse
"collapse": {
"field": "name"
},
"sort": [
{
"date": {
"order": "desc"
}
}
]
Top hits aggregation
{
"aggs": {
"group-by-name": {
"terms": {
"field": "name",
"size": 100
},
"aggs": {
"top_doc": {
"top_hits": {
"sort": [
{
"date": {
"order": "desc"
}
}
],
"size": 1
}
}
}
}
}
}

How to sort buckets aggregation from query with wildcard?

I can't figure out how to sort the bucket results. I'm using ES 6.3 and following the suggested docs. I'm trying to sort the results by adding a "bucket_sort" aggregation, but I'm getting an error. The following query works, but it returns the buckets in the same order no matter whether I put a "sort" clause with 'asc' or 'desc' after the "query" body:
{
"query": {
"bool":{
"filter":{
"wildcard": {
"datas.295.keyword": {
"value":"*w*"
}
}
}
}
},
"sort":[
{
"datas.295.keyword": {
"order" : "desc"
}
}
],
"aggs": {
"AGGREGATE_UNIQUE_VALUES_FROM_REPEATED": {
"terms": {
"field": "datas.295.keyword"
}
}
}}
It returns records matching operating systems such as Windows XP, Windows Vista, etc. But how do I sort the buckets in ascending order? I tried this:
{
"query": {
"bool":{
"filter":{
"wildcard": {
"datas.295.keyword": {
"value":"*w*"
}
}
}
}
},
"aggs": {
"AGGREGATE_UNIQUE_VALUES_FROM_REPEATED": {
"terms": {
"field": "datas.295.keyword"
},
"aggs": {
"bucket_sort":{
"sort": [
{
"datas.295.keyword": {"order": "asc"}
}
]
}
}
}
}}
This query raises the error 'Expected [START_OBJECT] under [sort], but got a [START_ARRAY] in [bucket_sort]'.
Thanks for reading!
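The hits and aggs are separate parts of the API: the top-level "sort" only orders the hits, not the aggregation buckets. What you need is the terms aggregation's bucket "order" (use "asc" instead of "desc" to get the keys in ascending order):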
{
"query": {
"bool": {
"filter": {
"wildcard": {
"datas.295.keyword": {
"value": "*w*"
}
}
}
}
},
"sort": [
{
"datas.295.keyword": {
"order": "desc"
}
}
],
"aggs": {
"AGGREGATE_UNIQUE_VALUES_FROM_REPEATED": {
"terms": {
"field": "datas.295.keyword",
"order": {
"_key": "desc"
}
}
}
}
}

Elasticsearch: Aggregation on filtered nested objects to find unique values

I have an array of objects (tags) in each document in Elasticsearch 5:
{
"tags": [
{ "key": "tag1", "value": "val1" },
{ "key": "tag2", "value": "val2" },
...
]
}
Now I want to find the unique tag values for a certain tag key. Something similar to this SQL query:
SELECT DISTINCT(tags.value) FROM tags WHERE tags.key='some-key'
I have come up with this DSL so far:
{
"size": 0,
"aggs": {
"my_tags": {
"nested": {
"path": "tags"
},
"aggs": {
"filter" : { "terms": { "tags.key": "tag1" } },
"aggs": {
"my_tags_values": {
"terms" : {
"field" : "tags.value",
"size": 9999
}
}
}
}
}
}
}
But it is showing me this error:
[terms] unknown field [tags.key], parser not found.
Is this the right approach to solve the problem? Thanks for your help.
Note: I have declared the tags field as a nested field in my mapping.
You mixed things up there. You probably wanted to add a filter aggregation, but you didn't give it a name:
{
"size": 0,
"aggs": {
"my_tags": {
"nested": {
"path": "tags"
},
"aggs": {
"my_filter": {
"filter": {
"terms": {
"tags.key": [
"tag1"
]
}
},
"aggs": {
"my_tags_values": {
"terms": {
"field": "tags.value",
"size": 9999
}
}
}
}
}
}
}
}
Try a bool query inside the filter aggregation:
{
"size": 0,
"aggs": {
"my_tags": {
"nested": {
"path": "tags"
},
"aggs": {
"filter": {
"bool": {
"must": [
{
"term": {
"tags.key": "tag1"
}
}
]
},
"aggs": {
"my_tags_values": {
"terms": {
"field": "tags.value",
"size": 0
}
}
}
}
}
}
}
}
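BTW: if you want to retrieve all buckets, you can write 0 instead of 9999 as the aggregation size. Note that this only works in Elasticsearch versions before 5.0; from 5.0 on, a terms aggregation size of 0 is rejected, so keep an explicit large size there.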

Elasticsearch - get terms aggregation for specified fields

I am using a terms aggregation to get the number of users from each city:
{
"aggs" : {
"cities" : {
"terms" : { "field" : "city.name" }
}
}
}
This gives results. But I always want to get some specific cities in the aggregation results, irrespective of whether they are in the top 10 or not. Do I need to use a separate filter aggregation for each city to get its result?
You have three solutions:
A. You can specify a filter in the query:
{
"query": {
"terms": {
"city.name": [ "city1", "city2", "city3" ]
}
},
"aggs": {
"cities": {
"terms": {
"field": "city.name"
}
}
}
}
B. You can specify a filter in the aggregations:
{
"aggs": {
"city_filter": {
"filter": {
"terms": {
"city.name": [
"city1",
"city2",
"city3"
]
}
},
"aggs": {
"cities": {
"terms": {
"field": "city.name"
}
}
}
}
}
}
C. You can filter values in the terms aggregation:
{
"aggs": {
"cities": {
"terms": {
"field": "city.name",
"include": "city1*",
"exclude": "city2*"
}
}
}
}

For each country/colour/brand combination, find the sum of the number of items in Elasticsearch

This is a portion of the data I have indexed in elasticsearch:
{
"country" : "India",
"colour" : "white",
"brand" : "sony",
"numberOfItems" : 3
}
I want to get the total sum of numberOfItems on a per country basis, per colour basis and per brand basis. Is there any way to do this in elasticsearch?
The following should lead you straight to the answer.
Make sure you enable scripting before using it.
{
"aggs": {
"keys": {
"terms": {
"script": "doc['country'].value + doc['colour'].value + doc['brand'].value"
},
"aggs": {
"keySum": {
"sum": {
"field": "numberOfItems"
}
}
}
}
}
}
To get a single result you may use sum aggregation applied to a filtered query with term (terms) filter, e.g.:
{
"query": {
"filtered": {
"filter": {
"term": {
"country": "India"
}
}
}
},
"aggs": {
"total_sum": {
"sum": {
"field": "numberOfItems"
}
}
}
}
To get statistics for all countries/colours/brands in a single pass over the data, you may use the following query with 3 multi-bucket terms aggregations, each of them containing a single-value sum metric sub-aggregation:
{
"query": {
"match_all": {}
},
"aggs": {
"countries": {
"terms": {
"field": "country"
},
"aggs": {
"country_sum": {
"sum": {
"field": "numberOfItems"
}
}
}
},
"colours": {
"terms": {
"field": "colour"
},
"aggs": {
"colour_sum": {
"sum": {
"field": "numberOfItems"
}
}
}
},
"brands": {
"terms": {
"field": "brand"
},
"aggs": {
"brand_sum": {
"sum": {
"field": "numberOfItems"
}
}
}
}
}
}

Resources