Max and min from all index in query - elasticsearch

Is there a way to get the max and min across all documents in the index, not only the max and min from category "game", without making another request to Elasticsearch?
{
"query": {
"bool": {
"must": [
{
"match": {
"category": "game"
}
}
]
}
},
"aggs": {
"maxPoints": {
"max": {
"field": "points"
}
},
"minPoints": {
"min": {
"field": "points"
}
}
}
Here is some sample data I have. With the query above, I want to get the 2 docs from category game, with min 0 and max 100 instead of min 10 and max 20.
[
{
"id": 1,
"category": "offer",
"points": 0
},
{
"id": 2,
"category": "game",
"points": 10
},
{
"id": 3,
"category": "game",
"points": 20
},
{
"id": 4,
"category": "offer",
"points": 100
}
]

Yeah, just remove the match clause, and add match_all query to include all the documents in your index. Use post_filter to get the expected results in a single ES call.
{
"query": {
"match_all": {}
},
"aggs": {
"maxPoints": {
"max": {
"field": "points"
}
},
"minPoints": {
"min": {
"field": "points"
}
}
},
"post_filter": { // Note this
"term": {
"category": "game"
}
}
}
Output
{
"took": 8,
"timed_out": false,
"_shards": {
"total": 1,
"successful": 1,
"skipped": 0,
"failed": 0
},
"hits": {
"total": {
"value": 2,
"relation": "eq"
},
"max_score": 1.0,
"hits": [
{
"_index": "65406564",
"_type": "_doc",
"_id": "2",
"_score": 1.0,
"_source": {
"id": 2,
"category": "game",
"points": 10
}
},
{
"_index": "65406564",
"_type": "_doc",
"_id": "3",
"_score": 1.0,
"_source": {
"id": 3,
"category": "game",
"points": 20
}
}
]
},
"aggregations": {
"maxPoints": {
"value": 100.0
},
"minPoints": {
"value": 0.0
}
}
}

Related

ElasticSearch aggregation return entire sub object, not just the key

We are newbies with ElasticSearch. We have docs indexed with the following structure:
{
"Id": 1246761,
"ContentTypeName": "Official Statement",
"Title": "Official statement Title",
"Categories": [
{
"Id": 3,
"Type": 1,
"Name": "Category A",
"ParentId": 0
},
{
"Id": 10,
"Type": 3,
"Name": "Category B",
"ParentId": 0
},
{
"Id": 426,
"Type": 7,
"Name": "Category C",
"ParentId": 0
}
]
}
The requirement is to get the aggregated list of categories + document count matching a keyword search.
So far our query looks like this:
GET _search
{
"query": {
"match_all": {}
},
"size": 0,
"aggs": {
"my-agg-name": {
"terms": {
"field": "Categories.Id"
}
}
}
}
Result is
{
"hits" : {
"total" : {
"value" : 10000,
"relation" : "gte"
},
"max_score" : null,
"hits" : [ ]
},
"aggregations" : {
"my-agg-name" : {
"doc_count_error_upper_bound" : 23845,
"sum_other_doc_count" : 1068245,
"buckets" : [
{
"key" : 426,
"doc_count" : 112651
},
{
"key" : 10,
"doc_count" : 91146
},
....
]
}
}
}
Is there a way to get back the entire Category object, not only the Id ?
Or serialize the category object into string as the key ?
You need to use nested aggregation to achieve your required use case
Adding a working example with index mapping, search query, and search result
Index Mapping:
{
"mappings": {
"properties": {
"Categories": {
"type": "nested"
}
}
}
}
Search Query:
{
"query": {
"match_all": {}
},
"aggs": {
"resellers": {
"nested": {
"path": "Categories"
},
"aggs": {
"my-agg-name": {
"terms": {
"field": "Categories.Id"
},
"aggs": {
"categories-doc": {
"top_hits": {
"_source": {
"includes": [
"Categories.Id",
"Categories.Type",
"Categories.Name",
"Categories.ParentId"
]
},
"size": 1
}
}
}
}
}
}
}
}
Search Result:
"aggregations": {
"resellers": {
"doc_count": 3,
"my-agg-name": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": 3, // note this
"doc_count": 1,
"categories-doc": {
"hits": {
"total": {
"value": 1,
"relation": "eq"
},
"max_score": 1.0,
"hits": [
{
"_index": "65847850",
"_type": "_doc",
"_id": "1",
"_nested": {
"field": "Categories",
"offset": 0
},
"_score": 1.0,
"_source": {
"ParentId": 0,
"Type": 1,
"Id": 3, // note this
"Name": "Category A"
}
}
]
}
}
},
{
"key": 10,
"doc_count": 1,
"categories-doc": {
"hits": {
"total": {
"value": 1,
"relation": "eq"
},
"max_score": 1.0,
"hits": [
{
"_index": "65847850",
"_type": "_doc",
"_id": "1",
"_nested": {
"field": "Categories",
"offset": 1
},
"_score": 1.0,
"_source": {
"ParentId": 0,
"Type": 3,
"Id": 10,
"Name": "Category B"
}
}
]
}
}
},
{
"key": 426,
"doc_count": 1,
"categories-doc": {
"hits": {
"total": {
"value": 1,
"relation": "eq"
},
"max_score": 1.0,
"hits": [
{
"_index": "65847850",
"_type": "_doc",
"_id": "1",
"_nested": {
"field": "Categories",
"offset": 2
},
"_score": 1.0,
"_source": {
"ParentId": 0,
"Type": 7,
"Id": 426,
"Name": "Category C"
}
}
]
}
}
}
]
}
}
}

How can I get ALL the latest records for each group with an Elasticsearch query?

I have used this as a reference: how-to-get-latest-values-for-each-group-with-an-elasticsearch-query
Now when I do the search, the aggregation only returns 10 buckets. How can I make it show all matching results? I ONLY show two buckets below since the full response is too long. Thanks!
my ES query is :
{
"size" :1,
"aggs": {
"group": {
"terms": {
"field": "studentId"
},
"aggs": {
"group_docs": {
"top_hits": {
"size": 1,
"sort": [
{
"timestamp": {
"order": "desc"
}
}
]
}
}
}
}
}
}
and the result:
{
"took": 32,
"timed_out": false,
"_shards": {
"total": 1,
"successful": 1,
"skipped": 0,
"failed": 0
},
"hits": {
"total": {
"value": 10000,
"relation": "gte"
},
"max_score": 1.0,
"hits": [
{
"_index": "data",
"_type": "class",
"_id": "N-wsrHYB4zCrGLTdS7Ur",
"_score": 1.0,
"_source": {
"studentId": 144,
"timestampstring": "2020-09-02 05:58:04.828",
"type": "data"
}
}
]
},
"aggregations": {
"group": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 99670,
"buckets": [
{
"key": 131,
"doc_count": 579,
"group_docs": {
"hits": {
"total": {
"value": 579,
"relation": "eq"
},
"max_score": null,
"hits": [
{
"_index": "data",
"_type": "class",
"_id": "SVVj4HYBlaUrIoJst3-o",
"_score": null,
"_source": {
"studentId": 131,
"timestampstring": "2021-01-08 13:06:34.413",
"type": "data"
},
"sort": [
1609340059767
]
}
]
}
}
},
{
"key": 147,
"doc_count": 529,
"group_docs": {
"hits": {
"total": {
"value": 529,
"relation": "eq"
},
"max_score": null,
"hits": [
{
"_index": "data",
"_type": "class",
"_id": "SVVj4HYBlaUrIoJst3-o",
"_score": null,
"_source": {
"studentId": 147,
"timestampstring": "2021-01-08 13:06:34.413",
"type": "data"
},
"sort": [
1610082394413
]
}
]
}
}
}
]
}
}
}
You need to add the size param in the terms aggregation
The size parameter can be set to define how many term buckets should
be returned out of the overall terms list.
{
"size": 1,
"aggs": {
"group": {
"terms": {
"field": "studentId",
"size": 100 // note this
},
"aggs": {
"group_docs": {
"top_hits": {
"size": 1,
"sort": [
{
"timestamp": {
"order": "desc"
}
}
]
}
}
}
}
}
}
Update 1:
You can use the stats bucket aggregation to get the count of unique studentId values
{
"size": 1,
"aggs": {
"group": {
"terms": {
"field": "studentId",
"size": 100 // note this
},
"aggs": {
"group_docs": {
"top_hits": {
"size": 1,
"sort": [
{
"timestamp": {
"order": "desc"
}
}
]
}
}
}
},
"bucketcount": {
"stats_bucket": {
"buckets_path": "group._count"
}
}
}
}

How to get sum value for fields based on input field in elasticsearch (input field and sum output fields are different)

These are the documents present in Elasticsearch. I want output grouped by a given field that returns the sum of high and the sum of medium, where the values of high and medium must be greater than 0.
{
"host_id": 1,
"hostname": "Hostname1",
"businesshierarchy": {
"businessunit": "NON Unit",
"Location":"Un",
"Application":"App1"
},
"updatedts": 1601894092,
"critical": 0,
"high": 1,
"medium": 1,
"low": 0
},
{
"host_id": 2,
"hostname": "Hostname2",
"businesshierarchy": {
"businessunit": "One Unit",
"Location":"Un",
"Application":"App2"
},
"updatedts": 1601894092,
"critical": 0,
"high": 1,
"medium": 2,
"low": 0
},
{
"host_id": 3,
"hostname": "Hostname3",
"businesshierarchy": {
"businessunit": "NON Unit",
"Location":"Uk",
"Application":"App2"
},
"updatedts": 1601894092,
"critical": 0,
"high": 2,
"medium": 2,
"low": 0
}
Is there any query or method to get output like the following in Elasticsearch?
based on location
Location - Un
High - 2
medium - 3
Location - Uk
High - 2
medium - 2
Based on application
Application - App1
High - 1
medium - 1
Application - App2
High - 3
medium - 4
or based on hostname
hostname - Hostname1
High - 1
medium - 1
hostname - Hostname2
High - 1
medium - 2
hostname - Hostname3
High - 2
medium - 2
Similarly for businessunit. The field name is passed dynamically (businessunit, hostname, application, or location), and based on it I want to get the summed high and medium values as in the output above.
Adding a working example with index mapping, index data(same as that given in question), search query, and search result
Index Mapping:
{
"mappings": {
"properties": {
"hostname": {
"type": "keyword"
},
"businesshierarchy": {
"properties": {
"Location": {
"type": "keyword"
},
"Application": {
"type": "keyword"
}
}
}
}
}
}
Search Query:
{
"size": 0,
"aggs": {
"user": {
"terms": {
"field": "businesshierarchy.Location"
},
"aggs": {
"top_user_hits": {
"top_hits": {
"_source": {
"includes": [
"high",
"medium"
]
}
}
},
"high_sum": {
"sum": {
"field": "high"
}
},
"medium_sum": {
"sum": {
"field": "medium"
}
}
}
}
}
}
Search Result:
Based on the location
"aggregations": {
"user": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "Un",
"doc_count": 2,
"top_user_hits": {
"hits": {
"total": {
"value": 2,
"relation": "eq"
},
"max_score": 1.0,
"hits": [
{
"_index": "stof_64218649",
"_type": "_doc",
"_id": "1",
"_score": 1.0,
"_source": {
"high": 1,
"medium": 1
}
},
{
"_index": "stof_64218649",
"_type": "_doc",
"_id": "2",
"_score": 1.0,
"_source": {
"high": 1,
"medium": 2
}
}
]
}
},
"high_sum": {
"value": 2.0 <-- note this
},
"medium_sum": {
"value": 3.0
}
},
{
"key": "Uk",
"doc_count": 1,
"top_user_hits": {
"hits": {
"total": {
"value": 1,
"relation": "eq"
},
"max_score": 1.0,
"hits": [
{
"_index": "stof_64218649",
"_type": "_doc",
"_id": "3",
"_score": 1.0,
"_source": {
"high": 2,
"medium": 2
}
}
]
}
},
"high_sum": {
"value": 2.0 <-- note this
},
"medium_sum": {
"value": 2.0
}
}
]
}
For querying on the basis of application replace terms aggregation like this:
"aggs": {
"user": {
"terms": {
"field": "businesshierarchy.Application"
},
The following search result will be there:
"aggregations": {
"user": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "App2",
"doc_count": 2,
"top_user_hits": {
"hits": {
"total": {
"value": 2,
"relation": "eq"
},
"max_score": 1.0,
"hits": [
{
"_index": "stof_64218649",
"_type": "_doc",
"_id": "3",
"_score": 1.0,
"_source": {
"high": 2,
"medium": 2
}
},
{
"_index": "stof_64218649",
"_type": "_doc",
"_id": "2",
"_score": 1.0,
"_source": {
"high": 1,
"medium": 2
}
}
]
}
},
"high_sum": {
"value": 3.0
},
"medium_sum": {
"value": 4.0
}
},
{
"key": "App1",
"doc_count": 1,
"top_user_hits": {
"hits": {
"total": {
"value": 1,
"relation": "eq"
},
"max_score": 1.0,
"hits": [
{
"_index": "stof_64218649",
"_type": "_doc",
"_id": "1",
"_score": 1.0,
"_source": {
"high": 1,
"medium": 1
}
}
]
}
},
"high_sum": {
"value": 1.0
},
"medium_sum": {
"value": 1.0
}
}
]
}
For querying on the basis of hostname replace terms aggregation like this:
"aggs": {
"user": {
"terms": {
"field": "hostname"
},
Search Results will be :
"aggregations": {
"user": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "Hostname1",
"doc_count": 1,
"top_user_hits": {
"hits": {
"total": {
"value": 1,
"relation": "eq"
},
"max_score": 1.0,
"hits": [
{
"_index": "stof_64218649",
"_type": "_doc",
"_id": "1",
"_score": 1.0,
"_source": {
"high": 1,
"medium": 1
}
}
]
}
},
"high_sum": {
"value": 1.0
},
"medium_sum": {
"value": 1.0
}
},
{
"key": "Hostname2",
"doc_count": 1,
"top_user_hits": {
"hits": {
"total": {
"value": 1,
"relation": "eq"
},
"max_score": 1.0,
"hits": [
{
"_index": "stof_64218649",
"_type": "_doc",
"_id": "2",
"_score": 1.0,
"_source": {
"high": 1,
"medium": 2
}
}
]
}
},
"high_sum": {
"value": 1.0
},
"medium_sum": {
"value": 2.0
}
},
{
"key": "Hostname3",
"doc_count": 1,
"top_user_hits": {
"hits": {
"total": {
"value": 1,
"relation": "eq"
},
"max_score": 1.0,
"hits": [
{
"_index": "stof_64218649",
"_type": "_doc",
"_id": "3",
"_score": 1.0,
"_source": {
"high": 2,
"medium": 2
}
}
]
}
},
"high_sum": {
"value": 2.0
},
"medium_sum": {
"value": 2.0
}
}
]
}
We can use this query to get the expected result:
{
"query": {
"bool": {
"filter": [
{
"bool": {
"should": [
{
"range": {
"medium": {
"gt": 0
}
}
},
{
"range": {
"high": {
"gt": 0
}
}
}
]
}
}
]
}
},
"aggs": {
"fieldnames": {
"terms": {
"field": "hostname.keyword"
},
"aggs": {
"medium": {
"sum": {
"field": "medium"
}
},
"high": {
"sum": {
"field": "high"
}
}
}
}
},
"size": 0
}
Search result for this look like this
"aggregations": {
"fieldnames": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "ALL Unit",
"doc_count": 1,
"high": {
"value": 0.0
},
"medium": {
"value": 7.0
}
},
{
"key": "Latest Unit",
"doc_count": 1,
"high": {
"value": 0.0
},
"medium": {
"value": 5.0
}
},
{
"key": "NO Unit",
"doc_count": 1,
"high": {
"value": 1.0
},
"medium": {
"value": 1.0
}
}
]
}
}
If we need the result for location or application, we just need to change the terms field:
for Application
"aggs": {
"fieldnames": {
"terms": {
"field": "businesshierarchy.Application.keyword"
}
for Location
"aggs": {
"fieldnames": {
"terms": {
"field": "businesshierarchy.Location.keyword"
}
if the mapping is something like this,
{
"mappings": {
"properties": {
"hostname": {
"type": "keyword"
},
"businesshierarchy": {
"properties": {
"Location": {
"type": "keyword"
},
"Application": {
"type": "keyword"
}
}
}
}
}
}
There is no need for adding .keyword to
"terms": {
"field": "businesshierarchy.Location"
}

Filtering documents after aggregation

In Elasticsearch, I am storing item state snapshots in an append-only scheme.
For example:
POST /item/item
{
"id": "1",
"time": "2018-09-19T00:00:00Z",
"status": "ON_HOLD"
}
POST /item/item
{
"id": "2",
"time": "2018-09-19T00:01:00Z",
"status": "ON_HOLD"
}
POST /item/item
{
"id": "2",
"time": "2018-09-19T00:02:00Z",
"status": "DONE"
}
Now, what I wish to achieve is to answer the following question: which items are still on hold? (status==ON_HOLD).
In this simple case, the answer would be:
{
"id": "1",
"time": "2018-09-19T00:00:00Z",
"status": "ON_HOLD"
}
So, in order to get the last state of an item, I use a terms aggregation, on id, like so:
GET /item/_search
{
"size": 0,
"query": {
"match_all": {}
},
"aggs": {
"id": {
"terms": {
"field": "id.keyword",
"size": 10
},
"aggs": {
"top_items": {
"top_hits": {
"size": 1,
"sort": [
{
"time": {
"order": "desc"
}
}
],
"_source": {
"includes": ["*"]
}
}
}
}
}
}
}
This gives me the last available state of each item identified by its id:
{
"took": 1,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"skipped": 0,
"failed": 0
},
"hits": {
"total": 3,
"max_score": 0,
"hits": []
},
"aggregations": {
"id": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "2",
"doc_count": 2,
"top_items": {
"hits": {
"total": 2,
"max_score": null,
"hits": [
{
"_index": "item",
"_type": "item",
"_id": "S-5eCGYBNyILygyml2jR",
"_score": null,
"_source": {
"id": "2",
"time": "2018-09-19T00:02:00Z",
"status": "DONE"
},
"sort": [
1537315320000
]
}
]
}
}
},
{
"key": "1",
"doc_count": 1,
"top_items": {
"hits": {
"total": 1,
"max_score": null,
"hits": [
{
"_index": "item",
"_type": "item",
"_id": "Se5eCGYBNyILygymjmg0",
"_score": null,
"_source": {
"id": "1",
"time": "2018-09-19T00:00:00Z",
"status": "ON_HOLD"
},
"sort": [
1537315200000
]
}
]
}
}
}
]
}
}
}
Now the problem is I would like to filter the result (after aggregation) on Elasticsearch's side (not client).
I tried a bucket_selector aggregation but it complains since the top_hits result is not a number or single value numeric aggregation.
I also tried to add a script_field to get a numeric value but cannot seem to use this after:
"script_fields": {
"on_hold": {
"script": {
"lang": "painless",
"source": "doc['status.keyword'].value == 'ON_HOLD' ? 1 : 0"
}
}
}
Is what I want to do even possible on Elasticsearch's side or do I have to do it on the client side?
PS: adding the filter before the aggregation does not provide the correct result, as it will return items that have been ON_HOLD at any point in time.
EDIT:
Alright I am getting somewhere with:
GET /item/_search
{
"size": 0,
"query": {
"match_all": {}
},
"aggs": {
"id": {
"terms": {
"field": "id.keyword",
"size": 50
},
"aggs": {
"top_item": {
"terms": {
"size": 1,
"field": "time",
"order": {
"_key": "desc"
}
},
"aggs": {
"on_hold": {
"filter": {
"term": {
"status.keyword": "ON_HOLD"
}
},
"aggs": {
"document": {
"top_hits": {
"size": 1,
"_source": ["*"]
}
}
}
}
}
}
}
}
}
}
The top_hits aggregation is a metrics and not a bucket aggregation, so it does not do the job and must be used last.
One last problem though: filtered out buckets leave empty leaves:
"hits": []
Is there any way to remove such branches ending in empty leaves from the result tree? Thanks
Alright, I found the complete solution to the problem, including filtering out empty branches in the aggregation tree:
GET /item/_search
{
"size": 0,
"query": {
"match_all": {}
},
"aggs": {
"id": {
"terms": {
"field": "id.keyword",
"size": 50
},
"aggs": {
"top_item": {
"terms": {
"size": 1,
"field": "time",
"order": {
"_key": "desc"
}
},
"aggs": {
"on_hold": {
"filter": {
"term": {
"status.keyword": "ON_HOLD"
}
},
"aggs": {
"document": {
"top_hits": {
"size": 1,
"_source": ["*"]
}
}
}
},
"remove_filtered": {
"bucket_selector": {
"buckets_path": {
"count": "on_hold._count"
},
"script": {
"source": "params.count != 0"
}
}
}
}
},
"remove_empty": {
"bucket_selector": {
"buckets_path": {
"count": "top_item._bucket_count"
},
"script": "params.count != 0"
}
}
}
}
}
}
This gives the following output which was expected:
{
"took": 2,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"skipped": 0,
"failed": 0
},
"hits": {
"total": 3,
"max_score": 0,
"hits": []
},
"aggregations": {
"id": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "1",
"doc_count": 1,
"top_item": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": 1537315200000,
"key_as_string": "2018-09-19T00:00:00.000Z",
"doc_count": 1,
"on_hold": {
"doc_count": 1,
"document": {
"hits": {
"total": 1,
"max_score": 1,
"hits": [
{
"_index": "item",
"_type": "item",
"_id": "HvywM2YB5Ei0wOZMeia9",
"_score": 1,
"_source": {
"id": "1",
"time": "2018-09-19T00:00:00Z",
"status": "ON_HOLD"
}
}
]
}
}
}
}
]
}
}
]
}
}
}

ElasticSearch Max of Max?

I need to search for the max time (the most recent entries), and out of those entries I want the one with the max value.
I've tried various types of nesting on aggregation, using filters,etc. but it doesn't seem to work out. Any help?
Example :-
Mapping -
{
"trytime": {
"mappings": {
"value": {
"properties": {
"time": {
"type": "long"
},
"value": {
"type": "long"
}
}
}
}
}
}
Values :-
{
"took": 2,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"failed": 0
},
"hits": {
"total": 4,
"max_score": 1,
"hits": [
{
"_index": "trytime",
"_type": "value",
"_id": "2",
"_score": 1,
"_source": {
"time": 9,
"value": 5
}
},
{
"_index": "trytime",
"_type": "value",
"_id": "4",
"_score": 1,
"_source": {
"time": 6,
"value": 10
}
},
{
"_index": "trytime",
"_type": "value",
"_id": "1",
"_score": 1,
"_source": {
"time": 9,
"value": 6
}
},
{
"_index": "trytime",
"_type": "value",
"_id": "3",
"_score": 1,
"_source": {
"time": 9,
"value": 9
}
}
]
}
}
I need the maximum time and maximum value corresponding to such time (There will be multiple values corresponding to a particular time).
Query :-
GET /trytime/_search
{
"size": 0,
"aggs":{
"max_Value": {
"max": {
"field": "value"
}
}
},
"query": {
"bool": {
"must": [
{
"range": {
"time": {
"gte": ___NEED-MAX-TIME-VALUE-HERE___
}
}
}
]
}
}
}
If I understood your question correctly, this nested aggregation should give you the 3 latest times, and for each time you get the max value. Set size to one if you want only one result.
{
"size": 0,
"aggs": {
"times": {
"terms": {
"field": "time",
"size": 3,
"order": {
"_term": "desc"
}
},
"aggs": {
"max_vals": {
"max": {
"field": "value"
}
}
}
}
}
}
I'm not sure if this could be achieved by date histogram aggregation as well.

Resources