aggregation elastic search query with sum - elasticsearch

This is my current data. I want an aggregate query that returns, per variantId, the sum of quantity grouped by type in/out.
hits: {
total: {
value: 5,
relation: "eq",
},
max_score: 1,
hits: [
{
_index: "transactions",
_type: "_doc",
_id: "out2391",
_score: 1,
_source: {
date: "2021-03-08",
transactionId: 2391,
brandId: 1112,
outletId: 121222,
variantId: 1321,
qty: 1,
closing: 10,
type: "out",
}
],
},
I want a result that returns the sum of quantity for each type (in/out) per variant:
[{
variantId: 1321,
in: sum(qty),
out: sum(qty)
},
{
variantId: 13211,
in: sum(qty),
out: sum(qty)
}
]

Ingest test documents
POST test_shaheer/_doc
{
"date": "2021-03-08",
"transactionId": 2391,
"brandId": 1112,
"outletId": 121222,
"variantId": 1321,
"qty": 1,
"closing": 10,
"type": "out"
}
POST test_shaheer/_doc
{
"date": "2021-03-08",
"transactionId": 2391,
"brandId": 1112,
"outletId": 121222,
"variantId": 1321,
"qty": 1,
"closing": 10,
"type": "out"
}
POST test_shaheer/_doc
{
"date": "2021-03-08",
"transactionId": 2391,
"brandId": 1112,
"outletId": 121222,
"variantId": 1321,
"qty": 5,
"closing": 10,
"type": "in"
}
POST test_shaheer/_doc
{
"date": "2021-03-08",
"transactionId": 2391,
"brandId": 1112,
"outletId": 121222,
"variantId": 1321,
"qty": 2,
"closing": 10,
"type": "in"
}
To achieve what you need you have to nest aggregations: first you group by variantId, then each variantId by type, and finally you do a sum on the qty field inside each type.
Query
POST test_shaheer/_search
{
"size": 0,
"aggs": {
"variant_ids": {
"terms": {
"field": "variantId",
"size": 10
},
"aggs": {
"types": {
"terms": {
"field": "type.keyword",
"size": 10
},
"aggs": {
"qty_sum": {
"sum": {
"field": "qty"
}
}
}
}
}
}
}
}
Note size: 0 so that document hits are suppressed and only the aggregation results are returned.
Response
{
"took" : 2,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 4,
"relation" : "eq"
},
"max_score" : null,
"hits" : [ ]
},
"aggregations" : {
"variant_ids" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : 1321,
"doc_count" : 4,
"types" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : "in",
"doc_count" : 2,
"qty_sum" : {
"value" : 7.0
}
},
{
"key" : "out",
"doc_count" : 2,
"qty_sum" : {
"value" : 2.0
}
}
]
}
}
]
}
}
}

Related

How to aggs on particular field in elasticsearch

I have dictionary below
{
'id': 0,
'Title': 'Wolf',
'Major Genre': 'Action',
'IMDB': "7"
},
{
'id': 1,
'Title': 'The Land Girls',
'Major Genre': 'Drama',
'IMDB': "7"
},
{
'id': 2,
'Title': 'Beauty',
'Major Genre': 'Comedy',
'IMDB': "5"
}
Need to find the aggregation function for Major Genre
Need to filter the output to documents having Major Genre == Comedy and IMDB > 6
I tried the query below and got an error
{
"size": 100,
"aggregations": {
"terms": {
"Major Genre": "Comedy"
}
}
}
Edit: split the queries
Filtering documents by Comedy genre
POST test_nons/_search
{
"query": {
"bool": {
"filter": [
{
"range": {
"IMDB": {
"gte": 4
}
}
},
{
"term": {
"Major Genre.keyword": "Comedy"
}
}
]
}
}
}
Getting all possible genres
POST test_nons/_search
{
"size": 0,
"aggs": {
"major_genres": {
"terms": {
"field": "Major Genre.keyword",
"size": 10
}
}
}
}
Ingest data
POST test_nons/_doc
{
"id": 0,
"Title": "Wolf",
"Major Genre": "Action",
"IMDB": "7"
}
POST test_nons/_doc
{
"id": 1,
"Title": "The Land Girls",
"Major Genre": "Drama",
"IMDB": "7"
}
POST test_nons/_doc
{
"id": 2,
"Title": "Beauty",
"Major Genre": "Comedy",
"IMDB": "5"
}
Request
POST test_nons/_search
{
"query": {
"bool": {
"filter": [
{
"range": {
"IMDB": {
"gte": 6
}
}
},
{
"term": {
"Major Genre.keyword": "Comedy"
}
}
]
}
},
"aggs": {
"major_genres": {
"terms": {
"field": "Major Genre.keyword",
"size": 10
}
}
}
}
Response
There are no docs with Comedy genre and IMDB > 6, so the response would be empty.
For example purposes I will filter by IMDB > 4 instead of 6 to have some data in the response.
{
"took" : 2,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 1,
"relation" : "eq"
},
"max_score" : 0.0,
"hits" : [
{
"_index" : "test_nons",
"_type" : "_doc",
"_id" : "Rcd06ncB50NMsuQPeVRj",
"_score" : 0.0,
"_source" : {
"id" : 2,
"Title" : "Beauty",
"Major Genre" : "Comedy",
"IMDB" : "5"
}
}
]
},
"aggregations" : {
"major_genres" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : "Comedy",
"doc_count" : 1
}
]
}
}
}

I need an Elasticsearch query to display data in Grafana

Pretend I have documents a, b, c, d which have the fields site_name, device_name, interface_name and utilization. I need to display the max utilization for a device_name for each interface_name for each site_name.
Here is the sample data:
**Site Device Interface Name Utilization**
TYO tyo-gb1 TenGigabitEthernet1 33,23,699
TYO tyo-gb1 TenGigabitEthernet1 38,92,992
TYO tyo-gb2 TenGigabitEthernet2 98,824
TYO tyo-gb2 TenGigabitEthernet2 49,187
SYD syd-gb1 GigabitEthernet1 52,800
SYD syd-gb1 GigabitEthernet1 71,572
STLD stld-gb1 GigabitEthernet1 1,62,886
STLD stld-gb1 GigabitEthernet1 40,977
I need to display like this:
**Site Device Interface Name Utilization**
TYO tyo-gb1 TenGigabitEthernet1 38,92,992
TYO tyo-gb2 TenGigabitEthernet2 98,824
SYD syd-gb1 GigabitEthernet1 71,572
STLD stld-gb1 GigabitEthernet1 1,62,886
Thanks in advance!
You can use this query
{
"size": 0,
"_source": false,
"stored_fields": "_none_",
"aggregations": {
"groupby": {
"composite": {
"size": 1000,
"sources": [
{
"Site": {
"terms": {
"field": "Site",
"missing_bucket": true,
"order": "asc"
}
}
},
{
"Device": {
"terms": {
"field": "Device",
"missing_bucket": true,
"order": "asc"
}
}
},
{
"Interface Name": {
"terms": {
"field": "Interface Name",
"missing_bucket": true,
"order": "asc"
}
}
}
]
},
"aggregations": {
"Utilization Sum": {
"sum": {
"field": "Utilization"
}
}
}
}
}
}
Data ingest
POST test_nagendra/_doc
{
"site_name": "TYO",
"device_name": "tyo-gb1",
"interface_name": "TenGigabitEthernet1",
"utilization": 3323699
}
POST test_nagendra/_doc
{
"site_name": "TYO",
"device_name": "tyo-gb1",
"interface_name": "TenGigabitEthernet1",
"utilization": 3892992
}
POST test_nagendra/_doc
{
"site_name": "TYO",
"device_name": "tyo-gb2",
"interface_name": "TenGigabitEthernet2",
"utilization": 98824
}
POST test_nagendra/_doc
{
"site_name": "TYO",
"device_name": "tyo-gb2",
"interface_name": "TenGigabitEthernet2",
"utilization": 49187
}
POST test_nagendra/_doc
{
"site_name": "SYD",
"device_name": "syd-gb1",
"interface_name": "GigabitEthernet1",
"utilization": 52800
}
POST test_nagendra/_doc
{
"site_name": "SYD",
"device_name": "syd-gb1",
"interface_name": "GigabitEthernet1",
"utilization": 71572
}
POST test_nagendra/_doc
{
"site_name": "STLD",
"device_name": "stld-gb1",
"interface_name": "GigabitEthernet1",
"utilization": 162886
}
POST test_nagendra/_doc
{
"site_name": "STLD",
"device_name": "stld-gb1",
"interface_name": "GigabitEthernet1",
"utilization": 40977
}
Query
POST test_nagendra/_search
{
"size": 0,
"aggs": {
"sites": {
"terms": {
"field": "site_name.keyword",
"size": 10
},
"aggs": {
"devices": {
"terms": {
"field": "device_name.keyword",
"size": 10
},
"aggs": {
"interfaces": {
"terms": {
"field": "interface_name.keyword",
"size": 10
},
"aggs": {
"max_utilization": {
"max": {
"field": "utilization"
}
}
}
}
}
}
}
}
}
}
Response
{
"took" : 3,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 8,
"relation" : "eq"
},
"max_score" : null,
"hits" : [ ]
},
"aggregations" : {
"sites" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : "TYO",
"doc_count" : 4,
"devices" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : "tyo-gb1",
"doc_count" : 2,
"interfaces" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : "TenGigabitEthernet1",
"doc_count" : 2,
"max_utilization" : {
"value" : 3892992.0
}
}
]
}
},
{
"key" : "tyo-gb2",
"doc_count" : 2,
"interfaces" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : "TenGigabitEthernet2",
"doc_count" : 2,
"max_utilization" : {
"value" : 98824.0
}
}
]
}
}
]
}
},
{
"key" : "STLD",
"doc_count" : 2,
"devices" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : "stld-gb1",
"doc_count" : 2,
"interfaces" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : "GigabitEthernet1",
"doc_count" : 2,
"max_utilization" : {
"value" : 162886.0
}
}
]
}
}
]
}
},
{
"key" : "SYD",
"doc_count" : 2,
"devices" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : "syd-gb1",
"doc_count" : 2,
"interfaces" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : "GigabitEthernet1",
"doc_count" : 2,
"max_utilization" : {
"value" : 71572.0
}
}
]
}
}
]
}
}
]
}
}
}

How to get multiple fields returned in elasticsearch query?

How to get multiple fields returned that are unique using elasticsearch query?
All of my documents have duplicate name and job fields. I would like to use an es query to get all the unique values which include the name and job in the same response, so they are tied together.
[
{
"name": "albert",
"job": "teacher",
"dob": "11/22/91"
},
{
"name": "albert",
"job": "teacher",
"dob": "11/22/91"
},
{
"name": "albert",
"job": "teacher",
"dob": "11/22/91"
},
{
"name": "justin",
"job": "engineer",
"dob": "1/2/93"
},
{
"name": "justin",
"job": "engineer",
"dob": "1/2/93"
},
{
"name": "luffy",
"job": "rubber man",
"dob": "1/2/99"
}
]
Expected result in any format -> I was trying to use aggs but I only get one field
[
{
"name": "albert",
"job": "teacher"
},
{
"name": "justin",
"job": "engineer"
},
{
"name": "luffy",
"job": "rubber man"
},
]
This is what I tried so far
GET name.test.index/_search
{
"size": 0,
"aggs" : {
"name" : {
"terms" : { "field" : "name.keyword" }
}
}
}
using the above query gets me this which is good that its unique
{
"took" : 1,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 95,
"relation" : "eq"
},
"max_score" : null,
"hits" : [ ]
},
"aggregations" : {
"name" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : "Justin",
"doc_count" : 56
},
{
"key" : "Luffy",
"doc_count" : 31
},
{
"key" : "Albert",
"doc_count" : 8
}
]
}
}
}
I tried doing nested aggregation but that did not work. Is there an alternative solution for getting multiple unique values or am I missing something?
That's a good start! There are a few ways to achieve what you want, each provides a different response format, so you can decide which one you prefer.
The first option is to leverage the top_hits sub-aggregation and return the two fields for each name bucket:
GET name.test.index/_search
{
"size": 0,
"aggs": {
"name": {
"terms": {
"field": "name.keyword"
},
"aggs": {
"top": {
"top_hits": {
"_source": [
"name",
"job"
],
"size": 1
}
}
}
}
}
}
The second option is to use a script in your terms aggregation instead of a field to return a compound value:
GET name.test.index/_search
{
"size": 0,
"aggs": {
"name": {
"terms": {
"script": "doc['name'].value + ' - ' + doc['job'].value"
}
}
}
}
The third option is to use two levels of field collapsing:
GET name.test.index/_search
{
"collapse": {
"field": "name",
"inner_hits": {
"name": "by_job",
"collapse": {
"field": "job"
},
"size": 1
}
}
}

Issue with nested aggregations ElasticSearch : doing a sum after a max

I know sub aggregation isn't possible with metric aggregations and that Elasticsearch supports sub aggregations with buckets. But I am a bit lost on how to do this.
I want to do a sum after nested aggregations and after having aggregated by max timestamp.
Something like the code below gives me this error: "Aggregator [max_date_aggs] of type [max] cannot accept sub-aggregations", which is expected. Is there a way to make it work?
{
"aggs": {
"sender_comp_aggs": {
"terms": {
"field": "senderComponent"
},
"aggs": {
"activity_mnemo_aggs": {
"terms": {
"field": "activityMnemo"
},
"aggs": {
"activity_instance_id_aggs": {
"terms": {
"field": "activityInstanceId"
},
"aggs": {
"business_date_aggs": {
"terms": {
"field": "correlationIdSet.businessDate"
},
"aggs": {
"context_set_id_closing_aggs": {
"terms": {
"field": "contextSetId.closing"
},
"aggs": {
"max_date_aggs": {
"max": {
"field": "timestamp"
},
"aggs" : {
"sum_done": {
"sum": {
"field": "itemNumberDone"
}
}
}
}
}
}
}
}
}
}
}
}
}
}
Thank you
I am not 100% sure what you would like to achieve; it would help if you had also shared the mapping.
A bucket aggregation is about defining the buckets/groups. As you do in your example, you can wrap/nest bucket aggregations to further break down your buckets into sub-buckets and so on.
By default Elasticsearch always calculates the count-metric, but you can specify other metrics to get calculated as well. A metric is calculated per bucket / for a bucket (and not for another metric) this is why you cannot nest a metrics aggregation under a metric aggregation, it simply does not make sense.
Depending how your data looks like the only change you may need to do is, moving the sum_done aggregation out of the aggs-clause, to the same level as your max_date_aggs-aggregation.
Code Snippet
"aggs": {
"max_date_aggs": { "max": {"field": "timestamp"} },
"sum_done": { "sum": { "field": "itemNumberDone"} }
}
After you refined your question and provided sample documents, I managed to come up with a solution requiring one single request. As previously mentioned, the sum-metric aggregation needs to operate on a bucket and not on a metric. The solution is pretty straightforward: rather than calculating the max-date, just re-formulate that aggregation as a terms-aggregation sorted by descending timestamp, asking for exactly one bucket.
Solution
GET gos_element/_search
{
"size": 0,
"aggs": {
"sender_comp_aggs": {
"terms": {"field": "senderComponent.keyword"},
"aggs": {
"activity_mnemo_aggs": {
"terms": {"field": "activityMnemo.keyword"},
"aggs": {
"activity_instance_id_aggs": {
"terms": {"field": "activityInstanceId.keyword"},
"aggs": {
"business_date_aggs": {
"terms": {"field": "correlationIdSet.businessDate"},
"aggs": {
"context_set_id_closing_aggs": {
"terms": {"field": "contextSetId.closing.keyword"},
"aggs": {
"max_date_bucket_aggs": {
"terms": {
"field": "timestamp",
"size": 1,
"order": {"_key": "desc"}
},
"aggs": {
"sum_done": {
"sum": {"field": "itemNumberDone"}
}
}
}
}
}
}
}
}
}
}
}
}
}
}
}
As I relied on the default Elasticsearch mapping, I had to refer to the .keyword-version of the fields. If your fields are directly mapped to a field of type keyword, you don't need to do that.
You can test the request above right away after indexing the documents provided by you with the following 2 commands:
PUT gos_element/_doc/AW_yu3dIa2R_HwqpSz-o
{
"senderComponent": "PS",
"timestamp": "2020-01-28T02:31:00Z",
"activityMnemo": "PScommand",
"activityInstanceId": "123466",
"activityStatus": "Progress",
"activityStatusNumber": 300,
"specificActivityStatus": "",
"itemNumberTotal": 10,
"itemNumberDone": 9,
"itemNumberInError": 0,
"itemNumberNotStarted": 1,
"itemNumberInProgress": 0,
"itemUnit": "Command",
"itemList": [],
"contextSetId": {
"PV": "VAR",
"closing": "PARIS"
},
"correlationIdSet": {
"closing": "PARIS",
"businessDate": "2020-01-27",
"correlationId": "54947df8-0e9e-4471-a2f9-9af509fb5899"
},
"errorSet": [],
"kpiSet": "",
"activitySpecificPayload": "",
"messageGroupUUID": "54947df8-0e9e-4471-a2f9-9af509fb5899"
}
PUT gos_element/_doc/AW_yu3dIa2R_HwqpSz8z
{
"senderComponent": "PS",
"timestamp": "2020-01-28T03:01:00Z",
"activityMnemo": "PScommand",
"activityInstanceId": "123466",
"activityStatus": "End",
"activityStatusNumber": 200,
"specificActivityStatus": "",
"itemNumberTotal": 10,
"itemNumberDone": 10,
"itemNumberInError": 0,
"itemNumberNotStarted": 0,
"itemNumberInProgress": 0,
"itemUnit": "Command",
"itemList": [],
"contextSetId": {
"PV": "VAR",
"closing": "PARIS"
},
"correlationIdSet": {
"closing": "PARIS",
"businessDate": "2020-01-27",
"correlationId": "54947df8-0e9e-4471-a2f9-9af509fb5899"
},
"errorSet": [],
"errorMessages": "",
"kpiSet": "",
"activitySpecificPayload": "",
"messageGroupUUID": "54947df8-0e9e-4471-a2f9-9af509fb5899"
}
As a result you get back the following response (with value 10 as expected):
{
"took" : 8,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 2,
"relation" : "eq"
},
"max_score" : null,
"hits" : [ ]
},
"aggregations" : {
"sender_comp_aggs" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : "PS",
"doc_count" : 2,
"activity_mnemo_aggs" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : "PScommand",
"doc_count" : 2,
"activity_instance_id_aggs" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : "123466",
"doc_count" : 2,
"business_date_aggs" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : 1580083200000,
"key_as_string" : "2020-01-27T00:00:00.000Z",
"doc_count" : 2,
"context_set_id_closing_aggs" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : "PARIS",
"doc_count" : 2,
"max_date_bucket_aggs" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 1,
"buckets" : [
{
"key" : 1580180460000,
"key_as_string" : "2020-01-28T03:01:00.000Z",
"doc_count" : 1,
"sum_done" : {
"value" : 10.0
}
}
]
}
}
]
}
}
]
}
}
]
}
}
]
}
}
]
}
}
}
Here are two documents:
{
"_type": "gos_element",
"_id": "AW_yu3dIa2R_HwqpSz-o",
"_score": 5.785128,
"_source": {
"senderComponent": "PS",
"timestamp": "2020-01-28T02:31:00Z",
"activityMnemo": "PScommand",
"activityInstanceId": "123466",
"activityStatus": "Progress",
"activityStatusNumber": 300,
"specificActivityStatus": "",
"itemNumberTotal": 10,
"itemNumberDone": 9,
"itemNumberInError": 0,
"itemNumberNotStarted": 1,
"itemNumberInProgress": 0,
"itemUnit": "Command",
"itemList": [],
"contextSetId": {
"PV": "VAR",
"closing": "PARIS"
},
"correlationIdSet": {
"closing": "PARIS",
"businessDate": "2020-01-27",
"correlationId": "54947df8-0e9e-4471-a2f9-9af509fb5899"
},
"errorSet": [],
"kpiSet": "",
"activitySpecificPayload": "",
"messageGroupUUID": "54947df8-0e9e-4471-a2f9-9af509fb5899"
}
},
{
"_type": "gos_element",
"_id": "AW_yu3dIa2R_HwqpSz8z",
"_score": 4.8696175,
"_source": {
"senderComponent": "PS",
"timestamp": "2020-01-28T03:01:00Z",
"activityMnemo": "PScommand",
"activityInstanceId": "123466",
"activityStatus": "End",
"activityStatusNumber": 200,
"specificActivityStatus": "",
"itemNumberTotal": 10,
"itemNumberDone": 10,
"itemNumberInError": 0,
"itemNumberNotStarted": 0,
"itemNumberInProgress": 0,
"itemUnit": "Command",
"itemList": [],
"contextSetId": {
"PV": "VAR",
"closing": "PARIS"
},
"correlationIdSet": {
"closing": "PARIS",
"businessDate": "2020-01-27",
"correlationId": "54947df8-0e9e-4471-a2f9-9af509fb5899"
},
"errorSet": [],
"errorMessages": "",
"kpiSet": "",
"activitySpecificPayload": "",
"messageGroupUUID": "54947df8-0e9e-4471-a2f9-9af509fb5899"
}
}
]
}
I would like to aggregate of a few terms (senderComponent, activityMnemo ,activityInstanceId, correlationIdSet.businessDate and contextSetId.closing) and also aggregate on the max timestamp of each of these aggregations. Once this is done, I would like to sum the itemNumberDone.
If we take only these two documents and do the aggregations, I would like to get 10 itemNumberDone.
Is it possible with only one query and using buckets?

Is it possible with aggregation to amalgamate all values of an array property from all grouped documents into the coalesced document?

I have documents with the format similar to the following:
[
{
"name": "fred",
"title": "engineer",
"division_id": 20
"skills": [
"walking",
"talking"
]
},
{
"name": "ed",
"title": "ticket-taker",
"division_id": 20
"skills": [
"smiling"
]
}
]
I would like to run an aggs query that would show the complete set of skills for the division: ie,
{
"aggs":{
"distinct_skills":{
"cardinality":{
"field":"division_id"
}
}
},
"_source":{
"includes":[
"division_id",
"skills"
]
}
}
.. so that the resulting hit would look like:
{
"division_id": 20,
"skills": [
"walking",
"talking",
"smiling"
]
}
I know I can retrieve inner_hits and iterate through the list and amalgamate values "manually". I assume it would perform better if I could do it in a query.
Just nest two Terms Aggregations as shown below:
POST <your_index_name>/_search
{
"size": 0,
"aggs": {
"my_division_ids": {
"terms": {
"field": "division_id",
"size": 10
},
"aggs": {
"my_skills": {
"terms": {
"field": "skills", <---- If "skills" is not a keyword field (e.g. under dynamic mapping), use the "skills.keyword" field instead.
"size": 10
}
}
}
}
}
}
Below is the sample response:
Response:
{
"took" : 490,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 2,
"relation" : "eq"
},
"max_score" : null,
"hits" : [ ]
},
"aggregations" : {
"my_division_ids" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : 20, <---- division_id
"doc_count" : 2,
"my_skills" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [ <---- Skills
{
"key" : "smiling",
"doc_count" : 1
},
{
"key" : "talking",
"doc_count" : 1
},
{
"key" : "walking",
"doc_count" : 1
}
]
}
}
]
}
}
}
Hope this helps!

Resources