Aggregation and Sorting in Elastic Search - sorting

I want to sort the aggregated results of my query in Elasticsearch.
Equivalent SQL query: select col1, col2, sum(col3) from table group by col1,col2 order by sum(col3) desc;
I tried the query below; it returns results, but not in the sort order I am expecting:
{
"from": 0,
"size": 0,
"_source": {
"includes": [
"col1",
"col2",
"SUM"
],
"excludes": []
},
"stored_fields": [
"col1",
"col2"
],
"aggregations": {
"col1": {
"terms": {
"field": "col1",
"size": 200,
"min_doc_count": 1,
"shard_min_doc_count": 0,
"show_term_doc_count_error": false
},
"aggregations": {
"col2": {
"terms": {
"field": "col2",
"size": 10,
"min_doc_count": 1,
"shard_min_doc_count": 0,
"show_term_doc_count_error": false
},
"aggregations": {
"SUM_0": {
"sum": {
"field": "col3"
}
},
"col3_bucket_sort": {
"bucket_sort": {
"sort": [
{ "SUM_0": { "order": "desc" } }
],
"size": 3
}
}
}
}
}
}
}
}
Sampled Index data
{
"_index": "my_index",
"_type": "products",
"_id": "OJfBSXUB0GzAt2o_zVdS",
"_score": 1.0,
"_source": {
"product_name": "car",
"product_type": "retail",
"qty": 5
}
}
{
"_index": "my_index",
"_type": "report",
"_id": "OpfBSXUB0GzAt2o_zVfG1",
"_score": 1.0,
"_source": {
"product_name": "bike",
"product_type": "retail",
"qty": 5
}
},
{
"_index": "my_index",
"_type": "report",
"_id": "OpfBSXUB0GzAt2o_zVfG",
"_score": 1.0,
"_source": {
"product_name": "car",
"product_type": "retail",
"qty": 3
}
},
{
"_index": "my_index",
"_type": "report",
"_id": "OpfBSXUB0GzAt2o_zVfG2",
"_score": 1.0,
"_source": {
"product_name": "bike",
"product_type": "retail",
"qty": 1
}
}
Expected output: I want to aggregate (group by) my documents on the fields product_name and product_type, sorted by sum(qty).
Equivalent SQL query: select product_name, product_type, sum(qty) from product_table group by product_name, product_type order by sum(qty) desc;
{
"took": 3,
"timed_out": false,
"_shards": {
"total": 1,
"successful": 1,
"skipped": 0,
"failed": 0
},
"hits": {
"total": {
"value": 2,
"relation": "eq"
},
"max_score": null,
"hits": []
},
"aggregations": {
"product_name": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "car",
"doc_count": 2,
"product_type": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "retail",
"doc_count": 2,
"SUM_0": {
"value":8
}
}
]
}
},
{
"key": "bike",
"doc_count": 2,
"product_type": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "retail",
"doc_count": 2,
"SUM_0": {
"value": 6
}
}
]
}
}
]
}
}
}
But I am getting the output below, i.e. the documents are aggregated successfully, but sorting on sum(qty) is not working:
{
"took": 3,
"timed_out": false,
"_shards": {
"total": 1,
"successful": 1,
"skipped": 0,
"failed": 0
},
"hits": {
"total": {
"value": 2,
"relation": "eq"
},
"max_score": null,
"hits": []
},
"aggregations": {
"product_name": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "bike",
"doc_count": 2,
"product_type": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "retail",
"doc_count": 2,
"SUM_0": {
"value": 6
}
}
]
}
},
{
"key": "car",
"doc_count": 2,
"product_type": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "retail",
"doc_count": 2,
"SUM_0": {
"value":8
}
}
]
}
}
]
}
}
}

Since you are grouping your data by col1 and col2 (i.e. using two nested terms aggregations), sorting on the sum aggregation with a bucket sort aggregation placed inside the inner (col2) aggregation does not give the expected result: it only reorders the inner buckets within each outer bucket, and the outer buckets keep their default order.
You need to use the max bucket aggregation, a sibling pipeline aggregation that identifies the bucket(s) with the maximum value of a specified metric in a sibling aggregation and outputs both the value and the key(s) of the bucket(s).
Then you should perform the bucket sort aggregation on that aggregated result, at the level of the outer terms aggregation.
Adding a working example with index data(used same as that in the question), search query, and search result.
Search Query:
{
"size": 0,
"aggs": {
"agg1": {
"terms": {
"field": "product_name.keyword"
},
"aggs": {
"agg2": {
"terms": {
"field": "product_type.keyword"
},
"aggregations": {
"SUM_0": {
"sum": {
"field": "qty"
}
}
}
},
"sum_max_bucket": {
"max_bucket": {
"buckets_path": "agg2>SUM_0" <-- note this
}
},
"sum_bucket_sort": {
"bucket_sort": {
"sort": {
"sum_max_bucket": {
"order": "desc"
}
}
}
}
}
}
}
}
Search Result:
"aggregations": {
"agg1": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "car",
"doc_count": 2,
"agg2": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "retail",
"doc_count": 2,
"SUM_0": {
"value": 8.0 <-- note this
}
}
]
},
"sum_max_bucket": {
"value": 8.0,
"keys": [
"retail"
]
}
},
{
"key": "bike",
"doc_count": 2,
"agg2": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "retail",
"doc_count": 2,
"SUM_0": {
"value": 6.0 <-- note this
}
}
]
},
"sum_max_bucket": {
"value": 6.0,
"keys": [
"retail"
]
}
}
]
}

Referring to an excerpt from ES-Docs
The bucket_sort aggregation, like all pipeline aggregations, is
executed after all other non-pipeline aggregations. This means the
sorting only applies to whatever buckets are already returned from the
parent aggregation. For example, if the parent aggregation is terms
and its size is set to 10, the bucket_sort will only sort over those
10 returned term buckets.
The above is the reason why your query is not giving the correct result.

Related

How to get buckets in nested aggregation even if there is no matched data

I sent a request like this (I tweaked just some terms for you to understand):
{
"size": 0,
"aggs": {
"byMonth": {
"date_histogram": {
"field": "date_time",
"order": {
"_key": "desc"
},
"interval": "month",
"format": "yyyy-MM",
"extended_bounds": {
"max": "2022-02",
"min": "2022-01"
}
},
"aggs": {
"byTest": {
"terms": {
"field": "test_cate_m",
"size": 100,
"order": {
"_count": "desc"
}
}
}
}
}
}
}
and response is :
{
"took": 15,
"timed_out": false,
"_shards": {
"total": 183,
"successful": 183,
"skipped": 0,
"failed": 0
},
"hits": {
"total": {
"value": 10000,
"relation": "gte"
},
"max_score": null,
"hits": [
]
},
"aggregations": {
"byMonth": {
"buckets": [
{
"key_as_string": "2022-02",
"key": 1643673600000,
"doc_count": 600,
"byTest": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "test1",
"doc_count": 100
},
{
"key": "test2",
"doc_count": 200
},
{
"key": "test3",
"doc_count": 300
}
]
}
},
{
"key_as_string": "2022-01",
"key": 1640995200000,
"doc_count": 100,
"byTest": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "test3",
"doc_count": 100
}
]
}
}
]
}
}
}
In the nested buckets for 2022-01, there are no 'test1' and 'test2' entries. I'd like to get 'test1' and 'test2' in those buckets as well, so that I can compare the two months, even if there is no data.
Also, if possible, can I do calculations on both results within the query? That is, I'd like to compare each key's doc_count across the buckets in one query, not just retrieve the data. Can I do this?
If you can help me out, it'll be a huge help :)

Elasticsearch top_hits aggregation result and doc_count are different

Query
GET /someindex/_search
{
"size": 0,
"query": {
"ids": {
"types": [],
"values": ["08a2","08a3","03a2","03a3","84a1"]
}
},
"aggregations": {
"498": {
"terms": {
"field": "holderInfo.raw",
"size": 50
},
"aggregations": {
"tops": {
"top_hits": {
"_source": {
"includes": ["uid"]
}
}
}
}
}
}
}
Result
{
...
"hits": {
"total": 5,
"max_score": 0,
"hits": []
},
"aggregations": {
"498": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "MATSUSHITA ELECTRIC INDUSTRIAL",
"doc_count": 5,
"tops": {
"hits": {
"total": 5,
"max_score": 1,
"hits": [
{
"_index": "someindex",
"_id": "03a3",
"_score": 1,
"_source": {
"uid": "03a3"
}
},
{
"_index": "someindex",
"_id": "08a2",
"_score": 1,
"_source": {
"uid": "08a2"
}
},
{
"_index": "someindex",
"_id": "84a1",
"_score": 1,
"_source": {
"uid": "84a1"
}
}
]
}
}
}
]
}
}
}
"08a2", "08a3", "03a2", "03a3" and "84a1" each clearly have 'MATSUSHITA ELECTRIC INDUSTRIAL' in the holderInfo.raw field.
Therefore, there are 5 cases in the doc_count, but only "03a3", "08a2", and "84a1" are output in the top_hits results, and "08a3" and "03a2" are omitted.
Query
GET /someindex/_search
{
"size": 0,
"query": {
"ids": {
"types": [],
"values": ["08a2","08a3","03a2","03a3","84a1"]
}
},
"aggregations": {
"498": {
"terms": {
"script": {
"inline": "doc['holderInfo.raw'].value"
},
"size": 50
}
}
}
}
Result
{
...
"hits": {
"total": 5,
"max_score": 0,
"hits": []
},
"aggregations": {
"498": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "MATSUSHITA ELECTRIC INDUSTRIAL",
"doc_count": 3
}
]
}
}
}
In addition, two documents are omitted when aggregating with a script.
I'd like to know why some uids are missing.
I'm in a situation where I have to use Elasticsearch version 2.2. I want to know whether this is an Elasticsearch bug that occurs in old versions or a mistake on my side.
Thanks!
By default, the top_hits aggregation returns the first 3 top hits. You just need to increase the size parameter:
GET /someindex/_search
{
"size": 0,
"query": {
"ids": {
"types": [],
"values": ["08a2","08a3","03a2","03a3","84a1"]
}
},
"aggregations": {
"498": {
"terms": {
"field": "holderInfo.raw",
"size": 50
},
"aggregations": {
"tops": {
"top_hits": {
"size": 5, <---- add this
"_source": {
"includes": ["uid"]
}
}
}
}
}
}
}

Filtering documents after aggregation

In Elasticsearch, I am storing item state snapshots in an append-only scheme.
For example:
POST /item/item
{
"id": "1",
"time": "2018-09-19T00:00:00Z",
"status": "ON_HOLD"
}
POST /item/item
{
"id": "2",
"time": "2018-09-19T00:01:00Z",
"status": "ON_HOLD"
}
POST /item/item
{
"id": "2",
"time": "2018-09-19T00:02:00Z",
"status": "DONE"
}
Now, what I wish to achieve is to answer the following question: which items are still on hold (status == ON_HOLD)?
In this simple case, the answer would be:
{
"id": "1",
"time": "2018-09-19T00:00:00Z",
"status": "ON_HOLD"
}
So, in order to get the last state of an item, I use a terms aggregation, on id, like so:
GET /item/_search
{
"size": 0,
"query": {
"match_all": {}
},
"aggs": {
"id": {
"terms": {
"field": "id.keyword",
"size": 10
},
"aggs": {
"top_items": {
"top_hits": {
"size": 1,
"sort": [
{
"time": {
"order": "desc"
}
}
],
"_source": {
"includes": ["*"]
}
}
}
}
}
}
}
This gives me the last available state of each item identified by its id:
{
"took": 1,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"skipped": 0,
"failed": 0
},
"hits": {
"total": 3,
"max_score": 0,
"hits": []
},
"aggregations": {
"id": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "2",
"doc_count": 2,
"top_items": {
"hits": {
"total": 2,
"max_score": null,
"hits": [
{
"_index": "item",
"_type": "item",
"_id": "S-5eCGYBNyILygyml2jR",
"_score": null,
"_source": {
"id": "2",
"time": "2018-09-19T00:02:00Z",
"status": "DONE"
},
"sort": [
1537315320000
]
}
]
}
}
},
{
"key": "1",
"doc_count": 1,
"top_items": {
"hits": {
"total": 1,
"max_score": null,
"hits": [
{
"_index": "item",
"_type": "item",
"_id": "Se5eCGYBNyILygymjmg0",
"_score": null,
"_source": {
"id": "1",
"time": "2018-09-19T00:00:00Z",
"status": "ON_HOLD"
},
"sort": [
1537315200000
]
}
]
}
}
}
]
}
}
}
Now the problem is I would like to filter the result (after aggregation) on Elasticsearch's side (not client).
I tried a bucket_selector aggregation but it complains since the top_hits result is not a number or single value numeric aggregation.
I also tried to add a script_field to get a numeric value but cannot seem to use this after:
"script_fields": {
"on_hold": {
"script": {
"lang": "painless",
"source": "doc['status.keyword'].value == 'ON_HOLD' ? 1 : 0"
}
}
}
Is what I want to do even possible on Elasticsearch's side or do I have to do it on the client side?
PS: adding the filter before the aggregation does not provide correct result as it will return items who have been ON_HOLD at any point in time.
EDIT:
Alright I am getting somewhere with:
GET /item/_search
{
"size": 0,
"query": {
"match_all": {}
},
"aggs": {
"id": {
"terms": {
"field": "id.keyword",
"size": 50
},
"aggs": {
"top_item": {
"terms": {
"size": 1,
"field": "time",
"order": {
"_key": "desc"
}
},
"aggs": {
"on_hold": {
"filter": {
"term": {
"status.keyword": "ON_HOLD"
}
},
"aggs": {
"document": {
"top_hits": {
"size": 1,
"_source": ["*"]
}
}
}
}
}
}
}
}
}
}
The top_hits aggregation is a metrics aggregation, not a bucket aggregation, so it cannot be used for the grouping step itself and must come last, as the innermost aggregation.
One last problem though: filtered out buckets leave empty leaves:
"hits": []
Is there any way to remove such branches ending in empty leaves from the result tree? Thanks
Alright, I found the complete solution to the problem, including filtering out empty branches in the aggregation tree:
GET /item/_search
{
"size": 0,
"query": {
"match_all": {}
},
"aggs": {
"id": {
"terms": {
"field": "id.keyword",
"size": 50
},
"aggs": {
"top_item": {
"terms": {
"size": 1,
"field": "time",
"order": {
"_key": "desc"
}
},
"aggs": {
"on_hold": {
"filter": {
"term": {
"status.keyword": "ON_HOLD"
}
},
"aggs": {
"document": {
"top_hits": {
"size": 1,
"_source": ["*"]
}
}
}
},
"remove_filtered": {
"bucket_selector": {
"buckets_path": {
"count": "on_hold._count"
},
"script": {
"source": "params.count != 0"
}
}
}
}
},
"remove_empty": {
"bucket_selector": {
"buckets_path": {
"count": "top_item._bucket_count"
},
"script": "params.count != 0"
}
}
}
}
}
}
This gives the following output which was expected:
{
"took": 2,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"skipped": 0,
"failed": 0
},
"hits": {
"total": 3,
"max_score": 0,
"hits": []
},
"aggregations": {
"id": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "1",
"doc_count": 1,
"top_item": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": 1537315200000,
"key_as_string": "2018-09-19T00:00:00.000Z",
"doc_count": 1,
"on_hold": {
"doc_count": 1,
"document": {
"hits": {
"total": 1,
"max_score": 1,
"hits": [
{
"_index": "item",
"_type": "item",
"_id": "HvywM2YB5Ei0wOZMeia9",
"_score": 1,
"_source": {
"id": "1",
"time": "2018-09-19T00:00:00Z",
"status": "ON_HOLD"
}
}
]
}
}
}
}
]
}
}
]
}
}
}

query for elasticsearch returning count

I am struggling to create the query/rule that will help me create an alerting script. I want to query the elasticsearch API for counts on a specific index so that I can get alerted when the count reaches a certain threshold.
The following query is an attempt as I have no experience with this:
{
"query": {
"filtered": {
"query": {
"query_string": {
"analyze_wildcard": true,
"query": "*"
}
},
"filter": {
"bool": {
"must": [
{
"query": {
"match": {
"PStream": {
"query": "*",
"type": "phrase"
}
}
}
},
{
"range": {
"#timestamp": {
"gte": 1447789445320,
"lte": 1447793045320
}
}
}
],
"must_not": []
}
}
}
},
"highlight": {
"pre_tags": [
"#kibana-highlighted-field#"
],
"post_tags": [
"#/kibana-highlighted-field#"
],
"fields": {
"*": {}
},
"fragment_size": 2147483647
},
"size": 500,
"sort": [
{
"#timestamp": {
"order": "desc",
"unmapped_type": "boolean"
}
}
],
"aggs": {
"2": {
"date_histogram": {
"field": "#timestamp",
"interval": "1m",
"pre_zone": "-05:00",
"pre_zone_adjust_large_interval": true,
"min_doc_count": 0,
"extended_bounds": {
"min": 1447789445317,
"max": 1447793045317
}
}
}
},
The field PStream is the field that I am focused on
EDIT:
An example of the data going to the index:
{
"_index": "logstash-2015.11.17",
"_type": "logs",
"_id": "AVEXMKu2YVnF1NOjr9YT",
"_score": null,
"_source": {
"authorUrl": "",
"postUrl": "",
"pubDate": "2015-11-17T15:18:24",
"scrapeDate": "2015-11-17T15:44:03",
"clientId": "136902834",
"query": "Jenny Balatsinou",
"PType": "post",
"tLatency": 1539,
"PLang": "en",
"PStream": "864321",
"PName": "xStackOverflow",
"#version": "1",
"#timestamp": "2015-11-17T20:44:03.400Z"
},
"fields": {
"#timestamp": [
1447793043400
],
"pubDate": [
1447773504000
],
"scrapeDate": [
1447775043000
]
},
"sort": [
1447793043400
]
There are about 20 million of these messages getting indexed daily into Elasticsearch. I have created a dashboard in Kibana where I view this data and stats. I would like to write a proper query that I can use in a Java program that runs periodically and checks this index. It should return the hourly total count grouped by the PStream field, which has multiple values, so that any time a count is 0 the program sends an alert.
Eg. Output:
"result": {
"total": 74,
"successful": 63,
"failed": 11,
{
{
"index": "logstash-2015.11.08",
"PStream": "37647338933",
"Count": 1234532
},
{
"index": "logstash-2015.11.08",
"PStream": "45345343566",
"Count": 156532
},
As a quick example (per comments above), I just set up a trivial index:
DELETE /test_index
PUT /test_index
added some (simplified) data:
PUT /test_index/doc/_bulk
{"index":{"_id":1}}
{"PStream": "864321","#timestamp": "2015-11-17T20:44:03.400Z"}
{"index":{"_id":2}}
{"PStream": "864321","#timestamp": "2015-11-17T21:44:03.400Z"}
{"index":{"_id":3}}
{"PStream": "864321","#timestamp": "2015-11-17T20:44:03.400Z"}
{"index":{"_id":4}}
{"PStream": "864322","#timestamp": "2015-11-17T21:44:03.400Z"}
And now I can get the "PStream" terms inside an hour histogram:
POST /test_index/_search
{
"size": 0,
"aggs" : {
"timestamp_histogram" : {
"date_histogram" : {
"field" : "#timestamp",
"interval" : "hour"
},
"aggs": {
"pstream_terms": {
"terms": {
"field": "PStream"
}
}
}
}
}
}
...
{
"took": 6,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"failed": 0
},
"hits": {
"total": 4,
"max_score": 0,
"hits": []
},
"aggregations": {
"timestamp_histogram": {
"buckets": [
{
"key_as_string": "2015-11-17T20:00:00.000Z",
"key": 1447790400000,
"doc_count": 2,
"pstream_terms": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "864321",
"doc_count": 2
}
]
}
},
{
"key_as_string": "2015-11-17T21:00:00.000Z",
"key": 1447794000000,
"doc_count": 2,
"pstream_terms": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "864321",
"doc_count": 1
},
{
"key": "864322",
"doc_count": 1
}
]
}
}
]
}
}
}
or the other way around:
POST /test_index/_search
{
"size": 0,
"aggs": {
"pstream_terms": {
"terms": {
"field": "PStream"
},
"aggs": {
"timestamp_histogram": {
"date_histogram": {
"field": "#timestamp",
"interval": "hour"
}
}
}
}
}
}
...
{
"took": 5,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"failed": 0
},
"hits": {
"total": 4,
"max_score": 0,
"hits": []
},
"aggregations": {
"pstream_terms": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "864321",
"doc_count": 3,
"timestamp_histogram": {
"buckets": [
{
"key_as_string": "2015-11-17T20:00:00.000Z",
"key": 1447790400000,
"doc_count": 2
},
{
"key_as_string": "2015-11-17T21:00:00.000Z",
"key": 1447794000000,
"doc_count": 1
}
]
}
},
{
"key": "864322",
"doc_count": 1,
"timestamp_histogram": {
"buckets": [
{
"key_as_string": "2015-11-17T21:00:00.000Z",
"key": 1447794000000,
"doc_count": 1
}
]
}
}
]
}
}
}
Here's the code I used:
http://sense.qbox.io/gist/6c0c30db1cf0fb8529bcfec21c0ce5c02a5ae94c

Elasticsearch include field in result set of aggregation

How can a field of type string be included in the result set of an aggregation?
For example given the following mapping:
{
"sport": {
"mappings": {
"runners": {
"properties": {
"name": {
"type": "string"
},
"city": {
"type": "string"
},
"region": {
"type": "string"
},
"sport": {
"type": "string"
}
}
}
}
}
}
Sample data:
curl -XPOST "http://localhost:9200/sport/_bulk" -d'
{"index":{"_index":"sport","_type":"runner"}}
{"name":"Gary", "city":"New York","region":"A","sport":"Soccer"}
{"index":{"_index":"sport","_type":"runner"}}
{"name":"Bob", "city":"New York","region":"A","sport":"Tennis"}
{"index":{"_index":"sport","_type":"runner"}}
{"name":"Mike", "city":"Atlanta","region":"B","sport":"Soccer"}
'
How can the field "name" be included in the result set of the following aggregation?
{
"size": 0,
"aggregations": {
"agg": {
"terms": {
"field": "city"}
}
}
}
This seems to do what you want, if I'm understanding you correctly:
POST /sport/_search
{
"size": 0,
"aggregations": {
"city_terms": {
"terms": {
"field": "city"
},
"aggs": {
"name_terms": {
"terms": {
"field": "name"
}
}
}
}
}
}
With the data you provided, it returns:
{
"took": 43,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"failed": 0
},
"hits": {
"total": 3,
"max_score": 0,
"hits": []
},
"aggregations": {
"city_terms": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "new",
"doc_count": 2,
"name_terms": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "bob",
"doc_count": 1
},
{
"key": "gary",
"doc_count": 1
}
]
}
},
{
"key": "york",
"doc_count": 2,
"name_terms": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "bob",
"doc_count": 1
},
{
"key": "gary",
"doc_count": 1
}
]
}
},
{
"key": "atlanta",
"doc_count": 1,
"name_terms": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "mike",
"doc_count": 1
}
]
}
}
]
}
}
}
(You may want to add "index":"not_analyzed" to one or both fields in your mapping, if these results are not what you were expecting.)
Here's the code I used to test it:
http://sense.qbox.io/gist/07735aadc082c1c60409931c279f3fd85a340dbb

Resources