Find min value of field in nested array object after aggregation

Find min value of field in nested array object after aggregation - elasticsearch

I would like to find the minimum value of a field in a nested array object after aggregation.
Data example:
[
{
"id": "i1",
"version": 1,
"entries": [
{
"name": "n1",
"position": 1
}, {
"name": "n2",
"position": 2
}
]
}, {
"id": "i1",
"version": 2,
"entries": [
{
"name": "n2",
"position": 3
}, {
"name": "n3",
"position": 4
}
]
},
{
"id": "i2",
"version": 1,
"entries": [
{
"name": "n1",
"position": 8
}, {
"name": "n2",
"position": 7
}
]
}, {
"id": "i2",
"version": 2,
"entries": [
{
"name": "n2",
"position": 6
}, {
"name": "n3",
"position": 5
}
]
}
]
Pseudo Query:
SELECT min(entries["n2"].position) WHERE entries.name="n2" GROUP BY id;
Expected Result:
[
{
"id": "i1",
"min(position)": 2
}, {
"id": "i2",
"min(position)": 6
}
]
I can do this in code, but it's not performant, as I need to return the document sources which can be quite large.
I am thinking of denormalizing the data, but would like to first know if this request is not possible at all.

You can do it by nesting several aggregations like this:
terms agg -> nested agg -> filter agg -> min agg
To test it I set up an index:
PUT /test_index
{
"settings": {
"number_of_shards": 1
},
"mappings": {
"doc": {
"properties": {
"entries": {
"type": "nested",
"properties": {
"name": {
"type": "string"
},
"position": {
"type": "long"
}
}
},
"id": {
"type": "string"
},
"version": {
"type": "long"
}
}
}
}
}
And indexed your docs:
PUT /test_index/doc/_bulk
{"index":{"_id":1}}
{"id":"i1","version":1,"entries":[{"name":"n1","position":1},{"name":"n2","position":2}]}
{"index":{"_id":2}}
{"id":"i1","version":2,"entries":[{"name":"n2","position":3},{"name":"n3","position":4}]}
{"index":{"_id":3}}
{"id":"i2","version":1,"entries":[{"name":"n1","position":8},{"name":"n2","position":7}]}
{"index":{"_id":4}}
{"id":"i2","version":2,"entries":[{"name":"n2","position":6},{"name":"n3","position":5}]}
Here is the query:
POST /test_index/_search?search_type=count
{
"aggs": {
"id_terms": {
"terms": {
"field": "id"
},
"aggs": {
"nested_entries": {
"nested": {
"path": "entries"
},
"aggs": {
"filter_name": {
"filter": {
"term": {
"entries.name": "n2"
}
},
"aggs": {
"min_position": {
"min": {
"field": "position"
}
}
}
}
}
}
}
}
}
}
and the result:
{
"took": 2,
"timed_out": false,
"_shards": {
"total": 1,
"successful": 1,
"failed": 0
},
"hits": {
"total": 4,
"max_score": 0,
"hits": []
},
"aggregations": {
"id_terms": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "i1",
"doc_count": 2,
"nested_entries": {
"doc_count": 4,
"filter_name": {
"doc_count": 2,
"min_position": {
"value": 2,
"value_as_string": "2.0"
}
}
}
},
{
"key": "i2",
"doc_count": 2,
"nested_entries": {
"doc_count": 4,
"filter_name": {
"doc_count": 2,
"min_position": {
"value": 6,
"value_as_string": "6.0"
}
}
}
}
]
}
}
}
Here is the code I used all together:
http://sense.qbox.io/gist/34a013099ef07fb527d9d7cf8490ad1bbafa718b

Related

Elasticsearch - Count number of occurrences per field per document

Is it possible to calculate the number of occurrences of each distinct value in a list field?
For example, let the following data:
[
{
"page":1,
"colors":[
{
"color": red
},
{
"color": white
},
{
"color": red
}
]
},
{
"page":2,
"colors":[
{
"color": yellow
},
{
"color": yellow
}
]
}
]
Is it possible to get a result like the following:
{
"page":1,
"colors_count":[
{
"Key": red,
"Count": 2
},
{
"Key": white,
"Count": 1
},
]
},
{
"page":2,
"colors_count":[
{
"Key": yellow,
"Count": 2
}
]
}
I tried using a terms aggregation, but I got the number of distinct values, so for page:1 I got red:1 and white:1.

Yes, you can do it. You will have to use the nested field type and a nested aggregation.
Mapping:
PUT colors
{
"mappings": {
"properties": {
"page" : { "type": "keyword" },
"colors": {
"type": "nested",
"properties": {
"color": {
"type": "keyword"
}
}
}
}
}
}
Insert Documents:
PUT colors/_doc/1
{
"page": 1,
"colors": [
{
"color": "red"
},
{
"color": "white"
},
{
"color": "red"
}
]
}
PUT colors/_doc/2
{
"page": 2,
"colors": [
{
"color": "yellow"
},
{
"color": "yellow"
}
]
}
Query:
GET colors/_search
{
"size" :0,
"aggs": {
"groupByPage": {
"terms": {
"field": "page"
},
"aggs": {
"colors": {
"nested": {
"path": "colors"
},
"aggs": {
"genres": {
"terms": {
"field": "colors.color"
}
}
}
}
}
}
}
}
Output:
{
"took": 3,
"timed_out": false,
"_shards": {
"total": 1,
"successful": 1,
"skipped": 0,
"failed": 0
},
"hits": {
"total": {
"value": 2,
"relation": "eq"
},
"max_score": null,
"hits": []
},
"aggregations": {
"groupByPage": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "1", // page field value
"doc_count": 1,
"colors": {
"doc_count": 3,
"genres": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "red",
"doc_count": 2
},
{
"key": "white",
"doc_count": 1
}
]
}
}
},
{
"key": "2", // page field value
"doc_count": 1,
"colors": {
"doc_count": 2,
"genres": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "yellow",
"doc_count": 2
}
]
}
}
}
]
}
}
}

Elastic Search: Aggregation sum on a particular field

I am new to elastic search and requesting some help.
Basically I have some 2 million documents in my elastic search and the documents look like below:
{
"_index": "flipkart",
"_type": "PSAD_ThirdParty",
"_id": "430001_MAM_2016-02-04",
"_version": 1,
"_score": 1,
"_source": {
"metrics": [
{
"id": "Metric1",
"value": 70
},
{
"id": "Metric2",
"value": 90
},
{
"id": "Metric3",
"value": 120
}
],
"primary": true,
"ticketId": 1,
"pliId": 206,
"bookedNumbers": 15000,
"ut": 1454567400000,
"startDate": 1451629800000,
"endDate": 1464589800000,
"tz": "EST"
}
}
I want to write an aggregation query which satisfies below conditions:
1) First query based on "_index", "_type" and "pliId".
2) Do aggregation sum on metrics.value based on metrics.id = "Metric1".
Basically I need to query records based on some fields and aggregate sum on a particular metrics value based on metrics id.
Please can you help me in getting my query right.

Your metrics field needs to be of type nested:
"metrics": {
"type": "nested",
"properties": {
"id": {
"type": "string",
"index": "not_analyzed"
}
}
}
If you want "Metric1" to match exactly, including the upper-case letter, then, as shown above, the id field needs to be not_analyzed.
Then, if you only want metrics.id = "Metric1" aggregations, you need something like this:
{
"query": {
"filtered": {
"filter": {
"bool": {
"must": [
{
"term": {
"pliId": 206
}
}
]
}
}
}
},
"aggs": {
"by_metrics": {
"nested": {
"path": "metrics"
},
"aggs": {
"metric1_only": {
"filter": {
"bool": {
"must": [
{
"term": {
"metrics.id": {
"value": "Metric1"
}
}
}
]
}
},
"aggs": {
"by_metric_id": {
"terms": {
"field": "metrics.id"
},
"aggs": {
"total_delivery": {
"sum": {
"field": "metrics.value"
}
}
}
}
}
}
}
}
}
}

Created new index:
Method : PUT ,
URL : http://localhost:9200/google/
Body:
{
"mappings": {
"PSAD_Primary": {
"properties": {
"metrics": {
"type": "nested",
"properties": {
"id": {
"type": "string",
"index": "not_analyzed"
},
"value": {
"type": "integer",
"index": "not_analyzed"
}
}
}
}
}
}
}
Then I inserted some 200 thousand documents and then ran the query, and it worked.
Response:
{
"took": 34,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"failed": 0
},
"hits": {
"total": 1,
"max_score": 1,
"hits": [
{
"_index": "google",
"_type": "PSAD_Primary",
"_id": "383701291_MAM_2016-01-06",
"_score": 1,
"_source": {
"metrics": [
{
"id": "Metric1",
"value": 70
},
{
"id": "Metric2",
"value": 90
},
{
"id": "Metric3",
"value": 120
}
],
"primary": true,
"ticketId": 1,
"pliId": 221244,
"bookedNumbers": 15000,
"ut": 1452061800000,
"startDate": 1451629800000,
"endDate": 1464589800000,
"tz": "EST"
}
}
]
},
"aggregations": {
"by_metrics": {
"doc_count": 3,
"metric1_only": {
"doc_count": 1,
"by_metric_id": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "Metric1",
"doc_count": 1,
"total_delivery": {
"value": 70
}
}
]
}
}
}
}
}

elasticsearch nested aggregation inside a reverse nested aggregation

Elasticsearch version: 2.3.1
JVM version: 1.8.0_66 / 25.66-b17
OS version: Mac OS X 10.11.4
I am having trouble getting the correct values to show up in a 4 level deep aggregation scenario where the first two levels are nested, the third is reverse_nested, and the fourth is nested again.
Here is my index mapping:
curl -XDELETE localhost:9200/orders-d
curl -XPUT localhost:9200/orders-d
curl -XPUT localhost:9200/orders-d/order-d/_mapping -d '{
"order-d": {
"properties": {
"id": {
"type": "string"
},
"orderNumber": {
"type": "string"
},
"groupId": {
"type": "string"
},
"groupOrderNumber": {
"type": "string"
},
"dateCreated": {
"type": "date"
},
"dateUpdated": {
"type": "date"
},
"location": {
"type": "object"
},
"orderSubmitter": {
"type": "object"
},
"distributor": {
"type": "object"
},
"salesRep": {
"type": "object"
},
"status": {
"type": "string"
},
"total": {
"type": "double"
},
"isTTOrder": {
"type": "boolean"
},
"lineItems": {
"type": "nested",
"include_in_parent": true,
"properties": {
"product": {
"type": "object"
},
"category": {
"type": "object"
},
"subCategory": {
"type": "object"
},
"quantity": {
"type": "double"
},
"unitPrice": {
"type": "double"
},
"totalPrice": {
"type": "double"
},
"pricedByUnitPrice": {
"type": "double"
}
}
}
}
}
}'
Here are the documents:
curl -XPUT localhost:9200/orders-d/order-d/0 -d '{
"id": "571652632a19085c008b4577",
"orderNumber": "1617590686",
"groupId": "571652632a19085c008b4578",
"groupOrderNumber": "3485944627",
"dateCreated": "2016-04-19",
"dateUpdated": null,
"location": {
"id": "54e53853505eb66b008b4569",
"name": "Andrews Diner"
},
"orderSubmitter": {
"id": "54e53853505eb66b008b4567",
"name": "Kostantino Plaitis"
},
"distributor": {
"id": "55c3879459ad0c63008b4569",
"name": "Performance Foodservice Metro NY"
},
"salesRep": null,
"status": "pending",
"total": 5410.21,
"isTTOrder": true,
"lineItems": [{
"product": {
"id": "55bfb445c440b26a008b4571",
"name": "Sabrett Sauerkraut 12 x 2 lb bags"
},
"category": {
"id": "53df845b3b8e77710e7b23ec",
"name": "Groceries & Dry Food"
},
"subCategory": {
"id": "53e1e8723b8e77a52b8b4586",
"name": "Other Sauces Dipping\/Condiments\/Savoury Toppings\/Savoury Spreads\/Marinades (Perishable)"
},
"quantity": 1,
"unitPrice": 25.24,
"totalPrice": 25.24,
"pricedByUnitPrice": 0
}, {
"product": {
"id": "55bc219238c0376e008b4570",
"name": "Franks Red Hot Cayenne Pepper Sauce 4 x 1 gallon"
},
"category": {
"id": "53df845b3b8e77710e7b23ec",
"name": "Groceries & Dry Food"
},
"subCategory": {
"id": "53e1e8723b8e77a52b8b4606",
"name": "Other Sauces Dipping\/Condiments\/Savoury Toppings\/Savoury Spreads\/Marinades (Shelf Stable)"
},
"quantity": 1,
"unitPrice": 45.06,
"totalPrice": 45.06,
"pricedByUnitPrice": 0
}, {
"product": {
"id": "56d76c41bd821fda008b459a",
"name": "Cereal, Classic Variety Pack, Kelloggs 1\/60 ct."
},
"category": {
"id": "53df845b3b8e77710e7b23ec",
"name": "Groceries & Dry Food"
},
"subCategory": {
"id": "53e1e8723b8e77a52b8b462d",
"name": "Grains\/Cereal - Ready to Eat - (Shelf Stable)"
},
"quantity": 1,
"unitPrice": 56.03,
"totalPrice": 56.03,
"pricedByUnitPrice": 0
}]
}'
curl -XPUT localhost:9200/orders-d/order-d/0 -d '{
"id": "571652632a19085c008b4576",
"orderNumber": "2041063294",
"groupId": "571652632a19085c008b4578",
"groupOrderNumber": "3485944627",
"dateCreated": "2016-04-19",
"dateUpdated": null,
"location": {
"id": "54e53853505eb66b008b4569",
"name": "Andrews Diner"
},
"orderSubmitter": {
"id": "54e53853505eb66b008b4567",
"name": "Kostantino Plaitis"
},
"distributor": {
"id": "55cdeece0a41216c008b4583",
"name": "Driscoll Foods"
},
"salesRep": null,
"status": "pending",
"total": 7575.27,
"isTTOrder": true,
"lineItems": [{
"product": {
"id": "55ad05e08d28c36b008b456c",
"name": "Pepper 3000 pcs"
},
"category": {
"id": "53df845b3b8e77710e7b23ec",
"name": "Groceries & Dry Food"
},
"subCategory": {
"id": "53e1e8723b8e77a52b8b4582",
"name": "Herbs\/Spices (Shelf Stable)"
},
"quantity": 3,
"unitPrice": 8.95,
"totalPrice": 26.85,
"pricedByUnitPrice": 0
}, {
"product": {
"id": "55b3a12f6b415c68008b4568",
"name": "Venice Maid Deluxe Corned Beef Hash 6 x 6 lb 10 oz"
},
"category": {
"id": "53df846c3b8e77710e7b23f7",
"name": "Meat"
},
"subCategory": {
"id": "54d8c56a279871b9078b4581",
"name": "Beef - Prepared\/Processed"
},
"quantity": 1,
"unitPrice": 59.75,
"totalPrice": 59.75,
"pricedByUnitPrice": 0
}, {
"product": {
"id": "55b145798c26dc69008b4568",
"name": "Aladdin Bakers Sesame Bread Sticks 150 x 2 packs"
},
"category": {
"id": "53df845b3b8e77710e7b23ec",
"name": "Groceries & Dry Food"
},
"subCategory": {
"id": "53e1e8723b8e77a52b8b45b0",
"name": "Dried Breads (Shelf Stable)"
},
"quantity": 8,
"unitPrice": 15.5,
"totalPrice": 124,
"pricedByUnitPrice": 0
}, {
"product": {
"id": "55ad074a8d28c36f008b456d",
"name": "Smuckers Breakfast Syrup 100 cups"
},
"category": {
"id": "53df845b3b8e77710e7b23ec",
"name": "Groceries & Dry Food"
},
"subCategory": {
"id": "53e1e8723b8e77a52b8b457d",
"name": "Syrup\/Treacle\/Molasses (Shelf Stable)"
},
"quantity": 10,
"unitPrice": 8.95,
"totalPrice": 89.5,
"pricedByUnitPrice": 0
}]
}'
Here is my query:
curl -XPOST localhost:9200/orders-d/_search -d '{
"from": 0,
"size": 0,
"aggregations": {
"totalLineItems": {
"aggs": {
"totalLineItems": {
"terms": {
"field": "lineItems.category.id",
"size": 0
},
"aggs": {
"totalLineItems": {
"terms": {
"field": "lineItems.product.id",
"size": 0
},
"aggs": {
"totalLineItems": {
"aggs": {
"totalLineItems": {
"terms": {
"field": "distributor.id",
"size": 0
},
"aggs": {
"totalLineItems": {
"aggs": {
"totalLineItems": {
"sum": {
"field": "lineItems.totalPrice"
}
}
},
"nested": {
"path": "lineItems"
}
}
}
}
},
"reverse_nested": {}
}
}
}
}
}
},
"nested": {
"path": "lineItems"
}
}
},
"query": {
"bool": {
"must": [{
"range": {
"dateCreated": {
"format": "yyyy-MM-dd",
"gte": "2016-01-01",
"lte": "2016-04-30"
}
}
}]
}
}
}'
...and here are my results:
{
"took": 8,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"failed": 0
},
"hits": {
"total": 1,
"max_score": 0.0,
"hits": []
},
"aggregations": {
"totalLineItems": {
"doc_count": 4,
"totalLineItems": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [{
"key": "53df845b3b8e77710e7b23ec",
"doc_count": 3,
"totalLineItems": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [{
"key": "55ad05e08d28c36b008b456c",
"doc_count": 1,
"totalLineItems": {
"doc_count": 1,
"totalLineItems": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [{
"key": "55cdeece0a41216c008b4583",
"doc_count": 1,
"totalLineItems": {
"doc_count": 4,
"totalLineItems": {
"value": 300.1
}
}
}]
}
}
}, {
"key": "55ad074a8d28c36f008b456d",
"doc_count": 1,
"totalLineItems": {
"doc_count": 1,
"totalLineItems": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [{
"key": "55cdeece0a41216c008b4583",
"doc_count": 1,
"totalLineItems": {
"doc_count": 4,
"totalLineItems": {
"value": 300.1
}
}
}]
}
}
}, {
"key": "55b145798c26dc69008b4568",
"doc_count": 1,
"totalLineItems": {
"doc_count": 1,
"totalLineItems": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [{
"key": "55cdeece0a41216c008b4583",
"doc_count": 1,
"totalLineItems": {
"doc_count": 4,
"totalLineItems": {
"value": 300.1
}
}
}]
}
}
}]
}
}, {
"key": "53df846c3b8e77710e7b23f7",
"doc_count": 1,
"totalLineItems": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [{
"key": "55b3a12f6b415c68008b4568",
"doc_count": 1,
"totalLineItems": {
"doc_count": 1,
"totalLineItems": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [{
"key": "55cdeece0a41216c008b4583",
"doc_count": 1,
"totalLineItems": {
"doc_count": 4,
"totalLineItems": {
"value": 300.1
}
}
}]
}
}
}]
}
}]
}
}
}
}
As you can see from the results, all the aggregated values for each drilldown of totalLineItems have the same exact value. This is obviously incorrect.
Did I do something wrong, is it a bug, or is nesting inside a reverse nesting unsupported?

elasticsearch terms and sum aggregation

I have documents in elasticsearch (1.5) that looks like:
{
"gender": [
{
"name": "unknown",
"value": 12
},
{
"name": "male",
"value": 89
},
{
"name": "female",
"value": 84
}
]
}
Not all of the documents contain all three options (male/female/unknown).
I would like to get the sum of all values for each gender name, like this:
{
"buckets": [
{
"key": "unknown",
"doc_count": 112,
"gender_a": {
"value": 462
}
},
{
"key": "male",
"doc_count": 107,
"gender_a": {
"value": 438
}
},
{
"key": "female",
"doc_count": 36,
"gender_a": {
"value": 186
}
}
]
}
i tried this query:
{
"aggs": {
"gender_name": {
"terms": {
"field": "gender.name"
},
"aggs": {
"gender_sum": {
"sum": {
"field": "gender.value"
}
}
}
}
}
}
But something weird is going on, and I don't get the right values.
Any idea what I am missing?

You will probably need to make sure that your "gender" property has type "nested". With that, I was able to make the following do what I think you're asking.
First I set up a simple index:
PUT /test_index
{
"mappings": {
"doc": {
"properties": {
"gender": {
"type": "nested",
"properties": {
"name": {
"type": "string"
},
"value": {
"type": "long"
}
}
}
}
}
}
}
Then added a couple of docs:
PUT /test_index/doc/1
{
"gender": [
{
"name": "unknown",
"value": 12
},
{
"name": "male",
"value": 89
},
{
"name": "female",
"value": 84
}
]
}
PUT /test_index/doc/2
{
"gender": [
{
"name": "male",
"value": 8
},
{
"name": "female",
"value": 4
}
]
}
Then I was able to get total counts by gender name as follows:
POST /test_index/_search?search_type=count
{
"aggs": {
"genders": {
"nested": {
"path": "gender"
},
"aggs": {
"gender_terms": {
"terms": {
"field": "gender.name"
},
"aggs": {
"gender_name_value_sums": {
"sum": {
"field": "gender.value"
}
}
}
}
}
}
}
}
...
{
"took": 1,
"timed_out": false,
"_shards": {
"total": 1,
"successful": 1,
"failed": 0
},
"hits": {
"total": 2,
"max_score": 0,
"hits": []
},
"aggregations": {
"genders": {
"doc_count": 5,
"gender_terms": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "female",
"doc_count": 2,
"gender_name_value_sums": {
"value": 88,
"value_as_string": "88.0"
}
},
{
"key": "male",
"doc_count": 2,
"gender_name_value_sums": {
"value": 97,
"value_as_string": "97.0"
}
},
{
"key": "unknown",
"doc_count": 1,
"gender_name_value_sums": {
"value": 12,
"value_as_string": "12.0"
}
}
]
}
}
}
}
Here is the code I used to test it:
http://sense.qbox.io/gist/d4533215806b858aa2cc1565546d167fdec3c973

Elasticsearch: generating terms from array using script

Would love an explanation of why this happens and how to correct it.
Here's a snippet of the source document:
{
"created_time":1412988495000,
"tags":{
"items":[
{
"tag_type":"Placement",
"tag_id":"id1"
},
{
"tag_type":"Product",
"tag_id":"id2"
}
]
}
}
The following terms aggregation:
"aggs":{
"tags":{
"terms":{
"script":"doc['tags'].value != null ? doc['tags.items.tag_type'].value + ':' + doc['tags.items.tag_id'].value : ''",
"size":2000,
"exclude":{
"pattern":"null:null"
}
}
}
}
returns:
"buckets":[
{
"key":"Placement:id1",
"doc_count":1
},
{
"key":"Placement:id2",
"doc_count":1
}
]
...when you would expect:
"buckets":[
{
"key":"Placement:id1",
"doc_count":1
},
{
"key":"Product:id2",
"doc_count":1
}
]

I would probably go with a nested type. I don't know all the details of your setup, but here is a proof of concept, at least. I took out the "items" property because I didn't need that many layers, and just used "tags" as the nested type. It could be added back in if needed, I think.
So I set up an index with a "nested" property:
DELETE /test_index
PUT /test_index
{
"settings": {
"number_of_shards": 1,
"number_of_replicas": 0
},
"mappings": {
"doc": {
"properties": {
"created_time": {
"type": "date"
},
"tags": {
"type": "nested",
"properties": {
"tag_type": {
"type": "string",
"index": "not_analyzed"
},
"tag_id": {
"type": "string",
"index": "not_analyzed"
}
}
}
}
}
}
}
Then added a couple of docs (notice that the structure differs slightly from yours):
PUT /test_index/doc/1
{
"created_time": 1412988495000,
"tags": [
{
"tag_type": "Placement",
"tag_id": "id1"
},
{
"tag_type": "Product",
"tag_id": "id2"
}
]
}
PUT /test_index/doc/2
{
"created_time": 1412988475000,
"tags": [
{
"tag_type": "Type3",
"tag_id": "id3"
},
{
"tag_type": "Type4",
"tag_id": "id3"
}
]
}
Now a scripted terms aggregation inside a nested aggregation seems to do the trick:
POST /test_index/_search?search_type=count
{
"query": {
"match_all": {}
},
"aggs": {
"tags": {
"nested": { "path": "tags" },
"aggs":{
"tag_vals": {
"terms": {
"script": "doc['tag_type'].value+':'+doc['tag_id'].value"
}
}
}
}
}
}
...
{
"took": 3,
"timed_out": false,
"_shards": {
"total": 1,
"successful": 1,
"failed": 0
},
"hits": {
"total": 2,
"max_score": 0,
"hits": []
},
"aggregations": {
"tags": {
"doc_count": 4,
"tag_vals": {
"buckets": [
{
"key": "Placement:id1",
"doc_count": 1
},
{
"key": "Product:id2",
"doc_count": 1
},
{
"key": "Type3:id3",
"doc_count": 1
},
{
"key": "Type4:id3",
"doc_count": 1
}
]
}
}
}
}
Here is the code I used:
http://sense.qbox.io/gist/4ceaf8693f85ff257c2fd0639ba62295f2e5e8c5

Develop Reference

ruby bash windows laravel spring algorithm oracle macos go visual-studio

Find min value of field in nested array object after aggregation - elasticsearch

Related

Elasticsearch - Count number of occurrences per field per document

Elastic Search: Aggregation sum on a particular field

elasticsearch nested aggregation inside a reverse nested aggregation

elasticsearch terms and sum aggregation

Elasticsearch: generating terms from array using script

Categories

Resources