elasticsearch nested aggregation inside a reverse nested aggregation - elasticsearch

Elasticsearch version: 2.3.1
JVM version: 1.8.0_66 / 25.66-b17
OS version: Mac OS X 10.11.4
I am having trouble getting the correct values to show up in a 4 level deep aggregation scenario where the first two levels are nested, the third is reverse_nested, and the fourth is nested again.
Here is my index mapping:
curl -XDELETE localhost:9200/orders-d
curl -XPUT localhost:9200/orders-d
curl -XPUT localhost:9200/orders-d/order-d/_mapping -d '{
"order-d": {
"properties": {
"id": {
"type": "string"
},
"orderNumber": {
"type": "string"
},
"groupId": {
"type": "string"
},
"groupOrderNumber": {
"type": "string"
},
"dateCreated": {
"type": "date"
},
"dateUpdated": {
"type": "date"
},
"location": {
"type": "object"
},
"orderSubmitter": {
"type": "object"
},
"distributor": {
"type": "object"
},
"salesRep": {
"type": "object"
},
"status": {
"type": "string"
},
"total": {
"type": "double"
},
"isTTOrder": {
"type": "boolean"
},
"lineItems": {
"type": "nested",
"include_in_parent": true,
"properties": {
"product": {
"type": "object"
},
"category": {
"type": "object"
},
"subCategory": {
"type": "object"
},
"quantity": {
"type": "double"
},
"unitPrice": {
"type": "double"
},
"totalPrice": {
"type": "double"
},
"pricedByUnitPrice": {
"type": "double"
}
}
}
}
}
}'
Here are the documents:
curl -XPUT localhost:9200/orders-d/order-d/0 -d '{
"id": "571652632a19085c008b4577",
"orderNumber": "1617590686",
"groupId": "571652632a19085c008b4578",
"groupOrderNumber": "3485944627",
"dateCreated": "2016-04-19",
"dateUpdated": null,
"location": {
"id": "54e53853505eb66b008b4569",
"name": "Andrews Diner"
},
"orderSubmitter": {
"id": "54e53853505eb66b008b4567",
"name": "Kostantino Plaitis"
},
"distributor": {
"id": "55c3879459ad0c63008b4569",
"name": "Performance Foodservice Metro NY"
},
"salesRep": null,
"status": "pending",
"total": 5410.21,
"isTTOrder": true,
"lineItems": [{
"product": {
"id": "55bfb445c440b26a008b4571",
"name": "Sabrett Sauerkraut 12 x 2 lb bags"
},
"category": {
"id": "53df845b3b8e77710e7b23ec",
"name": "Groceries & Dry Food"
},
"subCategory": {
"id": "53e1e8723b8e77a52b8b4586",
"name": "Other Sauces Dipping\/Condiments\/Savoury Toppings\/Savoury Spreads\/Marinades (Perishable)"
},
"quantity": 1,
"unitPrice": 25.24,
"totalPrice": 25.24,
"pricedByUnitPrice": 0
}, {
"product": {
"id": "55bc219238c0376e008b4570",
"name": "Franks Red Hot Cayenne Pepper Sauce 4 x 1 gallon"
},
"category": {
"id": "53df845b3b8e77710e7b23ec",
"name": "Groceries & Dry Food"
},
"subCategory": {
"id": "53e1e8723b8e77a52b8b4606",
"name": "Other Sauces Dipping\/Condiments\/Savoury Toppings\/Savoury Spreads\/Marinades (Shelf Stable)"
},
"quantity": 1,
"unitPrice": 45.06,
"totalPrice": 45.06,
"pricedByUnitPrice": 0
}, {
"product": {
"id": "56d76c41bd821fda008b459a",
"name": "Cereal, Classic Variety Pack, Kelloggs 1\/60 ct."
},
"category": {
"id": "53df845b3b8e77710e7b23ec",
"name": "Groceries & Dry Food"
},
"subCategory": {
"id": "53e1e8723b8e77a52b8b462d",
"name": "Grains\/Cereal - Ready to Eat - (Shelf Stable)"
},
"quantity": 1,
"unitPrice": 56.03,
"totalPrice": 56.03,
"pricedByUnitPrice": 0
}]
}'
curl -XPUT localhost:9200/orders-d/order-d/0 -d '{
"id": "571652632a19085c008b4576",
"orderNumber": "2041063294",
"groupId": "571652632a19085c008b4578",
"groupOrderNumber": "3485944627",
"dateCreated": "2016-04-19",
"dateUpdated": null,
"location": {
"id": "54e53853505eb66b008b4569",
"name": "Andrews Diner"
},
"orderSubmitter": {
"id": "54e53853505eb66b008b4567",
"name": "Kostantino Plaitis"
},
"distributor": {
"id": "55cdeece0a41216c008b4583",
"name": "Driscoll Foods"
},
"salesRep": null,
"status": "pending",
"total": 7575.27,
"isTTOrder": true,
"lineItems": [{
"product": {
"id": "55ad05e08d28c36b008b456c",
"name": "Pepper 3000 pcs"
},
"category": {
"id": "53df845b3b8e77710e7b23ec",
"name": "Groceries & Dry Food"
},
"subCategory": {
"id": "53e1e8723b8e77a52b8b4582",
"name": "Herbs\/Spices (Shelf Stable)"
},
"quantity": 3,
"unitPrice": 8.95,
"totalPrice": 26.85,
"pricedByUnitPrice": 0
}, {
"product": {
"id": "55b3a12f6b415c68008b4568",
"name": "Venice Maid Deluxe Corned Beef Hash 6 x 6 lb 10 oz"
},
"category": {
"id": "53df846c3b8e77710e7b23f7",
"name": "Meat"
},
"subCategory": {
"id": "54d8c56a279871b9078b4581",
"name": "Beef - Prepared\/Processed"
},
"quantity": 1,
"unitPrice": 59.75,
"totalPrice": 59.75,
"pricedByUnitPrice": 0
}, {
"product": {
"id": "55b145798c26dc69008b4568",
"name": "Aladdin Bakers Sesame Bread Sticks 150 x 2 packs"
},
"category": {
"id": "53df845b3b8e77710e7b23ec",
"name": "Groceries & Dry Food"
},
"subCategory": {
"id": "53e1e8723b8e77a52b8b45b0",
"name": "Dried Breads (Shelf Stable)"
},
"quantity": 8,
"unitPrice": 15.5,
"totalPrice": 124,
"pricedByUnitPrice": 0
}, {
"product": {
"id": "55ad074a8d28c36f008b456d",
"name": "Smuckers Breakfast Syrup 100 cups"
},
"category": {
"id": "53df845b3b8e77710e7b23ec",
"name": "Groceries & Dry Food"
},
"subCategory": {
"id": "53e1e8723b8e77a52b8b457d",
"name": "Syrup\/Treacle\/Molasses (Shelf Stable)"
},
"quantity": 10,
"unitPrice": 8.95,
"totalPrice": 89.5,
"pricedByUnitPrice": 0
}]
}'
Here is my query:
curl -XPOST localhost:9200/orders-d/_search -d '{
"from": 0,
"size": 0,
"aggregations": {
"totalLineItems": {
"aggs": {
"totalLineItems": {
"terms": {
"field": "lineItems.category.id",
"size": 0
},
"aggs": {
"totalLineItems": {
"terms": {
"field": "lineItems.product.id",
"size": 0
},
"aggs": {
"totalLineItems": {
"aggs": {
"totalLineItems": {
"terms": {
"field": "distributor.id",
"size": 0
},
"aggs": {
"totalLineItems": {
"aggs": {
"totalLineItems": {
"sum": {
"field": "lineItems.totalPrice"
}
}
},
"nested": {
"path": "lineItems"
}
}
}
}
},
"reverse_nested": {}
}
}
}
}
}
},
"nested": {
"path": "lineItems"
}
}
},
"query": {
"bool": {
"must": [{
"range": {
"dateCreated": {
"format": "yyyy-MM-dd",
"gte": "2016-01-01",
"lte": "2016-04-30"
}
}
}]
}
}
}'
...and here are my results:
{
"took": 8,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"failed": 0
},
"hits": {
"total": 1,
"max_score": 0.0,
"hits": []
},
"aggregations": {
"totalLineItems": {
"doc_count": 4,
"totalLineItems": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [{
"key": "53df845b3b8e77710e7b23ec",
"doc_count": 3,
"totalLineItems": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [{
"key": "55ad05e08d28c36b008b456c",
"doc_count": 1,
"totalLineItems": {
"doc_count": 1,
"totalLineItems": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [{
"key": "55cdeece0a41216c008b4583",
"doc_count": 1,
"totalLineItems": {
"doc_count": 4,
"totalLineItems": {
"value": 300.1
}
}
}]
}
}
}, {
"key": "55ad074a8d28c36f008b456d",
"doc_count": 1,
"totalLineItems": {
"doc_count": 1,
"totalLineItems": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [{
"key": "55cdeece0a41216c008b4583",
"doc_count": 1,
"totalLineItems": {
"doc_count": 4,
"totalLineItems": {
"value": 300.1
}
}
}]
}
}
}, {
"key": "55b145798c26dc69008b4568",
"doc_count": 1,
"totalLineItems": {
"doc_count": 1,
"totalLineItems": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [{
"key": "55cdeece0a41216c008b4583",
"doc_count": 1,
"totalLineItems": {
"doc_count": 4,
"totalLineItems": {
"value": 300.1
}
}
}]
}
}
}]
}
}, {
"key": "53df846c3b8e77710e7b23f7",
"doc_count": 1,
"totalLineItems": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [{
"key": "55b3a12f6b415c68008b4568",
"doc_count": 1,
"totalLineItems": {
"doc_count": 1,
"totalLineItems": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [{
"key": "55cdeece0a41216c008b4583",
"doc_count": 1,
"totalLineItems": {
"doc_count": 4,
"totalLineItems": {
"value": 300.1
}
}
}]
}
}
}]
}
}]
}
}
}
}
As you can see from the results, all the aggregated values for each drilldown of totalLineItems have the same exact value. This is obviously incorrect.
Did I do something wrong, is it a bug, or is nesting inside a reverse nesting unsupported?

Related

Elastic search terms aggregation for getting filter options

im trying to implement product searching and want to get search results along with filters to filter from. i have managed to get the filter keys reference, but also want values of those keys
my product body is
{
...product,
"attributes": [
{
"name": "Color",
"value": "Aqua Blue"
},
{
"name": "Gender",
"value": "Female"
},
{
"name": "Occasion",
"value": "Active Wear"
},
{
"name": "Size",
"value": "0"
}
],
}
and im using the this query in es
GET product/_search
{
"aggs": {
"filters": {
"terms": {
"field": "attributes.name"
},
"aggs": {
"values": {
"terms": {
"field": "attributes.value",
"size": 10
}
}
}
}
}
}
Not sure why, but im getting all values for each key
"aggregations": {
"filters": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "Color",
"doc_count": 3,
"values": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "Active Wear",
"doc_count": 3
},
{
"key": "Aqua Blue",
"doc_count": 3
},
{
"key": "Female",
"doc_count": 3
},
{
"key": "0",
"doc_count": 2
},
{
"key": "10XL",
"doc_count": 1
}
]
}
},
{
"key": "Gender",
"doc_count": 3,
"values": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "Active Wear",
"doc_count": 3
},
{
"key": "Aqua Blue",
"doc_count": 3
},
{
"key": "Female",
"doc_count": 3
},
{
"key": "0",
"doc_count": 2
},
{
"key": "10XL",
"doc_count": 1
}
]
}
},
{
"key": "Occasion",
"doc_count": 3,
"values": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "Active Wear",
"doc_count": 3
},
{
"key": "Aqua Blue",
"doc_count": 3
},
{
"key": "Female",
"doc_count": 3
},
{
"key": "0",
"doc_count": 2
},
{
"key": "10XL",
"doc_count": 1
}
]
}
},
{
"key": "Size",
"doc_count": 3,
"values": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "Active Wear",
"doc_count": 3
},
{
"key": "Aqua Blue",
"doc_count": 3
},
{
"key": "Female",
"doc_count": 3
},
{
"key": "0",
"doc_count": 2
},
{
"key": "10XL",
"doc_count": 1
}
]
}
}
]
}
Also i do not want to specify manually all keys explicitly like Color, Size to get their respective values each.
Thanks :)
To keep things simple must you use a single field to store attributes:
"gender":"Male"
I assume you have tons of attributes so you create an array instead, to handle that you will have to use "nested" field type.
Nested type preserves the relation between each of the nested document properties. If you dont use nested you will see all the properties and values mixed and you will not be able to aggregate by a property without manually adding filters.
You can read an article I wrote about that here:
https://opster.com/guides/elasticsearch/data-architecture/elasticsearch-nested-field-object-field/
Mappings :
PUT test_product_nested
{
"mappings": {
"properties": {
"attributes": {
"type": "nested",
"properties": {
"name": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"value": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
}
}
},
"title": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
}
}
}
}
This query will only show Red products of size XL and aggregate by attributes.
If you want to do OR's instead of AND's you must use "should" clauses instead of "filter" clauses.
Query
POST test_product_nested/_search
{
"query": {
"bool": {
"filter": [
{
"nested": {
"path": "attributes",
"query": {
"bool": {
"filter": [
{
"term": {
"attributes.name.keyword": "Color"
}
},
{
"term": {
"attributes.value.keyword": "Red"
}
}
]
}
}
}
},
{
"nested": {
"path": "attributes",
"query": {
"bool": {
"filter": [
{
"term": {
"attributes.name.keyword": "Size"
}
},
{
"term": {
"attributes.value.keyword": "XL"
}
}
]
}
}
}
}
]
}
},
"aggs": {
"attributes": {
"nested": {
"path": "attributes"
},
"aggs": {
"name": {
"terms": {
"field": "attributes.name.keyword"
},
"aggs": {
"values": {
"terms": {
"field": "attributes.value.keyword",
"size": 10
}
}
}
}
}
}
}
}
Results
{
"took": 1,
"timed_out": false,
"_shards": {
"total": 1,
"successful": 1,
"skipped": 0,
"failed": 0
},
"hits": {
"total": {
"value": 1,
"relation": "eq"
},
"max_score": 0,
"hits": [
{
"_index": "test_product_nested",
"_id": "aJRayoQBtNG1OrZoEOQi",
"_score": 0,
"_source": {
"title": "Product 1",
"attributes": [
{
"name": "Color",
"value": "Red"
},
{
"name": "Gender",
"value": "Female"
},
{
"name": "Occasion",
"value": "Active Wear"
},
{
"name": "Size",
"value": "XL"
}
]
}
}
]
},
"aggregations": {
"attributes": {
"doc_count": 4,
"name": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "Color",
"doc_count": 1,
"values": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "Red",
"doc_count": 1
}
]
}
},
{
"key": "Gender",
"doc_count": 1,
"values": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "Female",
"doc_count": 1
}
]
}
},
{
"key": "Occasion",
"doc_count": 1,
"values": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "Active Wear",
"doc_count": 1
}
]
}
},
{
"key": "Size",
"doc_count": 1,
"values": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "XL",
"doc_count": 1
}
]
}
}
]
}
}
}
}

ElasticSearc - unable to get nested object from multilevel nested object

I have multilevel nested object. the first nested object is category and inside category there is one more nested object is group.
So I want to get distinct category along with nested groups using aggregation query.
am successful to get distinct category but unable to get group detail with it.
Mapping:
"mappings": {
"doc": {
"properties": {
"categories": {
"type": "nested",
"properties": {
"cat_id": {
"type": "integer"
},
"cat_name": {
"type": "keyword"
},
"cat_slug": {
"type": "keyword"
},
"cat_type": {
"type": "long"
},
"groups": {
"type": "nested",
"properties": {
"group": {
"type": "keyword"
},
"group_name": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"id": {
"type": "long"
}
}
},
"ordering": {
"type": "integer"
},
"parent_id": {
"type": "integer"
},
"parent_name": {
"type": "keyword"
},
"parent_slug": {
"type": "keyword"
},
"parent_type": {
"type": "long"
}
}
}
}
}
}
Sample data:
{
"_index": "product",
"_type": "doc",
"_id": "18556",
"_score": 1,
"_source": {
"sku": "GR0005P08",
"product_id": 18556,
"slug": "blue-garter-with-sexy-laces",
"categories": [
{
"ordering": 10,
"cat_id": 343,
"cat_type": 1,
"cat_slug": "t-thisr",
"cat_name": "cat1"
},
{
"ordering": 9999999,
"cat_id": 2,
"cat_type": 3,
"cat_slug": "pajams",
"cat_name": "pajams"
},
{
"ordering": 5,
"cat_id": 77,
"cat_type": 3,
"cat_slug": "accessories",
"cat_name": "Accessories"
},
{
"parent_name": "Pajams",
"cat_name": "Night",
"ordering": 1,
"cat_id": 139,
"parent_type": 3,
"cat_slug": "night",
"parent_id": 2,
"groups": [
{
"id": 146,
"group_name": "Shop By Style"
},
{
"id": 481,
"group_name": "Shop By Offer "
}
],
"parent_slug": "pajams",
"cat_type": 1
}
],
"name": "love for pajams"
}
}
This is my aggregation query:
GET product/_search
{
"_source": [
"product_id"
],
"query": {
"nested": {
"path": "categories",
"query": {
"bool": {
"must": [
{
"match": {
"categories.cat_slug": "xyz"
}
}
]
}
}
}
},
"aggs": {
"categories": {
"nested": {
"path": "categories"
},
"aggs": {
"distinct_categories.cat_name": {
"terms": {
"field": "categories.cat_name"
},
"aggs": {
"categories.groups.group_name": {
"terms": {
"field": "categories.groups.group_name.keyword"
}
}
}
}
}
}
}
}
And this is my Response:
{
"took": 11,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"skipped": 0,
"failed": 0
},
"hits": {
"total": 8,
"max_score": 3.232121,
"hits": [
{
"_index": "product",
"_type": "doc",
"_id": "15621",
"_score": 3.232121,
"_source": {
"product_id": 15621
}
},
{
"_index": "product",
"_type": "doc",
"_id": "18556",
"_score": 2.5758784,
"_source": {
"product_id": 18556
}
}
]
},
"aggregations": {
"categories": {
"doc_count": 98,
"distinct_categories.cat_name": {
"doc_count_error_upper_bound": 2,
"sum_other_doc_count": 50,
"buckets": [
{
"key": "Accessories",
"doc_count": 8,
"categories.groups.group_name": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": []
}
},
{
"key": "T-shirt",
"doc_count": 8,
"categories.groups.group_name": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": []
}
},
{
"key": "Sexy",
"doc_count": 7,
"categories.groups.group_name": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": []
}
},
{
"key": "clothing",
"doc_count": 6,
"categories.groups.group_name": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": []
}
},
{
"key": "Pants",
"doc_count": 6,
"categories.groups.group_name": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": []
}
},
{
"key": "Colour Me",
"doc_count": 4,
"categories.groups.group_name": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": []
}
},
{
"key": "Pajamas",
"doc_count": 3,
"categories.groups.group_name": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": []
}
}
]
}
}
}
}
Probably try "field": "categories.groups.group_name" instead of "field": "categories.groups.group_name.keyword" in you deepest aggregation?
Edit:
Using "categories.groups.group_name.keyword" is the correct way to achieve this. The problem should be that you would need to add nested for each level of your nested structure for your query.
"aggs": {
"categories": {
"nested": {
"path": "categories"
},
"aggs": {
"distinct_categories.cat_name": {
"terms": {
"field": "categories.cat_name"
},
"aggs": {
"deeper_nested_agg": {
"nested": {
"path": "categories.groups"
},
"aggs": {
"categories.groups.group_name": {
"terms": {
"field": "categories.groups.group_name.keyword"
}
}
}
}
}
}
}
}
Please give it a try. Hope this helps!
I found issue with mapping definition of groups after changing query work fine.
{
"type": "nested",
"properties": {
"group_name": {
"type": "keyword"
},
"id": {
"type": "long"
}
}
}

Elasticsearch: Ordering with respect to Nested Sum Aggregation

The use case is to create aggregations to find top selling products in a region sorted by the counts of products sold. The data is stored in an index in elasticsearch.
I want to sort my 'group_by_name' aggregations output by the sum_quantity value aggregation which is in the last nested aggregation/ two levels in after 'group_by_name' with an intermediate aggregation 'group_by_sku'. The default output is sorted by doc_count. I want the aggregation to be sorted by the 'sum_quantity' aggregation value.
I have an index with the following mapping:
{"settings": {
"index": {
"number_of_shards": 2,
"number_of_replicas": 0
},
"analysis":{
"analyzer":{
"autocomplete":{
"type":"custom",
"tokenizer":"standard",
"filter":[ "standard", "lowercase", "ngram" ]
}
},
"filter":{
"ngram":{
"type":"ngram",
"min_gram":3,
"max_gram":25
}
}
} },"mappings": {
"farmer_products_map":{
"properties": {
"state": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword"
}
}
},
"district": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword"
}
}
},
"taluka": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword"
}
}
},
"village": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword"
}
}
},
"createdOn": {
"type": "date",
"format": "epoch_millis"
},
"category": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword"
}
}
},
"brand": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword"
}
}
},
"productName": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword"
},
"autocomplete":{
"analyzer": "autocomplete",
"type": "string"
}
}
},
"crop": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword"
}
}
},
"sku": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword"
}
}
},
"quantity": {
"type": "integer"
},
"farmerId": {
"type": "integer"
},
"orderId": {
"type": "integer"
}
}} }}
The following are the sample documents:
[{
"_index": "farmer_products_index_adv",
"_type": "farmer_products_map",
"_id": "AVtCttf0IP9v8cUTtoiz",
"_score": 1,
"_source": {
"orderId": 469173,
"category": "Hardware",
"farmerId": 509583,
"district": "",
"brand": "Honda",
"taluka": "",
"crop": "",
"productName": "Honda BRUSH CUTTER UMK 435-T U2NT",
"state": "",
"sku": "AGS-HW-471",
"village": "",
"quantity": 1
},{
"_index": "farmer_products_index_adv",
"_type": "farmer_products_map",
"_id": "AVtCttf0IP9v8cUTtoi1",
"_score": 1,
"_source": {
"orderId": 469177,
"category": "Crop Nutrition",
"farmerId": 13732,
"district": "Banaskantha",
"brand": "Unassigned Brand",
"taluka": "Kankrej",
"crop": "",
"productName": "Free Power Gel - Plant Nutrient (500 Ml)",
"state": "Gujarat",
"sku": "AGS-CN-006",
"village": "Nanota",
"quantity": 1
}}]
I wish to perform the following query aggregation:
{
"query": {
"bool": {
"must": [{
"match": {
"state": {
"query": "Maharashtra",
"fuzziness": 3,
"prefix_length": 2
}
}
}, {
"match": {
"district": {
"query": "Wardha",
"fuzziness": 3,
"prefix_length": 2
}
}
}, {
"match": {
"taluka": {
"query": "Wardha",
"fuzziness": 3,
"prefix_length": 2
}
}
}]
}
},
"size": 0,
"aggs": {
"group_by_state": {
"terms": {
"field": "state.keyword"
},
"aggs": {
"group_by_district": {
"terms": {
"field": "district.keyword"
},
"aggs": {
"group_by_taluka": {
"terms": {
"field": "taluka.keyword"
},
"aggs": {
"group_by_name": {
"terms": {
"field": "productName.keyword"
},
"aggs": {
"group_by_sku": {
"terms": {
"field": "sku.keyword"
},
"aggs": {
"sum_quantity": {
"sum": {
"field": "quantity"
}
}
}
}
}
}
}
}
}
}
}
}
}}
The current output of the aggregation is:
[{
"key": "Free MH HDPE Tarpaulin Tape Black 3mtr roll",
"doc_count": 13,
"group_by_sku": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [{
"key": "AGS-HW-410",
"doc_count": 13,
"sum_quantity": {
"value": 13
}
}]}}, {
"key": "Tarpaulin Sheet 11*15 (Tadpatri) 250 GSM",
"doc_count": 10,
"group_by_sku": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [{
"key": "AGS-HW-326",
"doc_count": 10,
"sum_quantity": {
"value": 10
}
}]
}}, {
"key": "Free Humic power Advanced powder 95% (250 Gms)",
"doc_count": 6,
"group_by_sku": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [{
"key": "AGS-CN-036",
"doc_count": 6,
"sum_quantity": {
"value": 18
}
}]
}}]
I want the output to be sorted by the sum_quantity value:
[{
"key": "Free Humic power Advanced powder 95% (250 Gms)",
"doc_count": 6,
"group_by_sku": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [{
"key": "AGS-CN-036",
"doc_count": 6,
"sum_quantity": {
"value": 18
}
}]}}, {
"key": "Free MH HDPE Tarpaulin Tape Black 3mtr roll",
"doc_count": 13,
"group_by_sku": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [{
"key": "AGS-HW-410",
"doc_count": 13,
"sum_quantity": {
"value": 13
}
}]
}}, {
"key": "Tarpaulin Sheet 11*15 (Tadpatri) 250 GSM",
"doc_count": 10,
"group_by_sku": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [{
"key": "AGS-HW-326",
"doc_count": 10,
"sum_quantity": {
"value": 10
}
}]
}}]
How can I achieve this? I tried approaches suggested in other stackoverflow questions using "reverse_nested" but was unable to arrive at a solution.

elasticsearch terms and sum aggregation

I have documents in elasticsearch (1.5) that looks like:
{
"gender": [
{
"name": "unknown",
"value": 12
},
{
"name": "male",
"value": 89
},
{
"name": "female",
"value": 84
}
]
}
not all of the documents contains the three options (male/female/unknown)
i would like to get the sum of all values per each gender name. like that:
{
"buckets": [
{
"key": "unknown",
"doc_count": 112,
"gender_a": {
"value": 462
}
},
{
"key": "male",
"doc_count": 107,
"gender_a": {
"value": 438
}
},
{
"key": "female",
"doc_count": 36,
"gender_a": {
"value": 186
}
}
]
}
i tried this query:
{
"aggs": {
"gender_name": {
"terms": {
"field": "gender.name"
},
"aggs": {
"gender_sum": {
"sum": {
"field": "gender.value"
}
}
}
}
}
}
but something weird is going on, and i don't get the right values.
any idea what i am missing ?
You will probably need to make sure that your "gender" property has type "nested". With that, I was able to make the following do what I think you're asking.
First I set up a simple index:
PUT /test_index
{
"mappings": {
"doc": {
"properties": {
"gender": {
"type": "nested",
"properties": {
"name": {
"type": "string"
},
"value": {
"type": "long"
}
}
}
}
}
}
}
Then added a couple of docs:
PUT /test_index/doc/1
{
"gender": [
{
"name": "unknown",
"value": 12
},
{
"name": "male",
"value": 89
},
{
"name": "female",
"value": 84
}
]
}
PUT /test_index/doc/2
{
"gender": [
{
"name": "male",
"value": 8
},
{
"name": "female",
"value": 4
}
]
}
Then I was able to get total counts by gender name as follows:
POST /test_index/_search?search_type=count
{
"aggs": {
"genders": {
"nested": {
"path": "gender"
},
"aggs": {
"gender_terms": {
"terms": {
"field": "gender.name"
},
"aggs": {
"gender_name_value_sums": {
"sum": {
"field": "gender.value"
}
}
}
}
}
}
}
}
...
{
"took": 1,
"timed_out": false,
"_shards": {
"total": 1,
"successful": 1,
"failed": 0
},
"hits": {
"total": 2,
"max_score": 0,
"hits": []
},
"aggregations": {
"genders": {
"doc_count": 5,
"gender_terms": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "female",
"doc_count": 2,
"gender_name_value_sums": {
"value": 88,
"value_as_string": "88.0"
}
},
{
"key": "male",
"doc_count": 2,
"gender_name_value_sums": {
"value": 97,
"value_as_string": "97.0"
}
},
{
"key": "unknown",
"doc_count": 1,
"gender_name_value_sums": {
"value": 12,
"value_as_string": "12.0"
}
}
]
}
}
}
}
Here is the code I used to test it:
http://sense.qbox.io/gist/d4533215806b858aa2cc1565546d167fdec3c973

Find min value of field in nested array object after aggregation

I would like to find the minimum value of a field in a nested array object after aggregation.
Data example:
[
{
"id": "i1",
"version": 1,
"entries": [
{
"name": "n1",
"position": 1
}, {
"name": "n2",
"position": 2
}
]
}, {
"id": "i1"
"version": 2,
"entries": [
{
"name": "n2",
"position": 3
}, {
"name": "n3",
"position": 4
}
]
},
{
"id": "i2",
"version": 1,
"entries": [
{
"name": "n1",
"position": 8
}, {
"name": "n2",
"position": 7
}
]
}, {
"id": "i2"
"version": 2,
"entries": [
{
"name": "n2",
"position": 6
}, {
"name": "n3",
"position": 5
}
]
}
]
Pseudo Query:
SELECT min(entries["n2"].position) WHERE entries.name="n2" GROUP BY id;
Expected Result:
[
{
"id": "i1",
"min(position)": 2
}, {
"id": "i2",
"min(position)": 6
}
]
I can do this in code, but it's not performant, as I need to return the document sources which can be quite large.
I am thinking of denormalizing the data, but would like to first know if this request is not possible at all.
You can do it by nesting several aggregations like this:
terms agg -> nested agg -> filter agg -> min agg
To test it I set up an index:
PUT /test_index
{
"settings": {
"number_of_shards": 1
},
"mappings": {
"doc": {
"properties": {
"entries": {
"type": "nested",
"properties": {
"name": {
"type": "string"
},
"position": {
"type": "long"
}
}
},
"id": {
"type": "string"
},
"version": {
"type": "long"
}
}
}
}
}
And indexed your docs:
PUT /test_index/doc/_bulk
{"index":{"_id":1}}
{"id":"i1","version":1,"entries":[{"name":"n1","position":1},{"name":"n2","position":2}]}
{"index":{"_id":2}}
{"id":"i1","version":2,"entries":[{"name":"n2","position":3},{"name":"n3","position":4}]}
{"index":{"_id":3}}
{"id":"i2","version":1,"entries":[{"name":"n1","position":8},{"name":"n2","position":7}]}
{"index":{"_id":4}}
{"id":"i2","version":2,"entries":[{"name":"n2","position":6},{"name":"n3","position":5}]}
Here is the query:
POST /test_index/_search?search_type=count
{
"aggs": {
"id_terms": {
"terms": {
"field": "id"
},
"aggs": {
"nested_entries": {
"nested": {
"path": "entries"
},
"aggs": {
"filter_name": {
"filter": {
"term": {
"entries.name": "n2"
}
},
"aggs": {
"min_position": {
"min": {
"field": "position"
}
}
}
}
}
}
}
}
}
}
and the result:
{
"took": 2,
"timed_out": false,
"_shards": {
"total": 1,
"successful": 1,
"failed": 0
},
"hits": {
"total": 4,
"max_score": 0,
"hits": []
},
"aggregations": {
"id_terms": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "i1",
"doc_count": 2,
"nested_entries": {
"doc_count": 4,
"filter_name": {
"doc_count": 2,
"min_position": {
"value": 2,
"value_as_string": "2.0"
}
}
}
},
{
"key": "i2",
"doc_count": 2,
"nested_entries": {
"doc_count": 4,
"filter_name": {
"doc_count": 2,
"min_position": {
"value": 6,
"value_as_string": "6.0"
}
}
}
}
]
}
}
}
Here is the code I used all together:
http://sense.qbox.io/gist/34a013099ef07fb527d9d7cf8490ad1bbafa718b

Resources