Elasticsearch - unable to get nested object from multilevel nested object - elasticsearch

I have a multilevel nested object. The first nested object is category, and inside category there is one more nested object, group.
So I want to get the distinct categories along with their nested groups using an aggregation query.
I am able to get the distinct categories, but I am unable to get the group details along with them.
Mapping:
"mappings": {
"doc": {
"properties": {
"categories": {
"type": "nested",
"properties": {
"cat_id": {
"type": "integer"
},
"cat_name": {
"type": "keyword"
},
"cat_slug": {
"type": "keyword"
},
"cat_type": {
"type": "long"
},
"groups": {
"type": "nested",
"properties": {
"group": {
"type": "keyword"
},
"group_name": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"id": {
"type": "long"
}
}
},
"ordering": {
"type": "integer"
},
"parent_id": {
"type": "integer"
},
"parent_name": {
"type": "keyword"
},
"parent_slug": {
"type": "keyword"
},
"parent_type": {
"type": "long"
}
}
}
}
}
}
Sample data:
{
"_index": "product",
"_type": "doc",
"_id": "18556",
"_score": 1,
"_source": {
"sku": "GR0005P08",
"product_id": 18556,
"slug": "blue-garter-with-sexy-laces",
"categories": [
{
"ordering": 10,
"cat_id": 343,
"cat_type": 1,
"cat_slug": "t-thisr",
"cat_name": "cat1"
},
{
"ordering": 9999999,
"cat_id": 2,
"cat_type": 3,
"cat_slug": "pajams",
"cat_name": "pajams"
},
{
"ordering": 5,
"cat_id": 77,
"cat_type": 3,
"cat_slug": "accessories",
"cat_name": "Accessories"
},
{
"parent_name": "Pajams",
"cat_name": "Night",
"ordering": 1,
"cat_id": 139,
"parent_type": 3,
"cat_slug": "night",
"parent_id": 2,
"groups": [
{
"id": 146,
"group_name": "Shop By Style"
},
{
"id": 481,
"group_name": "Shop By Offer "
}
],
"parent_slug": "pajams",
"cat_type": 1
}
],
"name": "love for pajams"
}
}
This is my aggregation query:
GET product/_search
{
"_source": [
"product_id"
],
"query": {
"nested": {
"path": "categories",
"query": {
"bool": {
"must": [
{
"match": {
"categories.cat_slug": "xyz"
}
}
]
}
}
}
},
"aggs": {
"categories": {
"nested": {
"path": "categories"
},
"aggs": {
"distinct_categories.cat_name": {
"terms": {
"field": "categories.cat_name"
},
"aggs": {
"categories.groups.group_name": {
"terms": {
"field": "categories.groups.group_name.keyword"
}
}
}
}
}
}
}
}
And this is my Response:
{
"took": 11,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"skipped": 0,
"failed": 0
},
"hits": {
"total": 8,
"max_score": 3.232121,
"hits": [
{
"_index": "product",
"_type": "doc",
"_id": "15621",
"_score": 3.232121,
"_source": {
"product_id": 15621
}
},
{
"_index": "product",
"_type": "doc",
"_id": "18556",
"_score": 2.5758784,
"_source": {
"product_id": 18556
}
}
]
},
"aggregations": {
"categories": {
"doc_count": 98,
"distinct_categories.cat_name": {
"doc_count_error_upper_bound": 2,
"sum_other_doc_count": 50,
"buckets": [
{
"key": "Accessories",
"doc_count": 8,
"categories.groups.group_name": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": []
}
},
{
"key": "T-shirt",
"doc_count": 8,
"categories.groups.group_name": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": []
}
},
{
"key": "Sexy",
"doc_count": 7,
"categories.groups.group_name": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": []
}
},
{
"key": "clothing",
"doc_count": 6,
"categories.groups.group_name": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": []
}
},
{
"key": "Pants",
"doc_count": 6,
"categories.groups.group_name": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": []
}
},
{
"key": "Colour Me",
"doc_count": 4,
"categories.groups.group_name": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": []
}
},
{
"key": "Pajamas",
"doc_count": 3,
"categories.groups.group_name": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": []
}
}
]
}
}
}
}

Probably try "field": "categories.groups.group_name" instead of "field": "categories.groups.group_name.keyword" in your deepest aggregation?
Edit:
Using "categories.groups.group_name.keyword" is the correct way to achieve this. The problem is most likely that you need to add a nested aggregation for each level of your nested structure in your query.
"aggs": {
"categories": {
"nested": {
"path": "categories"
},
"aggs": {
"distinct_categories.cat_name": {
"terms": {
"field": "categories.cat_name"
},
"aggs": {
"deeper_nested_agg": {
"nested": {
"path": "categories.groups"
},
"aggs": {
"categories.groups.group_name": {
"terms": {
"field": "categories.groups.group_name.keyword"
}
}
}
}
}
}
}
}
Please give it a try. Hope this helps!

I found an issue with the mapping definition of groups; after changing it, the query works fine.
{
"type": "nested",
"properties": {
"group_name": {
"type": "keyword"
},
"id": {
"type": "long"
}
}
}

Related

Elastic search terms aggregation for getting filter options

I'm trying to implement product search and want to get search results along with filters to filter by. I have managed to get the filter keys, but I also want the values of those keys.
My product body is
{
...product,
"attributes": [
{
"name": "Color",
"value": "Aqua Blue"
},
{
"name": "Gender",
"value": "Female"
},
{
"name": "Occasion",
"value": "Active Wear"
},
{
"name": "Size",
"value": "0"
}
],
}
and I'm using this query in ES
GET product/_search
{
"aggs": {
"filters": {
"terms": {
"field": "attributes.name"
},
"aggs": {
"values": {
"terms": {
"field": "attributes.value",
"size": 10
}
}
}
}
}
}
Not sure why, but I'm getting all values for each key
"aggregations": {
"filters": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "Color",
"doc_count": 3,
"values": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "Active Wear",
"doc_count": 3
},
{
"key": "Aqua Blue",
"doc_count": 3
},
{
"key": "Female",
"doc_count": 3
},
{
"key": "0",
"doc_count": 2
},
{
"key": "10XL",
"doc_count": 1
}
]
}
},
{
"key": "Gender",
"doc_count": 3,
"values": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "Active Wear",
"doc_count": 3
},
{
"key": "Aqua Blue",
"doc_count": 3
},
{
"key": "Female",
"doc_count": 3
},
{
"key": "0",
"doc_count": 2
},
{
"key": "10XL",
"doc_count": 1
}
]
}
},
{
"key": "Occasion",
"doc_count": 3,
"values": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "Active Wear",
"doc_count": 3
},
{
"key": "Aqua Blue",
"doc_count": 3
},
{
"key": "Female",
"doc_count": 3
},
{
"key": "0",
"doc_count": 2
},
{
"key": "10XL",
"doc_count": 1
}
]
}
},
{
"key": "Size",
"doc_count": 3,
"values": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "Active Wear",
"doc_count": 3
},
{
"key": "Aqua Blue",
"doc_count": 3
},
{
"key": "Female",
"doc_count": 3
},
{
"key": "0",
"doc_count": 2
},
{
"key": "10XL",
"doc_count": 1
}
]
}
}
]
}
Also, I do not want to manually specify every key explicitly (like Color, Size) to get their respective values.
Thanks :)
To keep things simple, you must use a single field to store each attribute:
"gender":"Male"
I assume you have tons of attributes, so you create an array instead; to handle that, you will have to use the "nested" field type.
The nested type preserves the relation between the properties of each nested document. If you don't use nested, you will see all the properties and values mixed together, and you will not be able to aggregate by a property without manually adding filters.
You can read an article I wrote about that here:
https://opster.com/guides/elasticsearch/data-architecture/elasticsearch-nested-field-object-field/
Mappings :
PUT test_product_nested
{
"mappings": {
"properties": {
"attributes": {
"type": "nested",
"properties": {
"name": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"value": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
}
}
},
"title": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
}
}
}
}
This query will only show Red products of size XL and aggregate by attributes.
If you want to do OR's instead of AND's you must use "should" clauses instead of "filter" clauses.
Query
POST test_product_nested/_search
{
"query": {
"bool": {
"filter": [
{
"nested": {
"path": "attributes",
"query": {
"bool": {
"filter": [
{
"term": {
"attributes.name.keyword": "Color"
}
},
{
"term": {
"attributes.value.keyword": "Red"
}
}
]
}
}
}
},
{
"nested": {
"path": "attributes",
"query": {
"bool": {
"filter": [
{
"term": {
"attributes.name.keyword": "Size"
}
},
{
"term": {
"attributes.value.keyword": "XL"
}
}
]
}
}
}
}
]
}
},
"aggs": {
"attributes": {
"nested": {
"path": "attributes"
},
"aggs": {
"name": {
"terms": {
"field": "attributes.name.keyword"
},
"aggs": {
"values": {
"terms": {
"field": "attributes.value.keyword",
"size": 10
}
}
}
}
}
}
}
}
Results
{
"took": 1,
"timed_out": false,
"_shards": {
"total": 1,
"successful": 1,
"skipped": 0,
"failed": 0
},
"hits": {
"total": {
"value": 1,
"relation": "eq"
},
"max_score": 0,
"hits": [
{
"_index": "test_product_nested",
"_id": "aJRayoQBtNG1OrZoEOQi",
"_score": 0,
"_source": {
"title": "Product 1",
"attributes": [
{
"name": "Color",
"value": "Red"
},
{
"name": "Gender",
"value": "Female"
},
{
"name": "Occasion",
"value": "Active Wear"
},
{
"name": "Size",
"value": "XL"
}
]
}
}
]
},
"aggregations": {
"attributes": {
"doc_count": 4,
"name": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "Color",
"doc_count": 1,
"values": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "Red",
"doc_count": 1
}
]
}
},
{
"key": "Gender",
"doc_count": 1,
"values": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "Female",
"doc_count": 1
}
]
}
},
{
"key": "Occasion",
"doc_count": 1,
"values": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "Active Wear",
"doc_count": 1
}
]
}
},
{
"key": "Size",
"doc_count": 1,
"values": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "XL",
"doc_count": 1
}
]
}
}
]
}
}
}
}

Elasticsearch: Ordering with respect to Nested Sum Aggregation

The use case is to create aggregations to find top selling products in a region sorted by the counts of products sold. The data is stored in an index in elasticsearch.
I want to sort the output of my 'group_by_name' aggregation by the value of the 'sum_quantity' aggregation, which is the innermost aggregation, two levels below 'group_by_name' (with the intermediate aggregation 'group_by_sku' in between). The default output is sorted by doc_count; I want the buckets to be sorted by the 'sum_quantity' value instead.
I have an index with the following mapping:
{"settings": {
"index": {
"number_of_shards": 2,
"number_of_replicas": 0
},
"analysis":{
"analyzer":{
"autocomplete":{
"type":"custom",
"tokenizer":"standard",
"filter":[ "standard", "lowercase", "ngram" ]
}
},
"filter":{
"ngram":{
"type":"ngram",
"min_gram":3,
"max_gram":25
}
}
} },"mappings": {
"farmer_products_map":{
"properties": {
"state": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword"
}
}
},
"district": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword"
}
}
},
"taluka": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword"
}
}
},
"village": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword"
}
}
},
"createdOn": {
"type": "date",
"format": "epoch_millis"
},
"category": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword"
}
}
},
"brand": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword"
}
}
},
"productName": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword"
},
"autocomplete":{
"analyzer": "autocomplete",
"type": "string"
}
}
},
"crop": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword"
}
}
},
"sku": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword"
}
}
},
"quantity": {
"type": "integer"
},
"farmerId": {
"type": "integer"
},
"orderId": {
"type": "integer"
}
}} }}
The following are the sample documents:
[{
"_index": "farmer_products_index_adv",
"_type": "farmer_products_map",
"_id": "AVtCttf0IP9v8cUTtoiz",
"_score": 1,
"_source": {
"orderId": 469173,
"category": "Hardware",
"farmerId": 509583,
"district": "",
"brand": "Honda",
"taluka": "",
"crop": "",
"productName": "Honda BRUSH CUTTER UMK 435-T U2NT",
"state": "",
"sku": "AGS-HW-471",
"village": "",
"quantity": 1
},{
"_index": "farmer_products_index_adv",
"_type": "farmer_products_map",
"_id": "AVtCttf0IP9v8cUTtoi1",
"_score": 1,
"_source": {
"orderId": 469177,
"category": "Crop Nutrition",
"farmerId": 13732,
"district": "Banaskantha",
"brand": "Unassigned Brand",
"taluka": "Kankrej",
"crop": "",
"productName": "Free Power Gel - Plant Nutrient (500 Ml)",
"state": "Gujarat",
"sku": "AGS-CN-006",
"village": "Nanota",
"quantity": 1
}}]
I wish to perform the following query aggregation:
{
"query": {
"bool": {
"must": [{
"match": {
"state": {
"query": "Maharashtra",
"fuzziness": 3,
"prefix_length": 2
}
}
}, {
"match": {
"district": {
"query": "Wardha",
"fuzziness": 3,
"prefix_length": 2
}
}
}, {
"match": {
"taluka": {
"query": "Wardha",
"fuzziness": 3,
"prefix_length": 2
}
}
}]
}
},
"size": 0,
"aggs": {
"group_by_state": {
"terms": {
"field": "state.keyword"
},
"aggs": {
"group_by_district": {
"terms": {
"field": "district.keyword"
},
"aggs": {
"group_by_taluka": {
"terms": {
"field": "taluka.keyword"
},
"aggs": {
"group_by_name": {
"terms": {
"field": "productName.keyword"
},
"aggs": {
"group_by_sku": {
"terms": {
"field": "sku.keyword"
},
"aggs": {
"sum_quantity": {
"sum": {
"field": "quantity"
}
}
}
}
}
}
}
}
}
}
}
}
}}
The current output of the aggregation is:
[{
"key": "Free MH HDPE Tarpaulin Tape Black 3mtr roll",
"doc_count": 13,
"group_by_sku": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [{
"key": "AGS-HW-410",
"doc_count": 13,
"sum_quantity": {
"value": 13
}
}]}}, {
"key": "Tarpaulin Sheet 11*15 (Tadpatri) 250 GSM",
"doc_count": 10,
"group_by_sku": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [{
"key": "AGS-HW-326",
"doc_count": 10,
"sum_quantity": {
"value": 10
}
}]
}}, {
"key": "Free Humic power Advanced powder 95% (250 Gms)",
"doc_count": 6,
"group_by_sku": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [{
"key": "AGS-CN-036",
"doc_count": 6,
"sum_quantity": {
"value": 18
}
}]
}}]
I want the output to be sorted by the sum_quantity value:
[{
"key": "Free Humic power Advanced powder 95% (250 Gms)",
"doc_count": 6,
"group_by_sku": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [{
"key": "AGS-CN-036",
"doc_count": 6,
"sum_quantity": {
"value": 18
}
}]}}, {
"key": "Free MH HDPE Tarpaulin Tape Black 3mtr roll",
"doc_count": 13,
"group_by_sku": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [{
"key": "AGS-HW-410",
"doc_count": 13,
"sum_quantity": {
"value": 13
}
}]
}}, {
"key": "Tarpaulin Sheet 11*15 (Tadpatri) 250 GSM",
"doc_count": 10,
"group_by_sku": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [{
"key": "AGS-HW-326",
"doc_count": 10,
"sum_quantity": {
"value": 10
}
}]
}}]
How can I achieve this? I tried approaches suggested in other stackoverflow questions using "reverse_nested" but was unable to arrive at a solution.

Elastic Search: Aggregation sum on a particular field

I am new to elastic search and requesting some help.
Basically I have some 2 million documents in my elastic search and the documents look like below:
{
"_index": "flipkart",
"_type": "PSAD_ThirdParty",
"_id": "430001_MAM_2016-02-04",
"_version": 1,
"_score": 1,
"_source": {
"metrics": [
{
"id": "Metric1",
"value": 70
},
{
"id": "Metric2",
"value": 90
},
{
"id": "Metric3",
"value": 120
}
],
"primary": true,
"ticketId": 1,
"pliId": 206,
"bookedNumbers": 15000,
"ut": 1454567400000,
"startDate": 1451629800000,
"endDate": 1464589800000,
"tz": "EST"
}
}
I want to write an aggregation query which satisfies below conditions:
1) First query based on "_index", "_type" and "pliId".
2) Do aggregation sum on metrics.value based on metrics.id = "Metric1".
Basically I need to query records based on some fields and aggregate sum on a particular metrics value based on metrics id.
Could you please help me get my query right?
Your metrics field needs to be of type nested:
"metrics": {
"type": "nested",
"properties": {
"id": {
"type": "string",
"index": "not_analyzed"
}
}
}
If you want Metric1 to match exactly, including the upper-case letter, then, as shown above, the id field needs to be not_analyzed.
Then, if you only want metrics.id = "Metric1" aggregations, you need something like this:
{
"query": {
"filtered": {
"filter": {
"bool": {
"must": [
{
"term": {
"pliId": 206
}
}
]
}
}
}
},
"aggs": {
"by_metrics": {
"nested": {
"path": "metrics"
},
"aggs": {
"metric1_only": {
"filter": {
"bool": {
"must": [
{
"term": {
"metrics.id": {
"value": "Metric1"
}
}
}
]
}
},
"aggs": {
"by_metric_id": {
"terms": {
"field": "metrics.id"
},
"aggs": {
"total_delivery": {
"sum": {
"field": "metrics.value"
}
}
}
}
}
}
}
}
}
}
Created new index:
Method : PUT ,
URL : http://localhost:9200/google/
Body:
{
"mappings": {
"PSAD_Primary": {
"properties": {
"metrics": {
"type": "nested",
"properties": {
"id": {
"type": "string",
"index": "not_analyzed"
},
"value": {
"type": "integer",
"index": "not_analyzed"
}
}
}
}
}
}
}
Then I inserted some 200 thousand documents and then ran the query, and it worked.
Response:
{
"took": 34,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"failed": 0
},
"hits": {
"total": 1,
"max_score": 1,
"hits": [
{
"_index": "google",
"_type": "PSAD_Primary",
"_id": "383701291_MAM_2016-01-06",
"_score": 1,
"_source": {
"metrics": [
{
"id": "Metric1",
"value": 70
},
{
"id": "Metric2",
"value": 90
},
{
"id": "Metric3",
"value": 120
}
],
"primary": true,
"ticketId": 1,
"pliId": 221244,
"bookedNumbers": 15000,
"ut": 1452061800000,
"startDate": 1451629800000,
"endDate": 1464589800000,
"tz": "EST"
}
}
]
},
"aggregations": {
"by_metrics": {
"doc_count": 3,
"metric1_only": {
"doc_count": 1,
"by_metric_id": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "Metric1",
"doc_count": 1,
"total_delivery": {
"value": 70
}
}
]
}
}
}
}
}

elasticsearch nested aggregation inside a reverse nested aggregation

Elasticsearch version: 2.3.1
JVM version: 1.8.0_66 / 25.66-b17
OS version: Mac OS X 10.11.4
I am having trouble getting the correct values to show up in a 4 level deep aggregation scenario where the first two levels are nested, the third is reverse_nested, and the fourth is nested again.
Here is my index mapping:
curl -XDELETE localhost:9200/orders-d
curl -XPUT localhost:9200/orders-d
curl -XPUT localhost:9200/orders-d/order-d/_mapping -d '{
"order-d": {
"properties": {
"id": {
"type": "string"
},
"orderNumber": {
"type": "string"
},
"groupId": {
"type": "string"
},
"groupOrderNumber": {
"type": "string"
},
"dateCreated": {
"type": "date"
},
"dateUpdated": {
"type": "date"
},
"location": {
"type": "object"
},
"orderSubmitter": {
"type": "object"
},
"distributor": {
"type": "object"
},
"salesRep": {
"type": "object"
},
"status": {
"type": "string"
},
"total": {
"type": "double"
},
"isTTOrder": {
"type": "boolean"
},
"lineItems": {
"type": "nested",
"include_in_parent": true,
"properties": {
"product": {
"type": "object"
},
"category": {
"type": "object"
},
"subCategory": {
"type": "object"
},
"quantity": {
"type": "double"
},
"unitPrice": {
"type": "double"
},
"totalPrice": {
"type": "double"
},
"pricedByUnitPrice": {
"type": "double"
}
}
}
}
}
}'
Here are the documents:
curl -XPUT localhost:9200/orders-d/order-d/0 -d '{
"id": "571652632a19085c008b4577",
"orderNumber": "1617590686",
"groupId": "571652632a19085c008b4578",
"groupOrderNumber": "3485944627",
"dateCreated": "2016-04-19",
"dateUpdated": null,
"location": {
"id": "54e53853505eb66b008b4569",
"name": "Andrews Diner"
},
"orderSubmitter": {
"id": "54e53853505eb66b008b4567",
"name": "Kostantino Plaitis"
},
"distributor": {
"id": "55c3879459ad0c63008b4569",
"name": "Performance Foodservice Metro NY"
},
"salesRep": null,
"status": "pending",
"total": 5410.21,
"isTTOrder": true,
"lineItems": [{
"product": {
"id": "55bfb445c440b26a008b4571",
"name": "Sabrett Sauerkraut 12 x 2 lb bags"
},
"category": {
"id": "53df845b3b8e77710e7b23ec",
"name": "Groceries & Dry Food"
},
"subCategory": {
"id": "53e1e8723b8e77a52b8b4586",
"name": "Other Sauces Dipping\/Condiments\/Savoury Toppings\/Savoury Spreads\/Marinades (Perishable)"
},
"quantity": 1,
"unitPrice": 25.24,
"totalPrice": 25.24,
"pricedByUnitPrice": 0
}, {
"product": {
"id": "55bc219238c0376e008b4570",
"name": "Franks Red Hot Cayenne Pepper Sauce 4 x 1 gallon"
},
"category": {
"id": "53df845b3b8e77710e7b23ec",
"name": "Groceries & Dry Food"
},
"subCategory": {
"id": "53e1e8723b8e77a52b8b4606",
"name": "Other Sauces Dipping\/Condiments\/Savoury Toppings\/Savoury Spreads\/Marinades (Shelf Stable)"
},
"quantity": 1,
"unitPrice": 45.06,
"totalPrice": 45.06,
"pricedByUnitPrice": 0
}, {
"product": {
"id": "56d76c41bd821fda008b459a",
"name": "Cereal, Classic Variety Pack, Kelloggs 1\/60 ct."
},
"category": {
"id": "53df845b3b8e77710e7b23ec",
"name": "Groceries & Dry Food"
},
"subCategory": {
"id": "53e1e8723b8e77a52b8b462d",
"name": "Grains\/Cereal - Ready to Eat - (Shelf Stable)"
},
"quantity": 1,
"unitPrice": 56.03,
"totalPrice": 56.03,
"pricedByUnitPrice": 0
}]
}'
curl -XPUT localhost:9200/orders-d/order-d/0 -d '{
"id": "571652632a19085c008b4576",
"orderNumber": "2041063294",
"groupId": "571652632a19085c008b4578",
"groupOrderNumber": "3485944627",
"dateCreated": "2016-04-19",
"dateUpdated": null,
"location": {
"id": "54e53853505eb66b008b4569",
"name": "Andrews Diner"
},
"orderSubmitter": {
"id": "54e53853505eb66b008b4567",
"name": "Kostantino Plaitis"
},
"distributor": {
"id": "55cdeece0a41216c008b4583",
"name": "Driscoll Foods"
},
"salesRep": null,
"status": "pending",
"total": 7575.27,
"isTTOrder": true,
"lineItems": [{
"product": {
"id": "55ad05e08d28c36b008b456c",
"name": "Pepper 3000 pcs"
},
"category": {
"id": "53df845b3b8e77710e7b23ec",
"name": "Groceries & Dry Food"
},
"subCategory": {
"id": "53e1e8723b8e77a52b8b4582",
"name": "Herbs\/Spices (Shelf Stable)"
},
"quantity": 3,
"unitPrice": 8.95,
"totalPrice": 26.85,
"pricedByUnitPrice": 0
}, {
"product": {
"id": "55b3a12f6b415c68008b4568",
"name": "Venice Maid Deluxe Corned Beef Hash 6 x 6 lb 10 oz"
},
"category": {
"id": "53df846c3b8e77710e7b23f7",
"name": "Meat"
},
"subCategory": {
"id": "54d8c56a279871b9078b4581",
"name": "Beef - Prepared\/Processed"
},
"quantity": 1,
"unitPrice": 59.75,
"totalPrice": 59.75,
"pricedByUnitPrice": 0
}, {
"product": {
"id": "55b145798c26dc69008b4568",
"name": "Aladdin Bakers Sesame Bread Sticks 150 x 2 packs"
},
"category": {
"id": "53df845b3b8e77710e7b23ec",
"name": "Groceries & Dry Food"
},
"subCategory": {
"id": "53e1e8723b8e77a52b8b45b0",
"name": "Dried Breads (Shelf Stable)"
},
"quantity": 8,
"unitPrice": 15.5,
"totalPrice": 124,
"pricedByUnitPrice": 0
}, {
"product": {
"id": "55ad074a8d28c36f008b456d",
"name": "Smuckers Breakfast Syrup 100 cups"
},
"category": {
"id": "53df845b3b8e77710e7b23ec",
"name": "Groceries & Dry Food"
},
"subCategory": {
"id": "53e1e8723b8e77a52b8b457d",
"name": "Syrup\/Treacle\/Molasses (Shelf Stable)"
},
"quantity": 10,
"unitPrice": 8.95,
"totalPrice": 89.5,
"pricedByUnitPrice": 0
}]
}'
Here is my query:
curl -XPOST localhost:9200/orders-d/_search -d '{
"from": 0,
"size": 0,
"aggregations": {
"totalLineItems": {
"aggs": {
"totalLineItems": {
"terms": {
"field": "lineItems.category.id",
"size": 0
},
"aggs": {
"totalLineItems": {
"terms": {
"field": "lineItems.product.id",
"size": 0
},
"aggs": {
"totalLineItems": {
"aggs": {
"totalLineItems": {
"terms": {
"field": "distributor.id",
"size": 0
},
"aggs": {
"totalLineItems": {
"aggs": {
"totalLineItems": {
"sum": {
"field": "lineItems.totalPrice"
}
}
},
"nested": {
"path": "lineItems"
}
}
}
}
},
"reverse_nested": {}
}
}
}
}
}
},
"nested": {
"path": "lineItems"
}
}
},
"query": {
"bool": {
"must": [{
"range": {
"dateCreated": {
"format": "yyyy-MM-dd",
"gte": "2016-01-01",
"lte": "2016-04-30"
}
}
}]
}
}
}'
...and here are my results:
{
"took": 8,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"failed": 0
},
"hits": {
"total": 1,
"max_score": 0.0,
"hits": []
},
"aggregations": {
"totalLineItems": {
"doc_count": 4,
"totalLineItems": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [{
"key": "53df845b3b8e77710e7b23ec",
"doc_count": 3,
"totalLineItems": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [{
"key": "55ad05e08d28c36b008b456c",
"doc_count": 1,
"totalLineItems": {
"doc_count": 1,
"totalLineItems": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [{
"key": "55cdeece0a41216c008b4583",
"doc_count": 1,
"totalLineItems": {
"doc_count": 4,
"totalLineItems": {
"value": 300.1
}
}
}]
}
}
}, {
"key": "55ad074a8d28c36f008b456d",
"doc_count": 1,
"totalLineItems": {
"doc_count": 1,
"totalLineItems": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [{
"key": "55cdeece0a41216c008b4583",
"doc_count": 1,
"totalLineItems": {
"doc_count": 4,
"totalLineItems": {
"value": 300.1
}
}
}]
}
}
}, {
"key": "55b145798c26dc69008b4568",
"doc_count": 1,
"totalLineItems": {
"doc_count": 1,
"totalLineItems": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [{
"key": "55cdeece0a41216c008b4583",
"doc_count": 1,
"totalLineItems": {
"doc_count": 4,
"totalLineItems": {
"value": 300.1
}
}
}]
}
}
}]
}
}, {
"key": "53df846c3b8e77710e7b23f7",
"doc_count": 1,
"totalLineItems": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [{
"key": "55b3a12f6b415c68008b4568",
"doc_count": 1,
"totalLineItems": {
"doc_count": 1,
"totalLineItems": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [{
"key": "55cdeece0a41216c008b4583",
"doc_count": 1,
"totalLineItems": {
"doc_count": 4,
"totalLineItems": {
"value": 300.1
}
}
}]
}
}
}]
}
}]
}
}
}
}
As you can see from the results, all the aggregated values for each drilldown of totalLineItems have the same exact value. This is obviously incorrect.
Did I do something wrong, is it a bug, or is nesting inside a reverse nesting unsupported?

ElasticSearch aggregations - to lowercase or not to lowercase

Please observe this scenario:
Define mappings
PUT /my_index
{
"mappings": {
"my_type": {
"properties": {
"city": {
"type": "string",
"fields": {
"raw": {
"type": "string",
"index": "not_analyzed"
}
}
}
}
}
}
}
Add data
PUT /my_index/my_type/1
{
"city": "New York"
}
PUT /my_index/my_type/2
{
"city": "York"
}
PUT /my_index/my_type/3
{
"city": "york"
}
Query for facets
GET /my_index/_search
{
"size": 0,
"aggs": {
"Cities": {
"terms": {
"field": "city.raw"
}
}
}
}
Result
{
...
"aggregations": {
"Cities": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "New York",
"doc_count": 1
},
{
"key": "York",
"doc_count": 1
},
{
"key": "york",
"doc_count": 1
}
]
}
}
}
Dilemma
I would like two things:
"York" and "york" should be combined, so instead of 3 buckets with 1 hit each I would get 2 buckets, one for "New York (1)" and one for "York (2)"
The casing of the city must be preserved - I don't want facet values to be all lowercased
Dream result
{
...
"aggregations": {
"Cities": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "New York",
"doc_count": 1
},
{
"key": "York",
"doc_count": 2
}
]
}
}
}
It's going to make your client-side code slightly more complicated, but you could always do something like this.
Set up the index with an additional sub-field that is only lower-cased (not split on white space):
PUT /my_index
{
"settings": {
"analysis": {
"analyzer": {
"lowercase_analyzer": {
"type": "custom",
"tokenizer": "keyword",
"filter": [
"lowercase"
]
}
}
}
},
"mappings": {
"my_type": {
"properties": {
"city": {
"type": "string",
"fields": {
"lowercase": {
"type": "string",
"analyzer": "lowercase_analyzer"
},
"raw": {
"type": "string",
"index": "not_analyzed"
}
}
}
}
}
}
}
PUT /my_index/my_type/_bulk
{"index":{"_id":1}}
{"city":"New York"}
{"index":{"_id":2}}
{"city":"York"}
{"index":{"_id":3}}
{"city":"york"}
Then use a two-level aggregation like this, where the second orders alphabetically ascending (so that upper-case term will come first) and only returns the top raw term for each lower-case term:
GET /my_index/_search
{
"size": 0,
"aggs": {
"city_lowercase": {
"terms": {
"field": "city.lowercase"
},
"aggs": {
"city_terms": {
"terms": {
"field": "city.raw",
"order" : { "_term" : "asc" },
"size": 1
}
}
}
}
}
}
which returns:
{
"took": 5,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"failed": 0
},
"hits": {
"total": 3,
"max_score": 0,
"hits": []
},
"aggregations": {
"city_lowercase": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "york",
"doc_count": 2,
"city_terms": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 1,
"buckets": [
{
"key": "York",
"doc_count": 1
}
]
}
},
{
"key": "new york",
"doc_count": 1,
"city_terms": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "New York",
"doc_count": 1
}
]
}
}
]
}
}
}
Here's the code I used (with a few more doc examples):
http://sense.qbox.io/gist/f3781d58fbaadcc1585c30ebb087108d2752dfff

Resources