Elasticsearch nested aggregation returns results too slowly - elasticsearch

I have an index that will hold billions of documents in the future; for now it holds around 20 million documents. The query below takes over 10 s to return, while I need it to take around 3-4 s even with billions of documents. Is my structure wrong, or do I need to improve the query or the server configuration? I'm using Amazon Elasticsearch Service.
This query returns the amount/transactions/items of every station in every area.
Query:
{
"size" : 0,
"query": {
"bool": {
"must":
[
{
"range": {
"date_sec": {
"gte": "1483228800",
"lte": "1525046400"
}
}
},
{
"range": {
"time_sec": {
"gte": "32400",
"lte": "75600"
}
}
}
]
}
},
"aggs": {
"numstoreamountclient" : {
"filter" : { "range" : { "amount" : { "gt" : 0 } } },
"aggs": {
"numstore_amountclient": {
"cardinality" : {
"field" : "id_station"
}
}
}
},
"id_station": {
"terms": {
"field": "id_station"
},
"aggs": {
"area_type": {
"terms": {
"field": "area_type"
},
"aggs": {
"max_time" : { "max" : { "field" : "time_sec" } },
"min_time" : { "min" : { "field" : "time_sec" } },
"amountclient": {
"sum": {
"field": "amount"
}
},
"itemclient": {
"sum": {
"field": "items"
}
},
"transactionclient" : {
"value_count" :
{
"field" : "id"
}
},
"inwatchinghour": {
"filter" : { "term" : { "in_watchinghour" : 1 } },
"aggs" : {
"amountclientwatch": {
"sum": {
"field": "amount"
}
},
"itemclient": {
"sum": {
"field": "items"
}
},
"transactionclientwatch" : {
"value_count" :
{
"field" : "id"
}
}
}
},
"saleclient": {
"filter" : {
"bool": {
"must":
[
{
"term" : { "in_watchinghour" : 1 }
},
{
"range": {
"items": {
"gt": "0"
}
}
},
{
"range": {
"amount": {
"gt": "0"
}
}
}
]
}
},
"aggs" : {
"sale_client" : {
"value_count" :
{
"field" : "id"
}
}
}
}
}
}
}
}
}
}
Result:
{
"took": 10757,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"skipped": 0,
"failed": 0
},
"hits": {
"total": 19778330,
"max_score": 0,
"hits": []
},
"aggregations": {
"numstoreamountclient": {
"doc_count": 19677164,
"numstore_amountclient": {
"value": 35
}
},
"id_station": {
"doc_count_error_upper_bound": 437877,
"sum_other_doc_count": 11401869,
"buckets": [
{
"key": 2209,
"doc_count": 1456505,
"area_type": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": 1,
"doc_count": 1456505,
"saleclient": {
"doc_count": 708499,
"sale_client": {
"value": 708499
}
},
"inwatchinghour": {
"doc_count": 711435,
"transactionclientwatch": {
"value": 711435
},
"amountclientwatch": {
"value": 210203295816
},
"itemclient": {
"value": 4105206
}
},
"amountclient": {
"value": 427392789897
},
"max_time": {
"value": 75600
},
"transactionclient": {
"value": 1456505
},
"itemclient": {
"value": 8402911
},
"min_time": {
"value": 32400
}
}
]
}
},
{
"key": 2210,
"doc_count": 890590,
"area_type": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": 1,
"doc_count": 890590,
"saleclient": {
"doc_count": 357520,
"sale_client": {
"value": 357520
}
},
"inwatchinghour": {
"doc_count": 358900,
"transactionclientwatch": {
"value": 358900
},
"amountclientwatch": {
"value": 89792941442
},
"itemclient": {
"value": 2146312
}
},
"amountclient": {
"value": 222577251265
},
"max_time": {
"value": 75600
},
"transactionclient": {
"value": 890590
},
"itemclient": {
"value": 5346273
},
"min_time": {
"value": 32400
}
}
]
}
},
{
"key": 2226,
"doc_count": 844491,
"area_type": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": 1,
"doc_count": 844491,
"saleclient": {
"doc_count": 346801,
"sale_client": {
"value": 346801
}
},
"inwatchinghour": {
"doc_count": 347730,
"transactionclientwatch": {
"value": 347730
},
"amountclientwatch": {
"value": 90585228756
},
"itemclient": {
"value": 1817412
}
},
"amountclient": {
"value": 219008246857
},
"max_time": {
"value": 75600
},
"transactionclient": {
"value": 844491
},
"itemclient": {
"value": 4409412
},
"min_time": {
"value": 32400
}
}
]
}
},
{
"key": 2317,
"doc_count": 812409,
"area_type": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": 1,
"doc_count": 812409,
"saleclient": {
"doc_count": 292933,
"sale_client": {
"value": 292933
}
},
"inwatchinghour": {
"doc_count": 294866,
"transactionclientwatch": {
"value": 294866
},
"amountclientwatch": {
"value": 105661613404
},
"itemclient": {
"value": 2144352
}
},
"amountclient": {
"value": 290725384084
},
"max_time": {
"value": 75600
},
"transactionclient": {
"value": 812409
},
"itemclient": {
"value": 5925558
},
"min_time": {
"value": 32400
}
}
]
}
},
{
"key": 2211,
"doc_count": 811198,
"area_type": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": 1,
"doc_count": 811198,
"saleclient": {
"doc_count": 262617,
"sale_client": {
"value": 262617
}
},
"inwatchinghour": {
"doc_count": 265515,
"transactionclientwatch": {
"value": 265515
},
"amountclientwatch": {
"value": 70763222934
},
"itemclient": {
"value": 1783073
}
},
"amountclient": {
"value": 213071496626
},
"max_time": {
"value": 75600
},
"transactionclient": {
"value": 811198
},
"itemclient": {
"value": 5476443
},
"min_time": {
"value": 32400
}
}
]
}
},
{
"key": 2331,
"doc_count": 806670,
"area_type": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": 1,
"doc_count": 806670,
"saleclient": {
"doc_count": 349472,
"sale_client": {
"value": 349472
}
},
"inwatchinghour": {
"doc_count": 350285,
"transactionclientwatch": {
"value": 350285
},
"amountclientwatch": {
"value": 82784018110
},
"itemclient": {
"value": 2079211
}
},
"amountclient": {
"value": 192804137579
},
"max_time": {
"value": 75600
},
"transactionclient": {
"value": 806670
},
"itemclient": {
"value": 4834069
},
"min_time": {
"value": 32400
}
}
]
}
},
{
"key": 2323,
"doc_count": 749161,
"area_type": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": 1,
"doc_count": 749161,
"saleclient": {
"doc_count": 280928,
"sale_client": {
"value": 280928
}
},
"inwatchinghour": {
"doc_count": 282498,
"transactionclientwatch": {
"value": 282498
},
"amountclientwatch": {
"value": 62082735118
},
"itemclient": {
"value": 1588445
}
},
"amountclient": {
"value": 162365212278
},
"max_time": {
"value": 75600
},
"transactionclient": {
"value": 749161
},
"itemclient": {
"value": 4231490
},
"min_time": {
"value": 32400
}
}
]
}
},
{
"key": 2345,
"doc_count": 727589,
"area_type": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": 1,
"doc_count": 727589,
"saleclient": {
"doc_count": 340141,
"sale_client": {
"value": 340141
}
},
"inwatchinghour": {
"doc_count": 341590,
"transactionclientwatch": {
"value": 341590
},
"amountclientwatch": {
"value": 107492036777
},
"itemclient": {
"value": 2421158
}
},
"amountclient": {
"value": 228611232646
},
"max_time": {
"value": 75600
},
"transactionclient": {
"value": 727589
},
"itemclient": {
"value": 5138628
},
"min_time": {
"value": 32400
}
}
]
}
},
{
"key": 2329,
"doc_count": 663856,
"area_type": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": 1,
"doc_count": 663856,
"saleclient": {
"doc_count": 163358,
"sale_client": {
"value": 163358
}
},
"inwatchinghour": {
"doc_count": 164339,
"transactionclientwatch": {
"value": 164339
},
"amountclientwatch": {
"value": 55298080357
},
"itemclient": {
"value": 1209514
}
},
"amountclient": {
"value": 211070998632
},
"max_time": {
"value": 75600
},
"transactionclient": {
"value": 663856
},
"itemclient": {
"value": 4875689
},
"min_time": {
"value": 32400
}
}
]
}
},
{
"key": 2355,
"doc_count": 613992,
"area_type": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": 1,
"doc_count": 613992,
"saleclient": {
"doc_count": 113575,
"sale_client": {
"value": 113575
}
},
"inwatchinghour": {
"doc_count": 114038,
"transactionclientwatch": {
"value": 114038
},
"amountclientwatch": {
"value": 30494132488
},
"itemclient": {
"value": 563628
}
},
"amountclient": {
"value": 140705052880
},
"max_time": {
"value": 75600
},
"transactionclient": {
"value": 613992
},
"itemclient": {
"value": 2920908
},
"min_time": {
"value": 32400
}
}
]
}
}
]
}
}
}

Related

Elastic search terms aggregation for getting filter options

I'm trying to implement product search and want to get the search results along with the filters available to narrow them down. I have managed to get the filter keys, but I also want the values of those keys.
My product body is:
{
...product,
"attributes": [
{
"name": "Color",
"value": "Aqua Blue"
},
{
"name": "Gender",
"value": "Female"
},
{
"name": "Occasion",
"value": "Active Wear"
},
{
"name": "Size",
"value": "0"
}
],
}
and I'm using this query in Elasticsearch:
GET product/_search
{
"aggs": {
"filters": {
"terms": {
"field": "attributes.name"
},
"aggs": {
"values": {
"terms": {
"field": "attributes.value",
"size": 10
}
}
}
}
}
}
I'm not sure why, but I'm getting all values for each key:
"aggregations": {
"filters": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "Color",
"doc_count": 3,
"values": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "Active Wear",
"doc_count": 3
},
{
"key": "Aqua Blue",
"doc_count": 3
},
{
"key": "Female",
"doc_count": 3
},
{
"key": "0",
"doc_count": 2
},
{
"key": "10XL",
"doc_count": 1
}
]
}
},
{
"key": "Gender",
"doc_count": 3,
"values": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "Active Wear",
"doc_count": 3
},
{
"key": "Aqua Blue",
"doc_count": 3
},
{
"key": "Female",
"doc_count": 3
},
{
"key": "0",
"doc_count": 2
},
{
"key": "10XL",
"doc_count": 1
}
]
}
},
{
"key": "Occasion",
"doc_count": 3,
"values": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "Active Wear",
"doc_count": 3
},
{
"key": "Aqua Blue",
"doc_count": 3
},
{
"key": "Female",
"doc_count": 3
},
{
"key": "0",
"doc_count": 2
},
{
"key": "10XL",
"doc_count": 1
}
]
}
},
{
"key": "Size",
"doc_count": 3,
"values": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "Active Wear",
"doc_count": 3
},
{
"key": "Aqua Blue",
"doc_count": 3
},
{
"key": "Female",
"doc_count": 3
},
{
"key": "0",
"doc_count": 2
},
{
"key": "10XL",
"doc_count": 1
}
]
}
}
]
}
Also, I do not want to explicitly specify every key (such as Color or Size) by hand to get their respective values.
Thanks :)
To keep things simple, you would store each attribute in its own field:
"gender":"Male"
I assume you have tons of attributes, so you created an array instead; to handle that you will have to use the "nested" field type.
The nested type preserves the relation between the properties of each nested document. If you don't use nested, the properties and values of all array elements are mixed together, and you will not be able to aggregate by a property without manually adding filters.
You can read an article I wrote about that here:
https://opster.com/guides/elasticsearch/data-architecture/elasticsearch-nested-field-object-field/
Mappings :
PUT test_product_nested
{
"mappings": {
"properties": {
"attributes": {
"type": "nested",
"properties": {
"name": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"value": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
}
}
},
"title": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
}
}
}
}
This query will only show Red products of size XL and aggregate by attributes.
If you want to do OR's instead of AND's you must use "should" clauses instead of "filter" clauses.
Query
POST test_product_nested/_search
{
"query": {
"bool": {
"filter": [
{
"nested": {
"path": "attributes",
"query": {
"bool": {
"filter": [
{
"term": {
"attributes.name.keyword": "Color"
}
},
{
"term": {
"attributes.value.keyword": "Red"
}
}
]
}
}
}
},
{
"nested": {
"path": "attributes",
"query": {
"bool": {
"filter": [
{
"term": {
"attributes.name.keyword": "Size"
}
},
{
"term": {
"attributes.value.keyword": "XL"
}
}
]
}
}
}
}
]
}
},
"aggs": {
"attributes": {
"nested": {
"path": "attributes"
},
"aggs": {
"name": {
"terms": {
"field": "attributes.name.keyword"
},
"aggs": {
"values": {
"terms": {
"field": "attributes.value.keyword",
"size": 10
}
}
}
}
}
}
}
}
Results
{
"took": 1,
"timed_out": false,
"_shards": {
"total": 1,
"successful": 1,
"skipped": 0,
"failed": 0
},
"hits": {
"total": {
"value": 1,
"relation": "eq"
},
"max_score": 0,
"hits": [
{
"_index": "test_product_nested",
"_id": "aJRayoQBtNG1OrZoEOQi",
"_score": 0,
"_source": {
"title": "Product 1",
"attributes": [
{
"name": "Color",
"value": "Red"
},
{
"name": "Gender",
"value": "Female"
},
{
"name": "Occasion",
"value": "Active Wear"
},
{
"name": "Size",
"value": "XL"
}
]
}
}
]
},
"aggregations": {
"attributes": {
"doc_count": 4,
"name": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "Color",
"doc_count": 1,
"values": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "Red",
"doc_count": 1
}
]
}
},
{
"key": "Gender",
"doc_count": 1,
"values": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "Female",
"doc_count": 1
}
]
}
},
{
"key": "Occasion",
"doc_count": 1,
"values": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "Active Wear",
"doc_count": 1
}
]
}
},
{
"key": "Size",
"doc_count": 1,
"values": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "XL",
"doc_count": 1
}
]
}
}
]
}
}
}
}

Elasticsearch sorting based on multiple aggregations

I am trying to get my data grouped by several aggregation criteria, and afterwards I want to order it by one of those aggregations. In this specific case I want the data sorted in descending order by the "Monthly_Income / SUM" criterion.
I have searched and tried lots of things, but none of them worked for me. Could you give me the answer? I am new to Elasticsearch.
What I have looked at so far, without solving the problem:
ordering_by_a_sub_aggregation,
Sorting Based on "Deep" Metrics,
search-aggregations-bucket-terms-aggregation-script,
search-aggregations-bucket-multi-terms-aggregation
To visualize the problem: I always get the result below; I have tried many approaches but could not get the desired result.
undesired result
desired result
Request
`
{
"query": {
"bool": {
"must": [],
"must_not": []
}
},
"size": 0,
"aggs": {
"GENDER": {
"terms": {
"field": "GENDER.keyword",
"size": 10000000,
"missing": "N/A"
// ,"order": {"MARTIAL_STATUS>Monthly_Income_0.max" : "desc" }
},
"aggs": {
"MARTIAL_STATUS": {
"terms": {
"field": "MARTIAL_STATUS.keyword",
"size": 10000000,
"missing": "N/A"
// ,"order": {"Monthly_Income_0.value" : "desc" }
},
"aggs": {
"Monthly_Income_0": {
"sum": {
"field": "Monthly_Income"
}
},
"Monthly_Income_1": {
"value_count": {
"field": "Monthly_Income"
}
},
"SALE_PRICE_2": {
"sum": {
"field": "SALE_PRICE"
}
}
// ,"sort_by_percentage": {
// "bucket_sort": {
// "sort": [ { "Monthly_Income_0.value": { "order": "desc" } } ]
// }
// }
}
}
}
}
}
}
`
Response
`
{
"took": 0,
"timed_out": false,
"_shards": {
"total": 1,
"successful": 1,
"skipped": 0,
"failed": 0
},
"hits": {
"total": {
"value": 10000,
"relation": "gte"
},
"max_score": null,
"hits": []
},
"aggregations": {
"GENDER": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "Male",
"doc_count": 40959,
"MARTIAL_STATUS": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "Married",
"doc_count": 35559,
"SALE_PRICE_2": {
"value": 2.530239767013672E9
},
"Monthly_Income_0": {
"value": 3.59618565E8
},
"Monthly_Income_1": {
"value": 35559
}
},
{
"key": "Single",
"doc_count": 5399,
"SALE_PRICE_2": {
"value": 3.7742297754296875E8
},
"Monthly_Income_0": {
"value": 5.3465554E7
},
"Monthly_Income_1": {
"value": 5399
}
},
{
"key": "N/A",
"doc_count": 1,
"SALE_PRICE_2": {
"value": 87344.203125
},
"Monthly_Income_0": {
"value": 40000.0
},
"Monthly_Income_1": {
"value": 1
}
}
]
}
},
{
"key": "Female",
"doc_count": 7777,
"MARTIAL_STATUS": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "Married",
"doc_count": 5299,
"SALE_PRICE_2": {
"value": 3.9976638293359375E8
},
"Monthly_Income_0": {
"value": 4.4994796E7
},
"Monthly_Income_1": {
"value": 5299
}
},
{
"key": "Single",
"doc_count": 2477,
"SALE_PRICE_2": {
"value": 1.8698677312695312E8
},
"Monthly_Income_0": {
"value": 1.8793502E7
},
"Monthly_Income_1": {
"value": 2477
}
},
{
"key": "N/A",
"doc_count": 1,
"SALE_PRICE_2": {
"value": 101006.8203125
},
"Monthly_Income_0": {
"value": 10000.0
},
"Monthly_Income_1": {
"value": 1
}
}
]
}
}
]
}
}
}
`
I tried to order by an aggregated column but could not get it to work.
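My understanding of your issue is that you want to group by the combination of gender and marital status.
I have used a runtime mapping to concatenate the gender and marital status fields, used a terms aggregation to group by that runtime field, and sorted the groups by the sum of monthly income. Note that field names are case-sensitive: the script below reads Gender.keyword and Marital_Status.keyword, whereas the request in the question uses GENDER.keyword and MARTIAL_STATUS.keyword, so adjust the names to match your mapping.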
{
"size": 0,
"runtime_mappings": {
"gender-maritalstatus": {
"type": "keyword",
"script": {
"source": """
def gender='NA';
def maritalstatus='NA';
if(doc['Gender.keyword'].size()!=0)
gender= doc['Gender.keyword'].value;
if(doc['Marital_Status.keyword'].size()!=0)
maritalstatus= doc['Marital_Status.keyword'].value;
emit(gender+'-'+maritalstatus);
"""
}
}
},
"aggs": {
"gender-marital-grouping": {
"terms": {
"field": "gender-maritalstatus",
"order": {
"monthly_income": "desc"
},
"size": 10
},
"aggs": {
"monthly_income": {
"sum": {
"field": "Monthly_Income"
}
}
}
}
}
}
Result
"buckets" : [
{
"key" : "Female-Single",
"doc_count" : 2,
"monthly_income" : {
"value" : 300.0
}
},
{
"key" : "Male-Married",
"doc_count" : 2,
"monthly_income" : {
"value" : 200.0
}
},
{
"key" : "Female-NA",
"doc_count" : 1,
"monthly_income" : {
"value" : 100.0
}
},
{
"key" : "Male-NA",
"doc_count" : 1,
"monthly_income" : {
"value" : 100.0
}
},
{
"key" : "Male-Single",
"doc_count" : 1,
"monthly_income" : {
"value" : 100.0
}
}
]

Reaggregate on nested aggregation results using Elasticsearch

I want to compute some aggregations (using Elasticsearch 6.2) on products that have criteria. All the criteria are flattened and I want to reuse some aggregation results to reaggregate by a specific criterion.
Here is my index mapping:
PUT my_index
{
"mappings" : {
"_doc" : {
"properties" : {
"contract": {
"properties": {
"products": {
"type": "nested",
"properties": {
"productKey": {
"type": "keyword"
},
"criteria": {
"type": "nested",
"properties": {
"criterionKey": {
"type": "keyword"
},
"criterionValue": {
"type": "keyword"
}
}
}
}
}
}
}
}
}
}
}
I populated my index with the following data:
POST my_index/_doc
{
"contract": {
"products": [
{
"productKey": "PK_0001",
"criteria": [
{
"criterionKey": "CK_AAAA",
"criterionValue": "above_50"
},
{
"criterionKey": "CK_AAAB",
"criterionValue": "all"
}
]
}
]
}
}
POST my_index/_doc
{
"contract": {
"products": [
{
"productKey": "PK_0001",
"criteria": [
{
"criterionKey": "CK_AAAA",
"criterionValue": "below_50"
},
{
"criterionKey": "CK_AAAB",
"criterionValue": "dep"
}
]
}
]
}
}
POST my_index/_doc
{
"contract": {
"products": [
{
"productKey": "PK_0002",
"criteria": [
{
"criterionKey": "CK_AAAA",
"criterionValue": "below_50"
},
{
"criterionKey": "CK_AAAB",
"criterionValue": "dep"
}
]
}
]
}
}
I am able to count the occurrences of all criterion values per product. To do so, I use the following aggregation request:
POST my_index/_doc/_search
{
"size": 0,
"aggs": {
"agg_by_product": {
"nested": {
"path": "contract.products"
},
"aggs": {
"agg_by_product_key": {
"terms": {
"field": "contract.products.productKey"
},
"aggs": {
"agg_by_product_crit": {
"nested": {
"path": "contract.products.criteria"
},
"aggs": {
"agg_by_product_crit_key": {
"terms": {
"field": "contract.products.criteria.criterionKey",
"include": [ "CK_AAAB", "CK_AAAA" ]
},
"aggs": {
"agg_by_product_crit_value": {
"terms": {
"field": "contract.products.criteria.criterionValue"
}
}
}
}
}
}
}
}
}
}
}
}
It returns:
{
// ...
"aggregations": {
"agg_by_product": {
"doc_count": 3,
"agg_by_product_key": {
"buckets": [
{
"key": "PK_0001",
"doc_count": 2,
"agg_by_product_crit": {
"doc_count": 8,
"agg_by_product_crit_key": {
"buckets": [
{
"key": "CK_AAAB",
"doc_count": 2,
"agg_by_product_crit_value": {
"buckets": [
{
"key": "dep",
"doc_count": 1
},
{
"key": "all",
"doc_count": 1
}
]
}
},
{
"key": "CK_AAAA",
"doc_count": 2,
"agg_by_product_crit_value": {
"buckets": [
{
"key": "below_50",
"doc_count": 1
},
{
"key": "above_50",
"doc_count": 1
}
]
}
}
]
}
}
},
{
"key": "PK_0002",
"doc_count": 1,
"agg_by_product_crit": {
"doc_count": 4,
"agg_by_product_crit_key": {
"buckets": [
{
"key": "CK_AAAB",
"doc_count": 1,
"agg_by_product_crit_value": {
"buckets": [
{
"key": "dep",
"doc_count": 1
}
]
}
},
{
"key": "CK_AAAA",
"doc_count": 1,
"agg_by_product_crit_value": {
"buckets": [
{
"key": "below_50",
"doc_count": 1
}
]
}
}
]
}
}
}
]
}
}
}
}
Now I would like to aggregate by criterion values of a specified criterion key, in order to get something like this:
{
// ...
"aggregations": {
"agg_by_product": {
"doc_count": 3,
"agg_by_product_key": {
"buckets": [
{
"key": "PK_0001",
"doc_count": 2,
"agg_by_product_crit": {
"doc_count": 8,
"agg_by_product_crit_key": {
"buckets": [
{
"key": "CK_AAAB",
"doc_count": 2,
"agg_by_product_crit_value": {
"buckets": [
{
"key": "dep",
"doc_count": 1,
"AGG_BY_SOMETHING": {
"buckets": [
{
"key": "CK_AAAA",
"doc_count": 1,
"AGG_BY_SOMETHING_2": {
"buckets": [
{
"key": "below_50",
"doc_count": 1
}
]
}
}
]
}
},
{
"key": "all",
"doc_count": 1,
"AGG_BY_SOMETHING": {
"buckets": [
{
"key": "CK_AAAA",
"doc_count": 1,
"AGG_BY_SOMETHING_2": {
"buckets": [
{
"key": "above_50",
"doc_count": 1
}
]
}
}
]
}
}
]
}
}
]
}
}
},
{
"key": "PK_0002",
"doc_count": 1,
"agg_by_product_crit": {
"doc_count": 4,
"agg_by_product_crit_key": {
"buckets": [
{
"key": "CK_AAAB",
"doc_count": 1,
"agg_by_product_crit_value": {
"buckets": [
{
"key": "dep",
"doc_count": 1,
"AGG_BY_SOMETHING": {
"buckets": [
{
"key": "CK_AAAA",
"doc_count": 1,
"AGG_BY_SOMETHING_2": {
"buckets": [
{
"key": "below_50",
"doc_count": 1
}
]
}
}
]
}
}
]
}
}
]
}
}
}
]
}
}
}
}
What should be the corresponding aggregation request?
Finally I found a solution using a reverse_nested aggregation.
POST my_index/_doc/_search
{
"size": 0,
"aggs": {
"agg_by_product": {
"nested": {
"path": "contract.products"
},
"aggs": {
"agg_by_product_key": {
"terms": {
"field": "contract.products.productKey"
},
"aggs": {
"agg_by_product_crit": {
"nested": {
"path": "contract.products.criteria"
},
"aggs": {
"agg_by_product_crit_key": {
"terms": {
"field": "contract.products.criteria.criterionKey",
"include": [ "CK_AAAB" ]
},
"aggs": {
"agg_by_product_crit_value": {
"terms": {
"field": "contract.products.criteria.criterionValue"
},
"aggs": {
"agg_back_to_root": {
"reverse_nested": {},
"aggs": {
"agg_by_product_crit2": {
"nested": {
"path": "contract.products.criteria"
},
"aggs": {
"agg_by_product_crit_key2": {
"terms": {
"field": "contract.products.criteria.criterionKey",
"include": [ "CK_AAAA" ]
},
"aggs": {
"agg_by_product_crit_value2": {
"terms": {
"field": "contract.products.criteria.criterionValue"
}
}
}
}
}
}
}
}
}
}
}
}
}
}
}
}
}
}
}
}

Convert sql query to elasticsearch

I need to convert this query to Elasticsearch, but the problem I am facing is that Elasticsearch does not support HAVING yet.
Select sum(count) as count,prop1
from
(
SELECT Count(*) as count,prop1 FROM [table1] group by prop1,prop2
having count = 1
)
group by prop1
order by count desc limit 10
I tried this query in Elasticsearch:
`GET /analytics_data/_search
{
"size": 0,
"query": {
"bool": {
"filter": [
{
"term":
{
"field": "test"
}
}
]
}
},
"aggs": {
"aggregation": {
"terms": {
"field": "prop1"
},
"aggs": {
"subaggregation": {
"terms": {
"field": "prop2",
"order": {
"_count": "desc"
}
}
},
"test":{
"bucket_selector": {
"buckets_path":
{
"test1": "_count"
},
"script":"params.test1 == 1"
}
}
}
}
}
}`
Here is the mapping that I use:
PUT /index
{
"mappings" : {
"timeline" : {
"properties" : {
"prop1" : {
"type" : "keyword"
},
"prop2" : {
"type" : "keyword"
}
}
}
}
}
but I cannot get the sub-aggregation buckets that have count == 1.
Here is the output of the suggested answer :
{
"took": 344,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"failed": 0
},
"hits": {
"total": 852146,
"max_score": 0,
"hits": []
},
"aggregations": {
"prop1": {
"doc_count_error_upper_bound": 646,
"sum_other_doc_count": 37299,
"buckets": [
{
"key": "porp1-key",
"doc_count": 348178,
"prop2": {
"doc_count_error_upper_bound": 130,
"sum_other_doc_count": 345325,
"buckets": [
{
"key": "e1552d2d-da84-4588-9b65-16c33848bb94_1",
"doc_count": 558,
"prop2_count": {
"value": 0
},
"prop2_check": {
"value": 0
}
},
{
"key": "04b1a8eb-f876-459b-af9b-855493318dca_426",
"doc_count": 383,
"prop2_count": {
"value": 0
},
"prop2_check": {
"value": 0
}
},
{
"key": "b165d2c7-6a23-4a4d-adbb-3b2a79d4c627_80",
"doc_count": 344,
"prop2_count": {
"value": 0
},
"prop2_check": {
"value": 0
}
},
{
"key": "c4ea55dc-c3b3-492b-98a2-1ad004212c3d_99",
"doc_count": 297,
"prop2_count": {
"value": 0
},
"prop2_check": {
"value": 0
}
},
{
"key": "dfc1ae22-5c7f-49ab-8488-207661b43716_294",
"doc_count": 264,
"prop2_count": {
"value": 0
},
"prop2_check": {
"value": 0
}
},
{
"key": "28815490-e7ce-420b-bab8-57a6ffc3f56a_572",
"doc_count": 239,
"prop2_count": {
"value": 0
},
"prop2_check": {
"value": 0
}
},
{
"key": "c3c56ec8-e0ff-46ea-841d-cc22b2dc65f6_574",
"doc_count": 217,
"prop2_count": {
"value": 0
},
"prop2_check": {
"value": 0
}
},
{
"key": "473289b8-fb73-4cbb-b8d7-a5386846745f_34",
"doc_count": 187,
"prop2_count": {
"value": 0
},
"prop2_check": {
"value": 0
}
},
{
"key": "670cb862-7976-4fd5-ba3f-3f8b7c03d615_11",
"doc_count": 185,
"prop2_count": {
"value": 0
},
"prop2_check": {
"value": 0
}
},
{
"key": "41870755-96dd-4a00-ab76-632a1dfaecb5_341",
"doc_count": 179,
"prop2_count": {
"value": 0
},
"prop2_check": {
"value": 0
}
}
]
},
"final": {
"value": 0
}
} ]
}
}
}
Try this. The aggregation named final will give you the desired output.
GET /analytics_data/_search
{
"size": 0,
"query": {
"bool": {
"filter": [
{
"term": {
"field": "test"
}
}
]
}
},
"aggs": {
"prop1": {
"terms": {
"field": "prop1",
"size": 10
},
"aggs": {
"prop2": {
"terms": {
"field": "prop2",
"size": 10
},
"aggs": {
"prop2_count": {
"value_count": {
"field": "prop2"
}
},
"prop2_check": {
"bucket_script": {
"buckets_path": {
"count": "prop2_count.value"
},
"script": "(params.count == 1) ? 1 : 0"
}
}
}
},
"final": {
"sum_bucket": {
"buckets_path": "prop2>prop2_check"
}
}
}
}
}
}
Working code :
PUT prop
{
"mappings": {
"prop": {
"properties": {
"prop1": {
"type": "keyword"
},
"prop2": {
"type": "keyword"
}
}
}
}
}
POST _bulk
{"index":{"_index":"prop","_type":"prop"}}
{"prop1":"p1","prop2":"q1"}
{"index":{"_index":"prop","_type":"prop"}}
{"prop1":"p1","prop2":"q2"}
{"index":{"_index":"prop","_type":"prop"}}
{"prop1":"p1","prop2":"q2"}
{"index":{"_index":"prop","_type":"prop"}}
{"prop1":"p2","prop2":"q5"}
{"index":{"_index":"prop","_type":"prop"}}
{"prop1":"p2","prop2":"q6"}
GET prop/prop/_search
{
"size": 0,
"aggs": {
"prop1": {
"terms": {
"field": "prop1",
"size": 10
},
"aggs": {
"prop2": {
"terms": {
"field": "prop2",
"size": 10
},
"aggs": {
"prop2_count": {
"value_count": {
"field": "prop2"
}
},
"prop2_check": {
"bucket_script": {
"buckets_path": {
"count": "prop2_count.value"
},
"script": "(params.count == 1) ? 1 : 0"
}
}
}
},
"final":{
"sum_bucket": {
"buckets_path": "prop2>prop2_check"
}
}
}
}
}
}
Output :
{
"took": 6,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"failed": 0
},
"hits": {
"total": 5,
"max_score": 0,
"hits": []
},
"aggregations": {
"prop1": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "p1",
"doc_count": 3,
"prop2": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "q2",
"doc_count": 2,
"prop2_count": {
"value": 2
},
"prop2_check": {
"value": 0
}
},
{
"key": "q1",
"doc_count": 1,
"prop2_count": {
"value": 1
},
"prop2_check": {
"value": 1
}
}
]
},
"final": {
"value": 1
}
},
{
"key": "p2",
"doc_count": 2,
"prop2": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "q5",
"doc_count": 1,
"prop2_count": {
"value": 1
},
"prop2_check": {
"value": 1
}
},
{
"key": "q6",
"doc_count": 1,
"prop2_count": {
"value": 1
},
"prop2_check": {
"value": 1
}
}
]
},
"final": {
"value": 2
}
}
]
}
}
}

How to group by a duplicated field in an array list: Elasticsearch

I have a problem with a nested aggregation in Elasticsearch. I have a mapping with a nested field:
"Topics":{"type":"nested","properties":{
"CategoryLev1":{"type":"text","fields":{"keyword":{"type":"keyword","ignore_above":256}}},
"CategoryLev2":{"type":"text","fields":{"keyword":{"type":"keyword","ignore_above":256}}} }}
After indexing, the documents look like this:
"Topics": [
{
"CategoryRelevancy": "1.0",
"CategoryLev2": "Money",
"CategoryLev1": "Sales"
},
{
"CategoryRelevancy": "2.0",
"CategoryLev2": "Money",
"CategoryLev1": "Sales"
},
{
"CategoryRelevancy": "1.0",
"CategoryLev2": "Electrical",
"CategoryLev1": "Product"
}
]
"Topics": [
{
"CategoryRelevancy": "1.0",
"CategoryLev2": "Money",
"CategoryLev1": "Sales"
},
{
"CategoryRelevancy": "2.0",
"CategoryLev2": "Methods",
"CategoryLev1": "Sales"
},
{
"CategoryRelevancy": "1.0",
"CategoryLev2": "Engine",
"CategoryLev1": "Product"
}
]
As you can see, my nested array contains two Topics entries that have the same key and value fields (CategoryLev1 and CategoryLev2). Then I run this query:
{
"size": 10,
"aggregations": {
"resellers": {
"nested": {
"path": "Topics"
},
"aggregations": {
"topicGroup": {
"terms": {
"field": "Topics.CategoryLev1.keyword",
"size": 10
},
"aggregations": {
"Subtopic": {
"terms": {
"field": "Topics.CategoryLev2.keyword"
}
}
}
}
}
}
}
}
Then I get the following result, which is grouped by topic category:
{
"hits": {
"total": 2,
"max_score": 0,
"hits": []
},
"aggregations": {
"resellers": {
"doc_count": 6,
"topicGroup": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "Sales",
"doc_count": 3,
"Subtopic": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "Money",
"doc_count": 3
},
{
"key": "Method",
"doc_count": 1
}
]
}
},
{
"key": "Product",
"doc_count": 2,
"Subtopic": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "Electrical",
"doc_count": 1
},
{
"key": "Engine",
"doc_count": 1
}
]
}
}
]
}
}
}
}
But I want a result like this (that is, duplicate entries within one document should be counted only once):
"buckets": [
{
"key": "Sales",
"doc_count": 2,
"Subtopic": {
"buckets": [
{
"key": "Money",
"doc_count": 2
},
{
"key": "Method",
"doc_count": 1
}
]
}
},
{
"key": "Product",
"doc_count": 2,
"Subtopic": {
"buckets": [
{
"key": "Electrical",
"doc_count": 1
},
{
"key": "Engine",
"doc_count": 1
}]
}
}]
Thanks in advance :)

Resources