Reaggregate on nested aggregation results using Elasticsearch - elasticsearch

I want to compute some aggregations (using Elasticsearch 6.2) on products that have criteria. All the criteria are flattened and I want to reuse some aggregation results to reaggregate by a specific criterion.
Here is my index mapping:
PUT my_index
{
"mappings" : {
"_doc" : {
"properties" : {
"contract": {
"properties": {
"products": {
"type": "nested",
"properties": {
"productKey": {
"type": "keyword"
},
"criteria": {
"type": "nested",
"properties": {
"criterionKey": {
"type": "keyword"
},
"criterionValue": {
"type": "keyword"
}
}
}
}
}
}
}
}
}
}
}
I populated my index with the following data:
POST my_index/_doc
{
"contract": {
"products": [
{
"productKey": "PK_0001",
"criteria": [
{
"criterionKey": "CK_AAAA",
"criterionValue": "above_50"
},
{
"criterionKey": "CK_AAAB",
"criterionValue": "all"
}
]
}
]
}
}
POST my_index/_doc
{
"contract": {
"products": [
{
"productKey": "PK_0001",
"criteria": [
{
"criterionKey": "CK_AAAA",
"criterionValue": "below_50"
},
{
"criterionKey": "CK_AAAB",
"criterionValue": "dep"
}
]
}
]
}
}
POST my_index/_doc
{
"contract": {
"products": [
{
"productKey": "PK_0002",
"criteria": [
{
"criterionKey": "CK_AAAA",
"criterionValue": "below_50"
},
{
"criterionKey": "CK_AAAB",
"criterionValue": "dep"
}
]
}
]
}
}
I am able to count the occurrences of all criterion values per product. To do so, I use the following aggregation request:
POST my_index/_doc/_search
{
"size": 0,
"aggs": {
"agg_by_product": {
"nested": {
"path": "contract.products"
},
"aggs": {
"agg_by_product_key": {
"terms": {
"field": "contract.products.productKey"
},
"aggs": {
"agg_by_product_crit": {
"nested": {
"path": "contract.products.criteria"
},
"aggs": {
"agg_by_product_crit_key": {
"terms": {
"field": "contract.products.criteria.criterionKey",
"include": [ "CK_AAAB", "CK_AAAA" ]
},
"aggs": {
"agg_by_product_crit_value": {
"terms": {
"field": "contract.products.criteria.criterionValue"
}
}
}
}
}
}
}
}
}
}
}
}
It returns:
{
// ...
"aggregations": {
"agg_by_product": {
"doc_count": 3,
"agg_by_product_key": {
"buckets": [
{
"key": "PK_0001",
"doc_count": 2,
"agg_by_product_crit": {
"doc_count": 8,
"agg_by_product_crit_key": {
"buckets": [
{
"key": "CK_AAAB",
"doc_count": 2,
"agg_by_product_crit_value": {
"buckets": [
{
"key": "dep",
"doc_count": 1
},
{
"key": "all",
"doc_count": 1
}
]
}
},
{
"key": "CK_AAAA",
"doc_count": 2,
"agg_by_product_crit_value": {
"buckets": [
{
"key": "below_50",
"doc_count": 1
},
{
"key": "above_50",
"doc_count": 1
}
]
}
}
]
}
}
},
{
"key": "PK_0002",
"doc_count": 1,
"agg_by_product_crit": {
"doc_count": 4,
"agg_by_product_crit_key": {
"buckets": [
{
"key": "CK_AAAB",
"doc_count": 1,
"agg_by_product_crit_value": {
"buckets": [
{
"key": "dep",
"doc_count": 1
}
]
}
},
{
"key": "CK_AAAA",
"doc_count": 1,
"agg_by_product_crit_value": {
"buckets": [
{
"key": "below_50",
"doc_count": 1
}
]
}
}
]
}
}
}
]
}
}
}
}
Now I would like to aggregate by criterion values of a specified criterion key, in order to get something like this:
{
// ...
"aggregations": {
"agg_by_product": {
"doc_count": 3,
"agg_by_product_key": {
"buckets": [
{
"key": "PK_0001",
"doc_count": 2,
"agg_by_product_crit": {
"doc_count": 8,
"agg_by_product_crit_key": {
"buckets": [
{
"key": "CK_AAAB",
"doc_count": 2,
"agg_by_product_crit_value": {
"buckets": [
{
"key": "dep",
"doc_count": 1,
"AGG_BY_SOMETHING": {
"buckets": [
{
"key": "CK_AAAA",
"doc_count": 1,
"AGG_BY_SOMETHING_2": {
"buckets": [
{
"key": "below_50",
"doc_count": 1
}
]
}
}
]
}
},
{
"key": "all",
"doc_count": 1,
"AGG_BY_SOMETHING": {
"buckets": [
{
"key": "CK_AAAA",
"doc_count": 1,
"AGG_BY_SOMETHING_2": {
"buckets": [
{
"key": "above_50",
"doc_count": 1
}
]
}
}
]
}
}
]
}
}
]
}
}
},
{
"key": "PK_0002",
"doc_count": 1,
"agg_by_product_crit": {
"doc_count": 4,
"agg_by_product_crit_key": {
"buckets": [
{
"key": "CK_AAAB",
"doc_count": 1,
"agg_by_product_crit_value": {
"buckets": [
{
"key": "dep",
"doc_count": 1,
"AGG_BY_SOMETHING": {
"buckets": [
{
"key": "CK_AAAA",
"doc_count": 1,
"AGG_BY_SOMETHING_2": {
"buckets": [
{
"key": "below_50",
"doc_count": 1
}
]
}
}
]
}
}
]
}
}
]
}
}
}
]
}
}
}
}
What should be the corresponding aggregation request?

Finally I found a solution using a reverse_nested aggregation.
POST my_index/_doc/_search
{
"size": 0,
"aggs": {
"agg_by_product": {
"nested": {
"path": "contract.products"
},
"aggs": {
"agg_by_product_key": {
"terms": {
"field": "contract.products.productKey"
},
"aggs": {
"agg_by_product_crit": {
"nested": {
"path": "contract.products.criteria"
},
"aggs": {
"agg_by_product_crit_key": {
"terms": {
"field": "contract.products.criteria.criterionKey",
"include": [ "CK_AAAB" ]
},
"aggs": {
"agg_by_product_crit_value": {
"terms": {
"field": "contract.products.criteria.criterionValue"
},
"aggs": {
"agg_back_to_root": {
"reverse_nested": {},
"aggs": {
"agg_by_product_crit2": {
"nested": {
"path": "contract.products.criteria"
},
"aggs": {
"agg_by_product_crit_key2": {
"terms": {
"field": "contract.products.criteria.criterionKey",
"include": [ "CK_AAAA" ]
},
"aggs": {
"agg_by_product_crit_value2": {
"terms": {
"field": "contract.products.criteria.criterionValue"
}
}
}
}
}
}
}
}
}
}
}
}
}
}
}
}
}
}
}
}

Related

Elasticsearch use the key of term aggregation as the input in script

I stored the result of each game as a doc. The players and their scores were stored in users and scores arrays.
Sample data :
[
{
"gameId": "game01",
"users": [
"user01",
"user02"
],
"#timestamp": "2022-08-11T17:00:00.000Z",
"scores": [
4,
1
]
},
{
"gameId": "game02",
"users": [
"user01",
"user02"
],
"#timestamp": "2022-08-12T17:00:00.000Z",
"scores": [
3,
1
]
},
{
"gameId": "game02",
"users": [
"user02",
"user03"
],
"#timestamp": "2022-08-12T18:00:00.000Z",
"scores": [
2,
4
]
}
]
I expected to use the below query to aggregate the daily total scores of each game of users:
{
"aggs": {
"aggByDate": {
"date_histogram": {
"field": "#timestamp",
"interval": "1d",
"time_zone": "+8",
"min_doc_count": 1
},
"aggs": {
"aggByGame": {
"terms": {
"field": "gameId"
},
"aggs": {
"aggByUser": {
"terms": {
"field": "users"
},
"aggs": {
"totalScore": {
"sum": {
"script": {
"source": """
String targetUser = params.key; <--- I don't know how to get the key here
int i = 0;
for (def user: doc.users) {
if (user == targetUser) break;
i++;
}
return doc.scores[i];
"""
}
}
}
}
}
}
}
}
}
}
}
Expected result:
{
"aggregations": {
"aggByDate": {
"buckets": [
{
"key_as_string": "2022-08-11T00:00:00.000+08:00",
"doc_count": 1,
"aggByGame": {
"buckets": [
{
"key": "game01",
"doc_count": 1,
"aggByUser": {
"buckets": [
{
"key": "user01", <--- this is the value I want to inject into the script
"doc_count": 1,
"totalScore": {
"value": 4
}
},
{
"key": "user02",
"doc_count": 1,
"totalScore": {
"value": 1
}
}
]
}
}
]
}
},
{
"key_as_string": "2022-08-12T00:00:00.000+08:00",
"doc_count": 2,
"aggByGame": {
"buckets": [
{
"key": "game02",
"doc_count": 1,
"aggByUser": {
"buckets": [
{
"key": "user01",
"doc_count": 1,
"totalScore": {
"value": 3
}
},
{
"key": "user02",
"doc_count": 2,
"totalScore": {
"value": 3
}
},
{
"key": "user03",
"doc_count": 1,
"totalScore": {
"value": 4
}
}
]
}
}
]
}
}
]
}
}
}
But since I don't know the userId before the query, I don't know where can I let elasticsearch inject the params.key into the script in the middle of the query.
I also referenced the issue here. Seems it's not possible to access the aggregation data for sub-aggs. Do we have another workaround here? Thanks!!
(I use ElasticSearch v7.10)

Elasticsearch nested aggregation got too slow result

I have an index that will contain billions of documents in the future; for now it's around 20 million documents. It takes over 10s to get the result, while I need the query to run in around 3-4s for billions of documents. Is my structure wrong, or do I need to improve the query or the server configuration? I'm using Amazon Elasticsearch Service.
This query will return amount/transactions/items of every station in every area
Query:
{
"size" : 0,
"query": {
"bool": {
"must":
[
{
"range": {
"date_sec": {
"gte": "1483228800",
"lte": "1525046400"
}
}
},
{
"range": {
"time_sec": {
"gte": "32400",
"lte": "75600"
}
}
}
]
}
},
"aggs": {
"numstoreamountclient" : {
"filter" : { "range" : { "amount" : { "gt" : 0 } } },
"aggs": {
"numstore_amountclient": {
"cardinality" : {
"field" : "id_station"
}
}
}
},
"id_station": {
"terms": {
"field": "id_station"
},
"aggs": {
"area_type": {
"terms": {
"field": "area_type"
},
"aggs": {
"max_time" : { "max" : { "field" : "time_sec" } },
"min_time" : { "min" : { "field" : "time_sec" } },
"amountclient": {
"sum": {
"field": "amount"
}
},
"itemclient": {
"sum": {
"field": "items"
}
},
"transactionclient" : {
"value_count" :
{
"field" : "id"
}
},
"inwatchinghour": {
"filter" : { "term" : { "in_watchinghour" : 1 } },
"aggs" : {
"amountclientwatch": {
"sum": {
"field": "amount"
}
},
"itemclient": {
"sum": {
"field": "items"
}
},
"transactionclientwatch" : {
"value_count" :
{
"field" : "id"
}
}
}
},
"saleclient": {
"filter" : {
"bool": {
"must":
[
{
"term" : { "in_watchinghour" : 1 }
},
{
"range": {
"items": {
"gt": "0"
}
}
},
{
"range": {
"amount": {
"gt": "0"
}
}
}
]
}
},
"aggs" : {
"sale_client" : {
"value_count" :
{
"field" : "id"
}
}
}
}
}
}
}
}
}
}
Result:
{
"took": 10757,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"skipped": 0,
"failed": 0
},
"hits": {
"total": 19778330,
"max_score": 0,
"hits": []
},
"aggregations": {
"numstoreamountclient": {
"doc_count": 19677164,
"numstore_amountclient": {
"value": 35
}
},
"id_station": {
"doc_count_error_upper_bound": 437877,
"sum_other_doc_count": 11401869,
"buckets": [
{
"key": 2209,
"doc_count": 1456505,
"area_type": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": 1,
"doc_count": 1456505,
"saleclient": {
"doc_count": 708499,
"sale_client": {
"value": 708499
}
},
"inwatchinghour": {
"doc_count": 711435,
"transactionclientwatch": {
"value": 711435
},
"amountclientwatch": {
"value": 210203295816
},
"itemclient": {
"value": 4105206
}
},
"amountclient": {
"value": 427392789897
},
"max_time": {
"value": 75600
},
"transactionclient": {
"value": 1456505
},
"itemclient": {
"value": 8402911
},
"min_time": {
"value": 32400
}
}
]
}
},
{
"key": 2210,
"doc_count": 890590,
"area_type": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": 1,
"doc_count": 890590,
"saleclient": {
"doc_count": 357520,
"sale_client": {
"value": 357520
}
},
"inwatchinghour": {
"doc_count": 358900,
"transactionclientwatch": {
"value": 358900
},
"amountclientwatch": {
"value": 89792941442
},
"itemclient": {
"value": 2146312
}
},
"amountclient": {
"value": 222577251265
},
"max_time": {
"value": 75600
},
"transactionclient": {
"value": 890590
},
"itemclient": {
"value": 5346273
},
"min_time": {
"value": 32400
}
}
]
}
},
{
"key": 2226,
"doc_count": 844491,
"area_type": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": 1,
"doc_count": 844491,
"saleclient": {
"doc_count": 346801,
"sale_client": {
"value": 346801
}
},
"inwatchinghour": {
"doc_count": 347730,
"transactionclientwatch": {
"value": 347730
},
"amountclientwatch": {
"value": 90585228756
},
"itemclient": {
"value": 1817412
}
},
"amountclient": {
"value": 219008246857
},
"max_time": {
"value": 75600
},
"transactionclient": {
"value": 844491
},
"itemclient": {
"value": 4409412
},
"min_time": {
"value": 32400
}
}
]
}
},
{
"key": 2317,
"doc_count": 812409,
"area_type": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": 1,
"doc_count": 812409,
"saleclient": {
"doc_count": 292933,
"sale_client": {
"value": 292933
}
},
"inwatchinghour": {
"doc_count": 294866,
"transactionclientwatch": {
"value": 294866
},
"amountclientwatch": {
"value": 105661613404
},
"itemclient": {
"value": 2144352
}
},
"amountclient": {
"value": 290725384084
},
"max_time": {
"value": 75600
},
"transactionclient": {
"value": 812409
},
"itemclient": {
"value": 5925558
},
"min_time": {
"value": 32400
}
}
]
}
},
{
"key": 2211,
"doc_count": 811198,
"area_type": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": 1,
"doc_count": 811198,
"saleclient": {
"doc_count": 262617,
"sale_client": {
"value": 262617
}
},
"inwatchinghour": {
"doc_count": 265515,
"transactionclientwatch": {
"value": 265515
},
"amountclientwatch": {
"value": 70763222934
},
"itemclient": {
"value": 1783073
}
},
"amountclient": {
"value": 213071496626
},
"max_time": {
"value": 75600
},
"transactionclient": {
"value": 811198
},
"itemclient": {
"value": 5476443
},
"min_time": {
"value": 32400
}
}
]
}
},
{
"key": 2331,
"doc_count": 806670,
"area_type": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": 1,
"doc_count": 806670,
"saleclient": {
"doc_count": 349472,
"sale_client": {
"value": 349472
}
},
"inwatchinghour": {
"doc_count": 350285,
"transactionclientwatch": {
"value": 350285
},
"amountclientwatch": {
"value": 82784018110
},
"itemclient": {
"value": 2079211
}
},
"amountclient": {
"value": 192804137579
},
"max_time": {
"value": 75600
},
"transactionclient": {
"value": 806670
},
"itemclient": {
"value": 4834069
},
"min_time": {
"value": 32400
}
}
]
}
},
{
"key": 2323,
"doc_count": 749161,
"area_type": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": 1,
"doc_count": 749161,
"saleclient": {
"doc_count": 280928,
"sale_client": {
"value": 280928
}
},
"inwatchinghour": {
"doc_count": 282498,
"transactionclientwatch": {
"value": 282498
},
"amountclientwatch": {
"value": 62082735118
},
"itemclient": {
"value": 1588445
}
},
"amountclient": {
"value": 162365212278
},
"max_time": {
"value": 75600
},
"transactionclient": {
"value": 749161
},
"itemclient": {
"value": 4231490
},
"min_time": {
"value": 32400
}
}
]
}
},
{
"key": 2345,
"doc_count": 727589,
"area_type": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": 1,
"doc_count": 727589,
"saleclient": {
"doc_count": 340141,
"sale_client": {
"value": 340141
}
},
"inwatchinghour": {
"doc_count": 341590,
"transactionclientwatch": {
"value": 341590
},
"amountclientwatch": {
"value": 107492036777
},
"itemclient": {
"value": 2421158
}
},
"amountclient": {
"value": 228611232646
},
"max_time": {
"value": 75600
},
"transactionclient": {
"value": 727589
},
"itemclient": {
"value": 5138628
},
"min_time": {
"value": 32400
}
}
]
}
},
{
"key": 2329,
"doc_count": 663856,
"area_type": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": 1,
"doc_count": 663856,
"saleclient": {
"doc_count": 163358,
"sale_client": {
"value": 163358
}
},
"inwatchinghour": {
"doc_count": 164339,
"transactionclientwatch": {
"value": 164339
},
"amountclientwatch": {
"value": 55298080357
},
"itemclient": {
"value": 1209514
}
},
"amountclient": {
"value": 211070998632
},
"max_time": {
"value": 75600
},
"transactionclient": {
"value": 663856
},
"itemclient": {
"value": 4875689
},
"min_time": {
"value": 32400
}
}
]
}
},
{
"key": 2355,
"doc_count": 613992,
"area_type": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": 1,
"doc_count": 613992,
"saleclient": {
"doc_count": 113575,
"sale_client": {
"value": 113575
}
},
"inwatchinghour": {
"doc_count": 114038,
"transactionclientwatch": {
"value": 114038
},
"amountclientwatch": {
"value": 30494132488
},
"itemclient": {
"value": 563628
}
},
"amountclient": {
"value": 140705052880
},
"max_time": {
"value": 75600
},
"transactionclient": {
"value": 613992
},
"itemclient": {
"value": 2920908
},
"min_time": {
"value": 32400
}
}
]
}
}
]
}
}
}

Convert sql query to elasticsearch

I need to convert this SQL query to Elasticsearch, but I am facing the problem that the HAVING clause is not supported in Elasticsearch yet.
Select sum(count) as count,prop1
from
(
SELECT Count(*) as count,prop1 FROM [table1] group by prop1,prop2
having count = 1
)
group by prop1
order by count desc limit 10
I try this query in elastic search:
`GET /analytics_data/_search
{
"size": 0,
"query": {
"bool": {
"filter": [
{
"term":
{
"field": "test"
}
}
]
}
},
"aggs": {
"aggregation": {
"terms": {
"field": "prop1"
},
"aggs": {
"subaggregation": {
"terms": {
"field": "prop2",
"order": {
"_count": "desc"
}
}
},
"test":{
"bucket_selector": {
"buckets_path":
{
"test1": "_count"
},
"script":"params.test1 == 1"
}
}
}
}
}
}`
Here is the mapping that I use:
PUT /index
{
"mappings" : {
"timeline" : {
"properties" : {
"prop1" : {
"type" : "keyword"
},
"prop2" : {
"type" : "keyword"
}
}
}
}
}
but I cannot get the sub-aggregation buckets that have count == 1
Here is the output of the suggested answer :
{
"took": 344,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"failed": 0
},
"hits": {
"total": 852146,
"max_score": 0,
"hits": []
},
"aggregations": {
"prop1": {
"doc_count_error_upper_bound": 646,
"sum_other_doc_count": 37299,
"buckets": [
{
"key": "porp1-key",
"doc_count": 348178,
"prop2": {
"doc_count_error_upper_bound": 130,
"sum_other_doc_count": 345325,
"buckets": [
{
"key": "e1552d2d-da84-4588-9b65-16c33848bb94_1",
"doc_count": 558,
"prop2_count": {
"value": 0
},
"prop2_check": {
"value": 0
}
},
{
"key": "04b1a8eb-f876-459b-af9b-855493318dca_426",
"doc_count": 383,
"prop2_count": {
"value": 0
},
"prop2_check": {
"value": 0
}
},
{
"key": "b165d2c7-6a23-4a4d-adbb-3b2a79d4c627_80",
"doc_count": 344,
"prop2_count": {
"value": 0
},
"prop2_check": {
"value": 0
}
},
{
"key": "c4ea55dc-c3b3-492b-98a2-1ad004212c3d_99",
"doc_count": 297,
"prop2_count": {
"value": 0
},
"prop2_check": {
"value": 0
}
},
{
"key": "dfc1ae22-5c7f-49ab-8488-207661b43716_294",
"doc_count": 264,
"prop2_count": {
"value": 0
},
"prop2_check": {
"value": 0
}
},
{
"key": "28815490-e7ce-420b-bab8-57a6ffc3f56a_572",
"doc_count": 239,
"prop2_count": {
"value": 0
},
"prop2_check": {
"value": 0
}
},
{
"key": "c3c56ec8-e0ff-46ea-841d-cc22b2dc65f6_574",
"doc_count": 217,
"prop2_count": {
"value": 0
},
"prop2_check": {
"value": 0
}
},
{
"key": "473289b8-fb73-4cbb-b8d7-a5386846745f_34",
"doc_count": 187,
"prop2_count": {
"value": 0
},
"prop2_check": {
"value": 0
}
},
{
"key": "670cb862-7976-4fd5-ba3f-3f8b7c03d615_11",
"doc_count": 185,
"prop2_count": {
"value": 0
},
"prop2_check": {
"value": 0
}
},
{
"key": "41870755-96dd-4a00-ab76-632a1dfaecb5_341",
"doc_count": 179,
"prop2_count": {
"value": 0
},
"prop2_check": {
"value": 0
}
}
]
},
"final": {
"value": 0
}
} ]
}
}
}
Try this. The "final" aggregation will give you the desired output.
GET /analytics_data/_search
{
"size": 0,
"query": {
"bool": {
"filter": [
{
"term": {
"field": "test"
}
}
]
}
},
"aggs": {
"prop1": {
"terms": {
"field": "prop1",
"size": 10
},
"aggs": {
"prop2": {
"terms": {
"field": "prop2",
"size": 10
},
"aggs": {
"prop2_count": {
"value_count": {
"field": "prop2"
}
},
"prop2_check": {
"bucket_script": {
"buckets_path": {
"count": "prop2_count.value"
},
"script": "(params.count == 1) ? 1 : 0"
}
}
}
},
"final": {
"sum_bucket": {
"buckets_path": "prop2>prop2_check"
}
}
}
}
}
}
Working code :
PUT prop
{
"mappings": {
"prop": {
"properties": {
"prop1": {
"type": "keyword"
},
"prop2": {
"type": "keyword"
}
}
}
}
}
POST _bulk
{"index":{"_index":"prop","_type":"prop"}}
{"prop1":"p1","prop2":"q1"}
{"index":{"_index":"prop","_type":"prop"}}
{"prop1":"p1","prop2":"q2"}
{"index":{"_index":"prop","_type":"prop"}}
{"prop1":"p1","prop2":"q2"}
{"index":{"_index":"prop","_type":"prop"}}
{"prop1":"p2","prop2":"q5"}
{"index":{"_index":"prop","_type":"prop"}}
{"prop1":"p2","prop2":"q6"}
GET prop/prop/_search
{
"size": 0,
"aggs": {
"prop1": {
"terms": {
"field": "prop1",
"size": 10
},
"aggs": {
"prop2": {
"terms": {
"field": "prop2",
"size": 10
},
"aggs": {
"prop2_count": {
"value_count": {
"field": "prop2"
}
},
"prop2_check": {
"bucket_script": {
"buckets_path": {
"count": "prop2_count.value"
},
"script": "(params.count == 1) ? 1 : 0"
}
}
}
},
"final":{
"sum_bucket": {
"buckets_path": "prop2>prop2_check"
}
}
}
}
}
}
Output :
{
"took": 6,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"failed": 0
},
"hits": {
"total": 5,
"max_score": 0,
"hits": []
},
"aggregations": {
"prop1": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "p1",
"doc_count": 3,
"prop2": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "q2",
"doc_count": 2,
"prop2_count": {
"value": 2
},
"prop2_check": {
"value": 0
}
},
{
"key": "q1",
"doc_count": 1,
"prop2_count": {
"value": 1
},
"prop2_check": {
"value": 1
}
}
]
},
"final": {
"value": 1
}
},
{
"key": "p2",
"doc_count": 2,
"prop2": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "q5",
"doc_count": 1,
"prop2_count": {
"value": 1
},
"prop2_check": {
"value": 1
}
},
{
"key": "q6",
"doc_count": 1,
"prop2_count": {
"value": 1
},
"prop2_check": {
"value": 1
}
}
]
},
"final": {
"value": 2
}
}
]
}
}
}

Exclude results based on key in elasticsearch

I have the below mapping for a type in elastic search:
"properties": {
"userid": {
"type": "integer"
},
"engid": {
"type": "short"
},
"score": {
"type": "short",
},
"name": {
"type": "string",
"index": "not_analyzed"
},
"submitTime": {
"type": "date",
"format": "yyyy-MM-dd HH:mm:ss"
}
}
And my search query as:
{
"size": 10,
"query": {
"filtered": {
"query": {
"match_all": {}
},
"filter": {
"range": {
"submitTime": {
"gt": "now-18d"
}
}
}
}
},
"aggs": {
"name": {
"terms": {
"field": "name",
"order": {
"_term": "asc"
}
},
"aggs": {
"score": {
"terms": {
"field": "score"
}
}
}
}
}
}
This is giving my expected result as:
"aggregations": {
"name": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "---",
"doc_count": 169529,
"score": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": 0,
"doc_count": 160133
},
{
"key": 5,
"doc_count": 9395
},
{
"key": 4,
"doc_count": 1
}
]
}
},
{
"key": "John",
"doc_count": 1,
"score": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": 5,
"doc_count": 1
}
]
}
}
Now I want to remove the bucket from my results where name='---'. I tried using 'not', but it didn't work. Any hint will be appreciated.
PS: I am new to elasticsearch, and just trying to expand my knowledge.
You need to exclude the --- value in your query
{
"size": 10,
"query": {
"filtered": {
"query": {
"match_all": {}
},
"filter": {
"bool": {
"must": [
{
"range": {
"submitTime": {
"gt": "now-18d"
}
}
}
],
"must_not": [
{
"term": {
"name": "---"
}
}
]
}
}
}
},
"aggs": {
"name": {
"terms": {
"field": "name",
"order": {
"_term": "asc"
}
},
"aggs": {
"score": {
"terms": {
"field": "score"
}
}
}
}
}
}

Elasticsearch sum_bucket aggregation to sum the values contained in resulting buckets

I have a query as follows:
{
"size": 0,
"query": {
"filtered": {
"query": {
"bool": {
"must": [
{
"match": {
"_type": "grx-ipx"
}
},
{
"range": {
"#timestamp": {
"gte": "2015-09-08T15:00:00.000Z",
"lte": "2015-09-08T15:10:00.000Z"
}
}
}
]
}
},
"filter": {
"and": [
{
"terms": {
"inSightCustID": [
"ASD001",
"ZXC049"
]
}
},
{
"terms": {
"reportFamily": [
"GRXoIPX",
"LTEoIPX"
]
}
}
]
}
}
},
"_source": [
"inSightCustID",
"fiveMinuteIn",
"reportFamily",
"#timestamp"
],
"aggs": {
"timestamp": {
"terms": {
"field": "#timestamp",
"size": 5
},
"aggs": {
"reportFamily": {
"terms": {
"field": "reportFamily"
},
"aggs": {
"averageFiveMinute": {
"avg": {
"field": "fiveMinuteIn"
}
}
}
}
}
},
"distinct_timestamps": {
"cardinality": {
"field": "#timestamp"
}
}
}
}
This result of this query looks like:
...
"aggregations": {
"distinct_timestamps": {
"value": 3,
"value_as_string": "1970-01-01T00:00:00.003Z"
},
"timestamp": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": 1441724700000,
"key_as_string": "2015-09-08T15:05:00.000Z",
"doc_count": 10,
"reportFamily": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "GRXoIPX",
"doc_count": 5,
"averageFiveMinute": {
"value": 1687.6
}
},
{
"key": "LTEoIPX",
"doc_count": 5,
"averageFiveMinute": {
"value": 56710.6
}
}
]
}
},
...
What I want to do is for each bucket in the reportFamily aggregation, I want to show the sum of the averageFiveMinute values. So for instance, in the example above, I would also like to show the sum of 1687.6 and 56710.6. I want to do this for all reportFamily aggregations.
Here is what I have tried:
{
"size": 0,
"query": {
"filtered": {
"query": {
"bool": {
"must": [
{
"match": {
"_type": "grx-ipx"
}
},
{
"range": {
"#timestamp": {
"gte": "2015-09-08T15:00:00.000Z",
"lte": "2015-09-08T15:10:00.000Z"
}
}
}
]
}
},
"filter": {
"and": [
{
"terms": {
"inSightCustID": [
"ASD001",
"ZXC049"
]
}
},
{
"terms": {
"reportFamily": [
"GRXoIPX",
"LTEoIPX"
]
}
}
]
}
}
},
"_source": [
"inSightCustID",
"fiveMinuteIn",
"reportFamily",
"#timestamp"
],
"aggs": {
"timestamp": {
"terms": {
"field": "#timestamp",
"size": 5
},
"aggs": {
"reportFamily": {
"terms": {
"field": "reportFamily"
},
"aggs": {
"averageFiveMinute": {
"avg": {
"field": "fiveMinuteIn"
}
}
}
},
"sum_AvgFiveMinute": {
"sum_bucket": {
"buckets_path": "reportFamily>averageFiveMinute"
}
}
}
},
"distinct_timestamps": {
"cardinality": {
"field": "#timestamp"
}
}
}
}
I have added:
"sum_AvgFiveMinute": {
"sum_bucket": {
"buckets_path": "reportFamily>averageFiveMinute"
}
}
But unfortunately, this triggers an exception Parse Failure [Could not find aggregator type [sum_bucket] in [sum_AvgFiveMinute]
I expected the results to be something like:
...
"aggregations": {
"distinct_timestamps": {
"value": 3,
"value_as_string": "1970-01-01T00:00:00.003Z"
},
"timestamp": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": 1441724700000,
"key_as_string": "2015-09-08T15:05:00.000Z",
"doc_count": 10,
"reportFamily": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "GRXoIPX",
"doc_count": 5,
"averageFiveMinute": {
"value": 1687.6
}
},
{
"key": "LTEoIPX",
"doc_count": 5,
"averageFiveMinute": {
"value": 56710.6
}
}
]
},
"sum_AvgFiveMinute": {
"value": 58398.2
}
},
...
What is wrong with this query and how can I achieve the expected result?
Here is a link to the sum bucket aggregation docs.
Many thanks for the help.

Resources