I need to convert this query into Elasticsearch, but I am facing the problem that HAVING is not supported in Elasticsearch.
Select sum(count) as count,prop1
from
(
SELECT Count(*) as count,prop1 FROM [table1] group by prop1,prop2
having count = 1
)
group by prop1
order by count desc limit 10
I try this query in elastic search:
`GET /analytics_data/_search
{
"size": 0,
"query": {
"bool": {
"filter": [
{
"term":
{
"field": "test"
}
}
]
}
},
"aggs": {
"aggregation": {
"terms": {
"field": "prop1"
},
"aggs": {
"subaggregation": {
"terms": {
"field": "prop2",
"order": {
"_count": "desc"
}
}
},
"test":{
"bucket_selector": {
"buckets_path":
{
"test1": "_count"
},
"script":"params.test1 == 1"
}
}
}
}
}
}`
Here is the mapping that I use:
PUT /index
{
"mappings" : {
"timeline" : {
"properties" : {
"prop1" : {
"type" : "keyword"
},
"prop2" : {
"type" : "keyword"
}
}
}
}
}
but I cannot get the sub-aggregation buckets that have count == 1.
Here is the output of the suggested answer :
{
"took": 344,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"failed": 0
},
"hits": {
"total": 852146,
"max_score": 0,
"hits": []
},
"aggregations": {
"prop1": {
"doc_count_error_upper_bound": 646,
"sum_other_doc_count": 37299,
"buckets": [
{
"key": "porp1-key",
"doc_count": 348178,
"prop2": {
"doc_count_error_upper_bound": 130,
"sum_other_doc_count": 345325,
"buckets": [
{
"key": "e1552d2d-da84-4588-9b65-16c33848bb94_1",
"doc_count": 558,
"prop2_count": {
"value": 0
},
"prop2_check": {
"value": 0
}
},
{
"key": "04b1a8eb-f876-459b-af9b-855493318dca_426",
"doc_count": 383,
"prop2_count": {
"value": 0
},
"prop2_check": {
"value": 0
}
},
{
"key": "b165d2c7-6a23-4a4d-adbb-3b2a79d4c627_80",
"doc_count": 344,
"prop2_count": {
"value": 0
},
"prop2_check": {
"value": 0
}
},
{
"key": "c4ea55dc-c3b3-492b-98a2-1ad004212c3d_99",
"doc_count": 297,
"prop2_count": {
"value": 0
},
"prop2_check": {
"value": 0
}
},
{
"key": "dfc1ae22-5c7f-49ab-8488-207661b43716_294",
"doc_count": 264,
"prop2_count": {
"value": 0
},
"prop2_check": {
"value": 0
}
},
{
"key": "28815490-e7ce-420b-bab8-57a6ffc3f56a_572",
"doc_count": 239,
"prop2_count": {
"value": 0
},
"prop2_check": {
"value": 0
}
},
{
"key": "c3c56ec8-e0ff-46ea-841d-cc22b2dc65f6_574",
"doc_count": 217,
"prop2_count": {
"value": 0
},
"prop2_check": {
"value": 0
}
},
{
"key": "473289b8-fb73-4cbb-b8d7-a5386846745f_34",
"doc_count": 187,
"prop2_count": {
"value": 0
},
"prop2_check": {
"value": 0
}
},
{
"key": "670cb862-7976-4fd5-ba3f-3f8b7c03d615_11",
"doc_count": 185,
"prop2_count": {
"value": 0
},
"prop2_check": {
"value": 0
}
},
{
"key": "41870755-96dd-4a00-ab76-632a1dfaecb5_341",
"doc_count": 179,
"prop2_count": {
"value": 0
},
"prop2_check": {
"value": 0
}
}
]
},
"final": {
"value": 0
}
} ]
}
}
}
Try this. Aggregation final will give you the desired output.
GET /analytics_data/_search
{
"size": 0,
"query": {
"bool": {
"filter": [
{
"term": {
"field": "test"
}
}
]
}
},
"aggs": {
"prop1": {
"terms": {
"field": "prop1",
"size": 10
},
"aggs": {
"prop2": {
"terms": {
"field": "prop2",
"size": 10
},
"aggs": {
"prop2_count": {
"value_count": {
"field": "prop2"
}
},
"prop2_check": {
"bucket_script": {
"buckets_path": {
"count": "prop2_count.value"
},
"script": "(params.count == 1) ? 1 : 0"
}
}
}
},
"final": {
"sum_bucket": {
"buckets_path": "prop2>prop2_check"
}
}
}
}
}
}
Working code :
PUT prop
{
"mappings": {
"prop": {
"properties": {
"prop1": {
"type": "keyword"
},
"prop2": {
"type": "keyword"
}
}
}
}
}
POST _bulk
{"index":{"_index":"prop","_type":"prop"}}
{"prop1":"p1","prop2":"q1"}
{"index":{"_index":"prop","_type":"prop"}}
{"prop1":"p1","prop2":"q2"}
{"index":{"_index":"prop","_type":"prop"}}
{"prop1":"p1","prop2":"q2"}
{"index":{"_index":"prop","_type":"prop"}}
{"prop1":"p2","prop2":"q5"}
{"index":{"_index":"prop","_type":"prop"}}
{"prop1":"p2","prop2":"q6"}
GET prop/prop/_search
{
"size": 0,
"aggs": {
"prop1": {
"terms": {
"field": "prop1",
"size": 10
},
"aggs": {
"prop2": {
"terms": {
"field": "prop2",
"size": 10
},
"aggs": {
"prop2_count": {
"value_count": {
"field": "prop2"
}
},
"prop2_check": {
"bucket_script": {
"buckets_path": {
"count": "prop2_count.value"
},
"script": "(params.count == 1) ? 1 : 0"
}
}
}
},
"final":{
"sum_bucket": {
"buckets_path": "prop2>prop2_check"
}
}
}
}
}
}
Output :
{
"took": 6,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"failed": 0
},
"hits": {
"total": 5,
"max_score": 0,
"hits": []
},
"aggregations": {
"prop1": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "p1",
"doc_count": 3,
"prop2": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "q2",
"doc_count": 2,
"prop2_count": {
"value": 2
},
"prop2_check": {
"value": 0
}
},
{
"key": "q1",
"doc_count": 1,
"prop2_count": {
"value": 1
},
"prop2_check": {
"value": 1
}
}
]
},
"final": {
"value": 1
}
},
{
"key": "p2",
"doc_count": 2,
"prop2": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "q5",
"doc_count": 1,
"prop2_count": {
"value": 1
},
"prop2_check": {
"value": 1
}
},
{
"key": "q6",
"doc_count": 1,
"prop2_count": {
"value": 1
},
"prop2_check": {
"value": 1
}
}
]
},
"final": {
"value": 2
}
}
]
}
}
}
Related
Is it possible to calculate the number of occurrences of distinct values in a list field?
For example, let the following data:
[
{
"page":1,
"colors":[
{
"color": red
},
{
"color": white
},
{
"color": red
}
]
},
{
"page":2,
"colors":[
{
"color": yellow
},
{
"color": yellow
}
]
}
]
Is it possible to get a result like the following:
{
"page":1,
"colors_count":[
{
"Key": red,
"Count": 2
},
{
"Key": white,
"Count": 1
},
]
},
{
"page":2,
"colors_count":[
{
"Key": yellow,
"Count": 2
}
]
}
I tried using a terms aggregation, but I got the number of distinct values, so for page:1 I got red:1 and white:1.
Yes, you can do it. You will have to use the nested field type and a nested aggregation.
Mapping:
PUT colors
{
"mappings": {
"properties": {
"page" : { "type": "keyword" },
"colors": {
"type": "nested",
"properties": {
"color": {
"type": "keyword"
}
}
}
}
}
}
Insert Documents:
PUT colors/_doc/1
{
"page": 1,
"colors": [
{
"color": "red"
},
{
"color": "white"
},
{
"color": "red"
}
]
}
PUT colors/_doc/2
{
"page": 2,
"colors": [
{
"color": "yellow"
},
{
"color": "yellow"
}
]
}
Query:
GET colors/_search
{
"size" :0,
"aggs": {
"groupByPage": {
"terms": {
"field": "page"
},
"aggs": {
"colors": {
"nested": {
"path": "colors"
},
"aggs": {
"genres": {
"terms": {
"field": "colors.color"
}
}
}
}
}
}
}
}
Output:
{
"took": 3,
"timed_out": false,
"_shards": {
"total": 1,
"successful": 1,
"skipped": 0,
"failed": 0
},
"hits": {
"total": {
"value": 2,
"relation": "eq"
},
"max_score": null,
"hits": []
},
"aggregations": {
"groupByPage": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "1", // page field value
"doc_count": 1,
"colors": {
"doc_count": 3,
"genres": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "red",
"doc_count": 2
},
{
"key": "white",
"doc_count": 1
}
]
}
}
},
{
"key": "2", // page field value
"doc_count": 1,
"colors": {
"doc_count": 2,
"genres": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "yellow",
"doc_count": 2
}
]
}
}
}
]
}
}
}
I try to get my data with different aggregation criteria; afterwards I want to order it based on one of the aggregation criteria. In this specific case I want my data to be ordered descending based on the "Monthly_Income / SUM" criterion.
I searched and tried lots of things but none of them worked for me. Could you give me the answer, because I am new to Elasticsearch.
what I searched so far and couldn't solve the problem ;
"ordering_by_a_sub_aggregation,
Sorting Based on "Deep" Metrics,
search-aggregations-bucket-terms-aggregation-script,
search-aggregations-bucket-multi-terms-aggregation
To visualize the problem: I always get the result below. I tried lots of methods but I couldn't achieve the desired result.
undesired result
desired result
Request
`
{
"query": {
"bool": {
"must": [],
"must_not": []
}
},
"size": 0,
"aggs": {
"GENDER": {
"terms": {
"field": "GENDER.keyword",
"size": 10000000,
"missing": "N/A"
// ,"order": {"MARTIAL_STATUS>Monthly_Income_0.max" : "desc" }
},
"aggs": {
"MARTIAL_STATUS": {
"terms": {
"field": "MARTIAL_STATUS.keyword",
"size": 10000000,
"missing": "N/A"
// ,"order": {"Monthly_Income_0.value" : "desc" }
},
"aggs": {
"Monthly_Income_0": {
"sum": {
"field": "Monthly_Income"
}
},
"Monthly_Income_1": {
"value_count": {
"field": "Monthly_Income"
}
},
"SALE_PRICE_2": {
"sum": {
"field": "SALE_PRICE"
}
}
// ,"sort_by_percentage": {
// "bucket_sort": {
// "sort": [ { "Monthly_Income_0.value": { "order": "desc" } } ]
// }
// }
}
}
}
}
}
}
`
Response
`
{
"took": 0,
"timed_out": false,
"_shards": {
"total": 1,
"successful": 1,
"skipped": 0,
"failed": 0
},
"hits": {
"total": {
"value": 10000,
"relation": "gte"
},
"max_score": null,
"hits": []
},
"aggregations": {
"GENDER": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "Male",
"doc_count": 40959,
"MARTIAL_STATUS": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "Married",
"doc_count": 35559,
"SALE_PRICE_2": {
"value": 2.530239767013672E9
},
"Monthly_Income_0": {
"value": 3.59618565E8
},
"Monthly_Income_1": {
"value": 35559
}
},
{
"key": "Single",
"doc_count": 5399,
"SALE_PRICE_2": {
"value": 3.7742297754296875E8
},
"Monthly_Income_0": {
"value": 5.3465554E7
},
"Monthly_Income_1": {
"value": 5399
}
},
{
"key": "N/A",
"doc_count": 1,
"SALE_PRICE_2": {
"value": 87344.203125
},
"Monthly_Income_0": {
"value": 40000.0
},
"Monthly_Income_1": {
"value": 1
}
}
]
}
},
{
"key": "Female",
"doc_count": 7777,
"MARTIAL_STATUS": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "Married",
"doc_count": 5299,
"SALE_PRICE_2": {
"value": 3.9976638293359375E8
},
"Monthly_Income_0": {
"value": 4.4994796E7
},
"Monthly_Income_1": {
"value": 5299
}
},
{
"key": "Single",
"doc_count": 2477,
"SALE_PRICE_2": {
"value": 1.8698677312695312E8
},
"Monthly_Income_0": {
"value": 1.8793502E7
},
"Monthly_Income_1": {
"value": 2477
}
},
{
"key": "N/A",
"doc_count": 1,
"SALE_PRICE_2": {
"value": 101006.8203125
},
"Monthly_Income_0": {
"value": 10000.0
},
"Monthly_Income_1": {
"value": 1
}
}
]
}
}
]
}
}
}
`
I tried to order based on an aggregate column but I couldn't achieve it.
My understanding of your issue is that you want to group by on a combination of gender and marital status.
I have used a runtime mapping to concatenate the fields "gender" and "marital status", used a terms aggregation to group by on the runtime field, and sorted the groups based on the sum.
{
"size": 0,
"runtime_mappings": {
"gender-maritalstatus": {
"type": "keyword",
"script": {
"source": """
def gender='NA';
def maritalstatus='NA';
if(doc['Gender.keyword'].size()!=0)
gender= doc['Gender.keyword'].value;
if(doc['Marital_Status.keyword'].size()!=0)
maritalstatus= doc['Marital_Status.keyword'].value;
emit(gender+'-'+maritalstatus);
"""
}
}
},
"aggs": {
"gender-marital-grouping": {
"terms": {
"field": "gender-maritalstatus",
"order": {
"monthly_income": "desc"
},
"size": 10
},
"aggs": {
"monthly_income": {
"sum": {
"field": "Monthly_Income"
}
}
}
}
}
}
Result
"buckets" : [
{
"key" : "Female-Single",
"doc_count" : 2,
"monthly_income" : {
"value" : 300.0
}
},
{
"key" : "Male-Married",
"doc_count" : 2,
"monthly_income" : {
"value" : 200.0
}
},
{
"key" : "Female-NA",
"doc_count" : 1,
"monthly_income" : {
"value" : 100.0
}
},
{
"key" : "Male-NA",
"doc_count" : 1,
"monthly_income" : {
"value" : 100.0
}
},
{
"key" : "Male-Single",
"doc_count" : 1,
"monthly_income" : {
"value" : 100.0
}
}
]
Elasticsearch - Count duplicated and unique values
I also need the same kind of count, but that field is in nested properties, as:
[{
"firstname": "john",
"lastname": "doe",
"addressList": [{
"addressId": 39640,
"txt": "sdf",
}, {
"addressId": 39641,
"txt": "NEW",
}, {
"addressId": 39640,
"txt": "sdf",
}, {
"addressId": 39641,
"txt": "NEW"
}
]
}, {
"firstname": "jane",
"lastname": "smith",
"addressList": [{
"addressId": 39644,
"txt": "sdf",
}, {
"addressId": 39642,
"txt": "NEW",
}, {
"addressId": 39644,
"txt": "sdf",
}, {
"addressId": 39642,
"txt": "NEW"
}
]
}
]
What would be the query for addressId duplicate counts? Need your help on this user:3838328
I got the answer for nested field duplicate counts as
POST <your_index_name>/_search
{
"size": 0,
"aggs": {
"prop_counts": {
"nested": {
"path": "addressList"
},
"aggs": {
"duplicate_aggs": {
"terms": {
"field": "addressList.addressId",
"min_doc_count": 2,
"size": 100 <----- Note this
}
},
"duplicate_bucketcount": {
"stats_bucket": {
"buckets_path": "duplicate_aggs._count"
}
},
"nonduplicate_aggs": {
"terms": {
"field": "addressList.addressId",
"size": 100 <---- Note this
},
"aggs": {
"equal_one": {
"bucket_selector": {
"buckets_path": {
"count": "_count"
},
"script": "params.count == 1"
}
}
}
},
"nonduplicate_bucketcount": {
"sum_bucket": {
"buckets_path": "nonduplicate_aggs._count"
}
}
}
}
}
}
Response as
{
"took": 4,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"skipped": 0,
"failed": 0
},
"hits": {
"total": 2,
"max_score": 0,
"hits": []
},
"aggregations": {
"prop_counts": {
"doc_count": 8,
"duplicate_aggs": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [{
"key": 39640,
"doc_count": 2
}, {
"key": 39641,
"doc_count": 2
}, {
"key": 39644,
"doc_count": 2
}, {
"key": 39642,
"doc_count": 2
}
]
},
"nonduplicate_aggs": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": []
},
"duplicate_bucketcount": {
"count": 4,
"min": 2,
"max": 2,
"avg": 2,
"sum": 8
},
"nonduplicate_bucketcount": {
"value": 0
}
}
}
}
I have an index that will hold billions of documents in the future; for now it's around 20 million documents. It took over 10s to get the result, while I need the query to run in around 3-4s for a billion documents. Is my structure wrong, or do I need to improve the query or the server configuration? I'm using Amazon Elasticsearch Service.
This query will return amount/transactions/items of every station in every area
Query:
{
"size" : 0,
"query": {
"bool": {
"must":
[
{
"range": {
"date_sec": {
"gte": "1483228800",
"lte": "1525046400"
}
}
},
{
"range": {
"time_sec": {
"gte": "32400",
"lte": "75600"
}
}
}
]
}
},
"aggs": {
"numstoreamountclient" : {
"filter" : { "range" : { "amount" : { "gt" : 0 } } },
"aggs": {
"numstore_amountclient": {
"cardinality" : {
"field" : "id_station"
}
}
}
},
"id_station": {
"terms": {
"field": "id_station"
},
"aggs": {
"area_type": {
"terms": {
"field": "area_type"
},
"aggs": {
"max_time" : { "max" : { "field" : "time_sec" } },
"min_time" : { "min" : { "field" : "time_sec" } },
"amountclient": {
"sum": {
"field": "amount"
}
},
"itemclient": {
"sum": {
"field": "items"
}
},
"transactionclient" : {
"value_count" :
{
"field" : "id"
}
},
"inwatchinghour": {
"filter" : { "term" : { "in_watchinghour" : 1 } },
"aggs" : {
"amountclientwatch": {
"sum": {
"field": "amount"
}
},
"itemclient": {
"sum": {
"field": "items"
}
},
"transactionclientwatch" : {
"value_count" :
{
"field" : "id"
}
}
}
},
"saleclient": {
"filter" : {
"bool": {
"must":
[
{
"term" : { "in_watchinghour" : 1 }
},
{
"range": {
"items": {
"gt": "0"
}
}
},
{
"range": {
"amount": {
"gt": "0"
}
}
}
]
}
},
"aggs" : {
"sale_client" : {
"value_count" :
{
"field" : "id"
}
}
}
}
}
}
}
}
}
}
Result:
{
"took": 10757,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"skipped": 0,
"failed": 0
},
"hits": {
"total": 19778330,
"max_score": 0,
"hits": []
},
"aggregations": {
"numstoreamountclient": {
"doc_count": 19677164,
"numstore_amountclient": {
"value": 35
}
},
"id_station": {
"doc_count_error_upper_bound": 437877,
"sum_other_doc_count": 11401869,
"buckets": [
{
"key": 2209,
"doc_count": 1456505,
"area_type": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": 1,
"doc_count": 1456505,
"saleclient": {
"doc_count": 708499,
"sale_client": {
"value": 708499
}
},
"inwatchinghour": {
"doc_count": 711435,
"transactionclientwatch": {
"value": 711435
},
"amountclientwatch": {
"value": 210203295816
},
"itemclient": {
"value": 4105206
}
},
"amountclient": {
"value": 427392789897
},
"max_time": {
"value": 75600
},
"transactionclient": {
"value": 1456505
},
"itemclient": {
"value": 8402911
},
"min_time": {
"value": 32400
}
}
]
}
},
{
"key": 2210,
"doc_count": 890590,
"area_type": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": 1,
"doc_count": 890590,
"saleclient": {
"doc_count": 357520,
"sale_client": {
"value": 357520
}
},
"inwatchinghour": {
"doc_count": 358900,
"transactionclientwatch": {
"value": 358900
},
"amountclientwatch": {
"value": 89792941442
},
"itemclient": {
"value": 2146312
}
},
"amountclient": {
"value": 222577251265
},
"max_time": {
"value": 75600
},
"transactionclient": {
"value": 890590
},
"itemclient": {
"value": 5346273
},
"min_time": {
"value": 32400
}
}
]
}
},
{
"key": 2226,
"doc_count": 844491,
"area_type": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": 1,
"doc_count": 844491,
"saleclient": {
"doc_count": 346801,
"sale_client": {
"value": 346801
}
},
"inwatchinghour": {
"doc_count": 347730,
"transactionclientwatch": {
"value": 347730
},
"amountclientwatch": {
"value": 90585228756
},
"itemclient": {
"value": 1817412
}
},
"amountclient": {
"value": 219008246857
},
"max_time": {
"value": 75600
},
"transactionclient": {
"value": 844491
},
"itemclient": {
"value": 4409412
},
"min_time": {
"value": 32400
}
}
]
}
},
{
"key": 2317,
"doc_count": 812409,
"area_type": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": 1,
"doc_count": 812409,
"saleclient": {
"doc_count": 292933,
"sale_client": {
"value": 292933
}
},
"inwatchinghour": {
"doc_count": 294866,
"transactionclientwatch": {
"value": 294866
},
"amountclientwatch": {
"value": 105661613404
},
"itemclient": {
"value": 2144352
}
},
"amountclient": {
"value": 290725384084
},
"max_time": {
"value": 75600
},
"transactionclient": {
"value": 812409
},
"itemclient": {
"value": 5925558
},
"min_time": {
"value": 32400
}
}
]
}
},
{
"key": 2211,
"doc_count": 811198,
"area_type": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": 1,
"doc_count": 811198,
"saleclient": {
"doc_count": 262617,
"sale_client": {
"value": 262617
}
},
"inwatchinghour": {
"doc_count": 265515,
"transactionclientwatch": {
"value": 265515
},
"amountclientwatch": {
"value": 70763222934
},
"itemclient": {
"value": 1783073
}
},
"amountclient": {
"value": 213071496626
},
"max_time": {
"value": 75600
},
"transactionclient": {
"value": 811198
},
"itemclient": {
"value": 5476443
},
"min_time": {
"value": 32400
}
}
]
}
},
{
"key": 2331,
"doc_count": 806670,
"area_type": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": 1,
"doc_count": 806670,
"saleclient": {
"doc_count": 349472,
"sale_client": {
"value": 349472
}
},
"inwatchinghour": {
"doc_count": 350285,
"transactionclientwatch": {
"value": 350285
},
"amountclientwatch": {
"value": 82784018110
},
"itemclient": {
"value": 2079211
}
},
"amountclient": {
"value": 192804137579
},
"max_time": {
"value": 75600
},
"transactionclient": {
"value": 806670
},
"itemclient": {
"value": 4834069
},
"min_time": {
"value": 32400
}
}
]
}
},
{
"key": 2323,
"doc_count": 749161,
"area_type": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": 1,
"doc_count": 749161,
"saleclient": {
"doc_count": 280928,
"sale_client": {
"value": 280928
}
},
"inwatchinghour": {
"doc_count": 282498,
"transactionclientwatch": {
"value": 282498
},
"amountclientwatch": {
"value": 62082735118
},
"itemclient": {
"value": 1588445
}
},
"amountclient": {
"value": 162365212278
},
"max_time": {
"value": 75600
},
"transactionclient": {
"value": 749161
},
"itemclient": {
"value": 4231490
},
"min_time": {
"value": 32400
}
}
]
}
},
{
"key": 2345,
"doc_count": 727589,
"area_type": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": 1,
"doc_count": 727589,
"saleclient": {
"doc_count": 340141,
"sale_client": {
"value": 340141
}
},
"inwatchinghour": {
"doc_count": 341590,
"transactionclientwatch": {
"value": 341590
},
"amountclientwatch": {
"value": 107492036777
},
"itemclient": {
"value": 2421158
}
},
"amountclient": {
"value": 228611232646
},
"max_time": {
"value": 75600
},
"transactionclient": {
"value": 727589
},
"itemclient": {
"value": 5138628
},
"min_time": {
"value": 32400
}
}
]
}
},
{
"key": 2329,
"doc_count": 663856,
"area_type": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": 1,
"doc_count": 663856,
"saleclient": {
"doc_count": 163358,
"sale_client": {
"value": 163358
}
},
"inwatchinghour": {
"doc_count": 164339,
"transactionclientwatch": {
"value": 164339
},
"amountclientwatch": {
"value": 55298080357
},
"itemclient": {
"value": 1209514
}
},
"amountclient": {
"value": 211070998632
},
"max_time": {
"value": 75600
},
"transactionclient": {
"value": 663856
},
"itemclient": {
"value": 4875689
},
"min_time": {
"value": 32400
}
}
]
}
},
{
"key": 2355,
"doc_count": 613992,
"area_type": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": 1,
"doc_count": 613992,
"saleclient": {
"doc_count": 113575,
"sale_client": {
"value": 113575
}
},
"inwatchinghour": {
"doc_count": 114038,
"transactionclientwatch": {
"value": 114038
},
"amountclientwatch": {
"value": 30494132488
},
"itemclient": {
"value": 563628
}
},
"amountclient": {
"value": 140705052880
},
"max_time": {
"value": 75600
},
"transactionclient": {
"value": 613992
},
"itemclient": {
"value": 2920908
},
"min_time": {
"value": 32400
}
}
]
}
}
]
}
}
}
I have an elasticsearch aggregation query like this.
{
"aggs": {
"customer": {
"aggs": {
"Total_Sale": {
"sum": {
"field": "amount"
}
}
},
"terms": {
"field": "org",
"size": 50000
}
}
}
}
And it results in bucket aggregation like following
{
"aggregations": {
"customer": {
"buckets": [
{
"Total_Sale": { "value": 9999 },
"doc_count": 8,
"key": "cats"
},
{
"Total_Sale": { "value": 8888 },
"doc_count": 6,
"key": "tigers"
},
{
"Total_Sale": { "value": 444},
"doc_count": 5,
"key": "lions"
},
{
"Total_Sale": { "value": 555 },
"doc_count": 2,
"key": "wolves"
}
]
}
}
}
I want another range bucket aggregation based on doc_count. So, final result required is
{
"buckets": [
{
"Sum_of_Total_Sale": 555, // If I can form bucket, I can get this using sum_bucket. So, getting bucket is important.
"Sum_of_doc_count": 2,
"doc_count": 1,
"key": "*-3",
"to": 3.0
},
{
"Sum_of_Total_Sale": 9332,
"Sum_of_doc_count": 11,
"doc_count": 2,
"from": 4.0,
"key": "4-6",
"to": 6.0
},
{
"Sum_of_Total_Sale": 9999,
"Sum_of_doc_count": 8,
"doc_count": 1,
"from": 7.0,
"key": "7-*"
}
]
}
Bucket Selector Aggregation and then using bucket sum aggregation will not work because there is more than one key for range.
Bucket Script Aggregation does calculation within bucket.
Can I add scripted doc field for each document which help me to create these buckets?
There's no aggregation that I know of that can allow you to do this in one shot. However, there is one technique that I use from time to time to overcome this limitation. The idea is to repeat the same terms/sum aggregation and then use a bucket_selector pipeline aggregation for each of the ranges you're interested in.
POST index/_search
{
"size": 0,
"aggs": {
"*-3": {
"terms": {
"field": "org",
"size": 1000
},
"aggs": {
"Total_Sale": {
"sum": {
"field": "amount"
}
},
"*-3": {
"bucket_selector": {
"buckets_path": {
"docCount": "_count"
},
"script": "params.docCount <= 3"
}
}
}
},
"*-3_Total_Sales": {
"sum_bucket": {
"buckets_path": "*-3>Total_Sale"
}
},
"*-3_Total_Docs": {
"sum_bucket": {
"buckets_path": "*-3>_count"
}
},
"4-6": {
"terms": {
"field": "org",
"size": 1000
},
"aggs": {
"Total_Sale": {
"sum": {
"field": "amount"
}
},
"4-6": {
"bucket_selector": {
"buckets_path": {
"docCount": "_count"
},
"script": "params.docCount >= 4 && params.docCount <= 6"
}
}
}
},
"4-6_Total_Sales": {
"sum_bucket": {
"buckets_path": "4-6>Total_Sale"
}
},
"4-6_Total_Docs": {
"sum_bucket": {
"buckets_path": "4-6>_count"
}
},
"7-*": {
"terms": {
"field": "org",
"size": 1000
},
"aggs": {
"Total_Sale": {
"sum": {
"field": "amount"
}
},
"7-*": {
"bucket_selector": {
"buckets_path": {
"docCount": "_count"
},
"script": "params.docCount >= 7"
}
}
}
},
"7-*_Total_Sales": {
"sum_bucket": {
"buckets_path": "7-*>Total_Sale"
}
},
"7_*_Total_Docs": {
"sum_bucket": {
"buckets_path": "7-*>_count"
}
}
}
}
You'll get an answer that looks like this, which contains exactly the figures you're looking for in the xyz_Total_Sales and xyz_Total_Docs results:
"aggregations": {
"*-3": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "wolves",
"doc_count": 2,
"Total_Sale": {
"value": 555
}
}
]
},
"7-*": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "cats",
"doc_count": 8,
"Total_Sale": {
"value": 9999
}
}
]
},
"4-6": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "tigers",
"doc_count": 6,
"Total_Sale": {
"value": 8888
}
},
{
"key": "lions",
"doc_count": 5,
"Total_Sale": {
"value": 444
}
}
]
},
"*-3_Total_Sales": {
"value": 555
},
"*-3_Total_Docs": {
"value": 2
},
"4-6_Total_Sales": {
"value": 9332
},
"4-6_Total_Docs": {
"value": 11
},
"7-*_Total_Sales": {
"value": 9999
},
"7_*_Total_Docs": {
"value": 8
}
}