Elasticsearch sort on derivative aggregation to find trending topics - elasticsearch

I am collecting Twitter data and want to find the account that received the highest number of new followers during a period of 48 hours.
The index is populated regularly with account data and a count of followers, together with a datestamp.
I got as far as computing a derivative value, but I can't figure out how to sort on the derivative so as to return the accounts with the highest derivatives (the accounts that gained or lost the most followers during the last 48 hours).
The mapping:
{
"twfollowers" : {
"mappings" : {
"twfollowers" : {
"properties" : {
"followers" : {
"type" : "long"
},
"logDate" : {
"type" : "date"
},
"screen_name" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"user_id" : {
"type" : "long"
}
}
}
}
}
}
And here is the query:
GET /twfollowers/twfollowers/_search
{
"query": {
"bool" : {
"must" : {
"range": {"logDate": {
"gte" : "now-2d/d",
"lt" : "now/d"
}}
}
}
},
"size": 0,
"aggs": {
"users": {
"terms": {
"field": "screen_name.keyword",
"size": 10
},
"aggs": {
"my_date_histo": {
"date_histogram": {
"field": "logDate",
"interval": "day"
},
"aggs": {
"the_sum": {
"max": {
"field": "followers"
}
},
"the_diff": {
"derivative": {
"buckets_path": "the_sum"
}
}
}
}
}
}
}
}
And finally the results:
{
"took": 12,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"skipped": 0,
"failed": 0
},
"hits": {
"total": 129621,
"max_score": 0,
"hits": []
},
"aggregations": {
"users": {
"doc_count_error_upper_bound": 356,
"sum_other_doc_count": 122394,
"buckets": [
{
"key": "Sero83954560",
"doc_count": 968,
"my_date_histo": {
"buckets": [
{
"key_as_string": "2018-09-03T00:00:00.000Z",
"key": 1535932800000,
"doc_count": 268,
"the_sum": {
"value": 870
}
},
{
"key_as_string": "2018-09-04T00:00:00.000Z",
"key": 1536019200000,
"doc_count": 700,
"the_sum": {
"value": 873
},
"the_diff": {
"value": 3
}
}
]
}
},
{
"key": "Hajk",
"doc_count": 913,
"my_date_histo": {
"buckets": [
{
"key_as_string": "2018-09-03T00:00:00.000Z",
"key": 1535932800000,
"doc_count": 287,
"the_sum": {
"value": 1529
}
},
{
"key_as_string": "2018-09-04T00:00:00.000Z",
"key": 1536019200000,
"doc_count": 626,
"the_sum": {
"value": 1532
},
"the_diff": {
"value": 3
}
}
]
}
},
{
"key": "idagoraSE",
"doc_count": 831,
"my_date_histo": {
"buckets": [
{
"key_as_string": "2018-09-03T00:00:00.000Z",
"key": 1535932800000,
"doc_count": 173,
"the_sum": {
"value": 1165
}
},
{
"key_as_string": "2018-09-04T00:00:00.000Z",
"key": 1536019200000,
"doc_count": 658,
"the_sum": {
"value": 1166
},
"the_diff": {
"value": 1
}
}
]
}
},
{
"key": "Dodgeman2ever",
"doc_count": 779,
"my_date_histo": {
"buckets": [
{
"key_as_string": "2018-09-03T00:00:00.000Z",
"key": 1535932800000,
"doc_count": 235,
"the_sum": {
"value": 2017
}
},
{
"key_as_string": "2018-09-04T00:00:00.000Z",
"key": 1536019200000,
"doc_count": 544,
"the_sum": {
"value": 2031
},
"the_diff": {
"value": 14
}
}
]
}
},
{
"key": "KettilsMead",
"doc_count": 743,
"my_date_histo": {
"buckets": [
{
"key_as_string": "2018-09-03T00:00:00.000Z",
"key": 1535932800000,
"doc_count": 453,
"the_sum": {
"value": 2860
}
},
{
"key_as_string": "2018-09-04T00:00:00.000Z",
"key": 1536019200000,
"doc_count": 290,
"the_sum": {
"value": 2862
},
"the_diff": {
"value": 2
}
}
]
}
},
{
"key": "BWhalbergarvid",
"doc_count": 683,
"my_date_histo": {
"buckets": [
{
"key_as_string": "2018-09-03T00:00:00.000Z",
"key": 1535932800000,
"doc_count": 87,
"the_sum": {
"value": 200
}
},
{
"key_as_string": "2018-09-04T00:00:00.000Z",
"key": 1536019200000,
"doc_count": 596,
"the_sum": {
"value": 203
},
"the_diff": {
"value": 3
}
}
]
}
},
{
"key": "MorKarins",
"doc_count": 601,
"my_date_histo": {
"buckets": [
{
"key_as_string": "2018-09-03T00:00:00.000Z",
"key": 1535932800000,
"doc_count": 86,
"the_sum": {
"value": 3874
}
},
{
"key_as_string": "2018-09-04T00:00:00.000Z",
"key": 1536019200000,
"doc_count": 515,
"the_sum": {
"value": 3875
},
"the_diff": {
"value": 1
}
}
]
}
},
{
"key": "erlhel",
"doc_count": 593,
"my_date_histo": {
"buckets": [
{
"key_as_string": "2018-09-03T00:00:00.000Z",
"key": 1535932800000,
"doc_count": 218,
"the_sum": {
"value": 730
}
},
{
"key_as_string": "2018-09-04T00:00:00.000Z",
"key": 1536019200000,
"doc_count": 375,
"the_sum": {
"value": 730
},
"the_diff": {
"value": 0
}
}
]
}
},
{
"key": "SaveSweden2018",
"doc_count": 560,
"my_date_histo": {
"buckets": [
{
"key_as_string": "2018-09-03T00:00:00.000Z",
"key": 1535932800000,
"doc_count": 157,
"the_sum": {
"value": 711
}
},
{
"key_as_string": "2018-09-04T00:00:00.000Z",
"key": 1536019200000,
"doc_count": 403,
"the_sum": {
"value": 732
},
"the_diff": {
"value": 21
}
}
]
}
},
{
"key": "MXCartoons",
"doc_count": 556,
"my_date_histo": {
"buckets": [
{
"key_as_string": "2018-09-03T00:00:00.000Z",
"key": 1535932800000,
"doc_count": 40,
"the_sum": {
"value": 4313
}
},
{
"key_as_string": "2018-09-04T00:00:00.000Z",
"key": 1536019200000,
"doc_count": 516,
"the_sum": {
"value": 4315
},
"the_diff": {
"value": 2
}
}
]
}
}
]
}
}
}

Related

Elastic search terms aggregation for getting filter options

I'm trying to implement product search and want to get the search results along with the filters that can be applied to them. I have managed to get the filter keys, but I also want the values for each of those keys.
My product body is:
{
...product,
"attributes": [
{
"name": "Color",
"value": "Aqua Blue"
},
{
"name": "Gender",
"value": "Female"
},
{
"name": "Occasion",
"value": "Active Wear"
},
{
"name": "Size",
"value": "0"
}
],
}
and I'm using this query in Elasticsearch:
GET product/_search
{
"aggs": {
"filters": {
"terms": {
"field": "attributes.name"
},
"aggs": {
"values": {
"terms": {
"field": "attributes.value",
"size": 10
}
}
}
}
}
}
I'm not sure why, but I'm getting all the values under every key:
"aggregations": {
"filters": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "Color",
"doc_count": 3,
"values": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "Active Wear",
"doc_count": 3
},
{
"key": "Aqua Blue",
"doc_count": 3
},
{
"key": "Female",
"doc_count": 3
},
{
"key": "0",
"doc_count": 2
},
{
"key": "10XL",
"doc_count": 1
}
]
}
},
{
"key": "Gender",
"doc_count": 3,
"values": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "Active Wear",
"doc_count": 3
},
{
"key": "Aqua Blue",
"doc_count": 3
},
{
"key": "Female",
"doc_count": 3
},
{
"key": "0",
"doc_count": 2
},
{
"key": "10XL",
"doc_count": 1
}
]
}
},
{
"key": "Occasion",
"doc_count": 3,
"values": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "Active Wear",
"doc_count": 3
},
{
"key": "Aqua Blue",
"doc_count": 3
},
{
"key": "Female",
"doc_count": 3
},
{
"key": "0",
"doc_count": 2
},
{
"key": "10XL",
"doc_count": 1
}
]
}
},
{
"key": "Size",
"doc_count": 3,
"values": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "Active Wear",
"doc_count": 3
},
{
"key": "Aqua Blue",
"doc_count": 3
},
{
"key": "Female",
"doc_count": 3
},
{
"key": "0",
"doc_count": 2
},
{
"key": "10XL",
"doc_count": 1
}
]
}
}
]
}
Also, I do not want to have to specify every key (like Color or Size) explicitly in order to get its respective values.
Thanks :)
To keep things simple, you should use a single field to store each attribute:
"gender":"Male"
I assume you have tons of attributes, so you created an array instead; to handle that you will have to use the "nested" field type.
The nested type preserves the relation between the properties of each nested document. If you don't use nested, you will see all the properties and values mixed together, and you will not be able to aggregate by a property without manually adding filters.
You can read an article I wrote about that here:
https://opster.com/guides/elasticsearch/data-architecture/elasticsearch-nested-field-object-field/
Mappings :
PUT test_product_nested
{
"mappings": {
"properties": {
"attributes": {
"type": "nested",
"properties": {
"name": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"value": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
}
}
},
"title": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
}
}
}
}
This query will only show Red products of size XL and aggregate by attributes.
If you want to do ORs instead of ANDs, you must use "should" clauses instead of "filter" clauses.
Query
POST test_product_nested/_search
{
"query": {
"bool": {
"filter": [
{
"nested": {
"path": "attributes",
"query": {
"bool": {
"filter": [
{
"term": {
"attributes.name.keyword": "Color"
}
},
{
"term": {
"attributes.value.keyword": "Red"
}
}
]
}
}
}
},
{
"nested": {
"path": "attributes",
"query": {
"bool": {
"filter": [
{
"term": {
"attributes.name.keyword": "Size"
}
},
{
"term": {
"attributes.value.keyword": "XL"
}
}
]
}
}
}
}
]
}
},
"aggs": {
"attributes": {
"nested": {
"path": "attributes"
},
"aggs": {
"name": {
"terms": {
"field": "attributes.name.keyword"
},
"aggs": {
"values": {
"terms": {
"field": "attributes.value.keyword",
"size": 10
}
}
}
}
}
}
}
}
Results
{
"took": 1,
"timed_out": false,
"_shards": {
"total": 1,
"successful": 1,
"skipped": 0,
"failed": 0
},
"hits": {
"total": {
"value": 1,
"relation": "eq"
},
"max_score": 0,
"hits": [
{
"_index": "test_product_nested",
"_id": "aJRayoQBtNG1OrZoEOQi",
"_score": 0,
"_source": {
"title": "Product 1",
"attributes": [
{
"name": "Color",
"value": "Red"
},
{
"name": "Gender",
"value": "Female"
},
{
"name": "Occasion",
"value": "Active Wear"
},
{
"name": "Size",
"value": "XL"
}
]
}
}
]
},
"aggregations": {
"attributes": {
"doc_count": 4,
"name": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "Color",
"doc_count": 1,
"values": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "Red",
"doc_count": 1
}
]
}
},
{
"key": "Gender",
"doc_count": 1,
"values": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "Female",
"doc_count": 1
}
]
}
},
{
"key": "Occasion",
"doc_count": 1,
"values": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "Active Wear",
"doc_count": 1
}
]
}
},
{
"key": "Size",
"doc_count": 1,
"values": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "XL",
"doc_count": 1
}
]
}
}
]
}
}
}
}

Elasticsearch nested aggregation got too slow result

My index will hold billions of documents in the future; for now it has around 20 million. The query below takes over 10 s to return a result, while I need it to take around 3-4 s even with billions of documents. Is my structure wrong, or do I need to improve the query or the server configuration? I'm using Amazon Elasticsearch Service.
This query will return amount/transactions/items of every station in every area
Query:
{
"size" : 0,
"query": {
"bool": {
"must":
[
{
"range": {
"date_sec": {
"gte": "1483228800",
"lte": "1525046400"
}
}
},
{
"range": {
"time_sec": {
"gte": "32400",
"lte": "75600"
}
}
}
]
}
},
"aggs": {
"numstoreamountclient" : {
"filter" : { "range" : { "amount" : { "gt" : 0 } } },
"aggs": {
"numstore_amountclient": {
"cardinality" : {
"field" : "id_station"
}
}
}
},
"id_station": {
"terms": {
"field": "id_station"
},
"aggs": {
"area_type": {
"terms": {
"field": "area_type"
},
"aggs": {
"max_time" : { "max" : { "field" : "time_sec" } },
"min_time" : { "min" : { "field" : "time_sec" } },
"amountclient": {
"sum": {
"field": "amount"
}
},
"itemclient": {
"sum": {
"field": "items"
}
},
"transactionclient" : {
"value_count" :
{
"field" : "id"
}
},
"inwatchinghour": {
"filter" : { "term" : { "in_watchinghour" : 1 } },
"aggs" : {
"amountclientwatch": {
"sum": {
"field": "amount"
}
},
"itemclient": {
"sum": {
"field": "items"
}
},
"transactionclientwatch" : {
"value_count" :
{
"field" : "id"
}
}
}
},
"saleclient": {
"filter" : {
"bool": {
"must":
[
{
"term" : { "in_watchinghour" : 1 }
},
{
"range": {
"items": {
"gt": "0"
}
}
},
{
"range": {
"amount": {
"gt": "0"
}
}
}
]
}
},
"aggs" : {
"sale_client" : {
"value_count" :
{
"field" : "id"
}
}
}
}
}
}
}
}
}
}
Result:
{
"took": 10757,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"skipped": 0,
"failed": 0
},
"hits": {
"total": 19778330,
"max_score": 0,
"hits": []
},
"aggregations": {
"numstoreamountclient": {
"doc_count": 19677164,
"numstore_amountclient": {
"value": 35
}
},
"id_station": {
"doc_count_error_upper_bound": 437877,
"sum_other_doc_count": 11401869,
"buckets": [
{
"key": 2209,
"doc_count": 1456505,
"area_type": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": 1,
"doc_count": 1456505,
"saleclient": {
"doc_count": 708499,
"sale_client": {
"value": 708499
}
},
"inwatchinghour": {
"doc_count": 711435,
"transactionclientwatch": {
"value": 711435
},
"amountclientwatch": {
"value": 210203295816
},
"itemclient": {
"value": 4105206
}
},
"amountclient": {
"value": 427392789897
},
"max_time": {
"value": 75600
},
"transactionclient": {
"value": 1456505
},
"itemclient": {
"value": 8402911
},
"min_time": {
"value": 32400
}
}
]
}
},
{
"key": 2210,
"doc_count": 890590,
"area_type": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": 1,
"doc_count": 890590,
"saleclient": {
"doc_count": 357520,
"sale_client": {
"value": 357520
}
},
"inwatchinghour": {
"doc_count": 358900,
"transactionclientwatch": {
"value": 358900
},
"amountclientwatch": {
"value": 89792941442
},
"itemclient": {
"value": 2146312
}
},
"amountclient": {
"value": 222577251265
},
"max_time": {
"value": 75600
},
"transactionclient": {
"value": 890590
},
"itemclient": {
"value": 5346273
},
"min_time": {
"value": 32400
}
}
]
}
},
{
"key": 2226,
"doc_count": 844491,
"area_type": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": 1,
"doc_count": 844491,
"saleclient": {
"doc_count": 346801,
"sale_client": {
"value": 346801
}
},
"inwatchinghour": {
"doc_count": 347730,
"transactionclientwatch": {
"value": 347730
},
"amountclientwatch": {
"value": 90585228756
},
"itemclient": {
"value": 1817412
}
},
"amountclient": {
"value": 219008246857
},
"max_time": {
"value": 75600
},
"transactionclient": {
"value": 844491
},
"itemclient": {
"value": 4409412
},
"min_time": {
"value": 32400
}
}
]
}
},
{
"key": 2317,
"doc_count": 812409,
"area_type": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": 1,
"doc_count": 812409,
"saleclient": {
"doc_count": 292933,
"sale_client": {
"value": 292933
}
},
"inwatchinghour": {
"doc_count": 294866,
"transactionclientwatch": {
"value": 294866
},
"amountclientwatch": {
"value": 105661613404
},
"itemclient": {
"value": 2144352
}
},
"amountclient": {
"value": 290725384084
},
"max_time": {
"value": 75600
},
"transactionclient": {
"value": 812409
},
"itemclient": {
"value": 5925558
},
"min_time": {
"value": 32400
}
}
]
}
},
{
"key": 2211,
"doc_count": 811198,
"area_type": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": 1,
"doc_count": 811198,
"saleclient": {
"doc_count": 262617,
"sale_client": {
"value": 262617
}
},
"inwatchinghour": {
"doc_count": 265515,
"transactionclientwatch": {
"value": 265515
},
"amountclientwatch": {
"value": 70763222934
},
"itemclient": {
"value": 1783073
}
},
"amountclient": {
"value": 213071496626
},
"max_time": {
"value": 75600
},
"transactionclient": {
"value": 811198
},
"itemclient": {
"value": 5476443
},
"min_time": {
"value": 32400
}
}
]
}
},
{
"key": 2331,
"doc_count": 806670,
"area_type": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": 1,
"doc_count": 806670,
"saleclient": {
"doc_count": 349472,
"sale_client": {
"value": 349472
}
},
"inwatchinghour": {
"doc_count": 350285,
"transactionclientwatch": {
"value": 350285
},
"amountclientwatch": {
"value": 82784018110
},
"itemclient": {
"value": 2079211
}
},
"amountclient": {
"value": 192804137579
},
"max_time": {
"value": 75600
},
"transactionclient": {
"value": 806670
},
"itemclient": {
"value": 4834069
},
"min_time": {
"value": 32400
}
}
]
}
},
{
"key": 2323,
"doc_count": 749161,
"area_type": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": 1,
"doc_count": 749161,
"saleclient": {
"doc_count": 280928,
"sale_client": {
"value": 280928
}
},
"inwatchinghour": {
"doc_count": 282498,
"transactionclientwatch": {
"value": 282498
},
"amountclientwatch": {
"value": 62082735118
},
"itemclient": {
"value": 1588445
}
},
"amountclient": {
"value": 162365212278
},
"max_time": {
"value": 75600
},
"transactionclient": {
"value": 749161
},
"itemclient": {
"value": 4231490
},
"min_time": {
"value": 32400
}
}
]
}
},
{
"key": 2345,
"doc_count": 727589,
"area_type": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": 1,
"doc_count": 727589,
"saleclient": {
"doc_count": 340141,
"sale_client": {
"value": 340141
}
},
"inwatchinghour": {
"doc_count": 341590,
"transactionclientwatch": {
"value": 341590
},
"amountclientwatch": {
"value": 107492036777
},
"itemclient": {
"value": 2421158
}
},
"amountclient": {
"value": 228611232646
},
"max_time": {
"value": 75600
},
"transactionclient": {
"value": 727589
},
"itemclient": {
"value": 5138628
},
"min_time": {
"value": 32400
}
}
]
}
},
{
"key": 2329,
"doc_count": 663856,
"area_type": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": 1,
"doc_count": 663856,
"saleclient": {
"doc_count": 163358,
"sale_client": {
"value": 163358
}
},
"inwatchinghour": {
"doc_count": 164339,
"transactionclientwatch": {
"value": 164339
},
"amountclientwatch": {
"value": 55298080357
},
"itemclient": {
"value": 1209514
}
},
"amountclient": {
"value": 211070998632
},
"max_time": {
"value": 75600
},
"transactionclient": {
"value": 663856
},
"itemclient": {
"value": 4875689
},
"min_time": {
"value": 32400
}
}
]
}
},
{
"key": 2355,
"doc_count": 613992,
"area_type": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": 1,
"doc_count": 613992,
"saleclient": {
"doc_count": 113575,
"sale_client": {
"value": 113575
}
},
"inwatchinghour": {
"doc_count": 114038,
"transactionclientwatch": {
"value": 114038
},
"amountclientwatch": {
"value": 30494132488
},
"itemclient": {
"value": 563628
}
},
"amountclient": {
"value": 140705052880
},
"max_time": {
"value": 75600
},
"transactionclient": {
"value": 613992
},
"itemclient": {
"value": 2920908
},
"min_time": {
"value": 32400
}
}
]
}
}
]
}
}
}

how to group by duplicate Field in Array List : ElasticSearch

I have a problem with a nested aggregation in Elasticsearch. I have a mapping with a nested field:
"Topics":{"type":"nested","properties":{
"CategoryLev1":{"type":"text","fields":{"keyword":{"type":"keyword","ignore_above":256}}},
"CategoryLev2":{"type":"text","fields":{"keyword":{"type":"keyword","ignore_above":256}}} }}
After indexing these documents:
"Topics": [
{
"CategoryRelevancy": "1.0",
"CategoryLev2": "Money",
"CategoryLev1": "Sales"
},
{
"CategoryRelevancy": "2.0",
"CategoryLev2": "Money",
"CategoryLev1": "Sales"
},
{
"CategoryRelevancy": "1.0",
"CategoryLev2": "Electrical",
"CategoryLev1": "Product"
}
]
"Topics": [
{
"CategoryRelevancy": "1.0",
"CategoryLev2": "Money",
"CategoryLev1": "Sales"
},
{
"CategoryRelevancy": "2.0",
"CategoryLev2": "Methods",
"CategoryLev1": "Sales"
},
{
"CategoryRelevancy": "1.0",
"CategoryLev2": "Engine",
"CategoryLev1": "Product"
}
]
As you can see, in my nested array I have two Topics entries that have duplicate key and value fields. Then I run this query:
{
"size": 10,
"aggregations": {
"resellers": {
"nested": {
"path": "Topics"
},
"aggregations": {
"topicGroup": {
"terms": {
"field": "Topics.CategoryLev1.keyword",
"size": 10
},
"aggregations": {
"Subtopic": {
"terms": {
"field": "Topics.CategoryLev2.keyword"
}
}
}
}
}
}
}
}
Then I get the following result, which is grouped by topic category:
{
"hits": {
"total": 2,
"max_score": 0,
"hits": []
},
"aggregations": {
"resellers": {
"doc_count": 6,
"topicGroup": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "Sales",
"doc_count": 3,
"Subtopic": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "Money",
"doc_count": 3
},
{
"key": "Method",
"doc_count": 1
}
]
}
},
{
"key": "Product",
"doc_count": 2,
"Subtopic": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "Electrical",
"doc_count": 1
},
{
"key": "Engine",
"doc_count": 1
}
]
}
}
]
}
}
}
}
But I want a result like this:
"buckets": [
{
"key": "Sales",
"doc_count": 2,
"Subtopic": {
"buckets": [
{
"key": "Money",
"doc_count": 2
},
{
"key": "Method",
"doc_count": 1
}
]
}
},
{
"key": "Product",
"doc_count": 2,
"Subtopic": {
"buckets": [
{
"key": "Electrical",
"doc_count": 1
},
{
"key": "Engine",
"doc_count": 1
}]
}
}]
Thanks in advance :)

Why am I getting NaN from an elasticsearch aggregate query?

In the query below, occasionally I receive a "NaN" response (see the response below the query).
I'm assuming that, occasionally, some invalid data gets into the "amount" field (the one being aggregated). If that is a valid assumption, how can I find the documents with invalid "amount" fields so I can troubleshoot them?
If that's not a valid assumption, how do I troubleshoot the occasional "NaN" value being returned?
REQUEST:
POST /_msearch
{
"search_type": "query_then_fetch",
"ignore_unavailable": true,
"index": [
"view-2017-10-22",
"view-2017-10-23"
]
}
{
"size": 0,
"query": {
"bool": {
"filter": [
{
"range": {
"handling-time": {
"gte": "1508706273585",
"lte": "1508792673586",
"format": "epoch_millis"
}
}
},
{
"query_string": {
"analyze_wildcard": true,
"query": "+page:\"checkout order confirmation\" +pageType:\"d\""
}
}
]
}
},
"aggs": {
"2": {
"date_histogram": {
"interval": "1h",
"field": "time",
"min_doc_count": 0,
"extended_bounds": {
"min": "1508706273585",
"max": "1508792673586"
},
"format": "epoch_millis"
},
"aggs": {
"1": {
"sum": {
"field": "amount"
}
}
}
}
}
}
RESPONSE:
{
"responses": [
{
"took": 12,
"timed_out": false,
"_shards": {
"total": 10,
"successful": 10,
"failed": 0
},
"hits": {
"total": 44587,
"max_score": 0,
"hits": []
},
"aggregations": {
"2": {
"buckets": [
{
"1": {
"value": "NaN"
},
"key_as_string": "1508706000000",
"key": 1508706000000,
"doc_count": 2915
},
{
"1": {
"value": 300203.74
},
"key_as_string": "1508709600000",
"key": 1508709600000,
"doc_count": 2851
},
{
"1": {
"value": 348139.5600000001
},
"key_as_string": "1508713200000",
"key": 1508713200000,
"doc_count": 3197
},
{
"1": {
"value": "NaN"
},
"key_as_string": "1508716800000",
"key": 1508716800000,
"doc_count": 3449
},
{
"1": {
"value": "NaN"
},
"key_as_string": "1508720400000",
"key": 1508720400000,
"doc_count": 3482
},
{
"1": {
"value": 364449.60999999987
},
"key_as_string": "1508724000000",
"key": 1508724000000,
"doc_count": 3103
},
{
"1": {
"value": 334914.68
},
"key_as_string": "1508727600000",
"key": 1508727600000,
"doc_count": 2722
},
{
"1": {
"value": 315368.09000000014
},
"key_as_string": "1508731200000",
"key": 1508731200000,
"doc_count": 2161
},
{
"1": {
"value": 102244.34
},
"key_as_string": "1508734800000",
"key": 1508734800000,
"doc_count": 742
},
{
"1": {
"value": 37178.63
},
"key_as_string": "1508738400000",
"key": 1508738400000,
"doc_count": 333
},
{
"1": {
"value": 25345.68
},
"key_as_string": "1508742000000",
"key": 1508742000000,
"doc_count": 233
},
{
"1": {
"value": 85454.47000000002
},
"key_as_string": "1508745600000",
"key": 1508745600000,
"doc_count": 477
},
{
"1": {
"value": 24102.719999999994
},
"key_as_string": "1508749200000",
"key": 1508749200000,
"doc_count": 195
},
{
"1": {
"value": 23352.309999999994
},
"key_as_string": "1508752800000",
"key": 1508752800000,
"doc_count": 294
},
{
"1": {
"value": 44353.409999999996
},
"key_as_string": "1508756400000",
"key": 1508756400000,
"doc_count": 450
},
{
"1": {
"value": 80129.89999999998
},
"key_as_string": "1508760000000",
"key": 1508760000000,
"doc_count": 867
},
{
"1": {
"value": 122797.11
},
"key_as_string": "1508763600000",
"key": 1508763600000,
"doc_count": 1330
},
{
"1": {
"value": 157442.29000000004
},
"key_as_string": "1508767200000",
"key": 1508767200000,
"doc_count": 1872
},
{
"1": {
"value": 198831.71
},
"key_as_string": "1508770800000",
"key": 1508770800000,
"doc_count": 2251
},
{
"1": {
"value": 218384.08000000002
},
"key_as_string": "1508774400000",
"key": 1508774400000,
"doc_count": 2305
},
{
"1": {
"value": 229829.22000000006
},
"key_as_string": "1508778000000",
"key": 1508778000000,
"doc_count": 2381
},
{
"1": {
"value": 217157.56000000006
},
"key_as_string": "1508781600000",
"key": 1508781600000,
"doc_count": 2433
},
{
"1": {
"value": 208877.13
},
"key_as_string": "1508785200000",
"key": 1508785200000,
"doc_count": 2223
},
{
"1": {
"value": "NaN"
},
"key_as_string": "1508788800000",
"key": 1508788800000,
"doc_count": 2166
},
{
"1": {
"value": 18268.14
},
"key_as_string": "1508792400000",
"key": 1508792400000,
"doc_count": 155
}
]
}
},
"status": 200
}
]
}
You can do a search for <fieldName>:NaN (on numeric fields) to find numbers that are set to NaN.
Obviously, once you find those, you can either fix the root cause of the field being set to NaN, or you can exclude those records from the aggregation by adding a -<fieldName>:NaN to the query.
(It turns out that the input was feeding in some garbage characters once in every few million documents.)

How to calculate the total number of buckets that have "value" greater than 0?

I have this query that calculates the number of events per bucket. How can I calculate the total number of buckets that have value greater than 0?
GET myindex/_search?
{
"size": 0,
"query": {
"bool": {
"must": [
{
"match": {
"PlateNumber": "111"
}
}
]
}
},
"aggs": {
"daily_intensity": {
"date_histogram": {
"field": "Datetime",
"interval": "day"
},
"aggs": {
"count_of_events": {
"value_count": {
"field": "Monthday"
}
}
}
}
}
}
This is the output that I get. The expected answer is 26, because that is the total number of buckets that have a value greater than 0. Basically, I do not need the output of all the buckets; I only need this total number.
{
"took": 237,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"failed": 0
},
"hits": {
"total": 98,
"max_score": 0,
"hits": []
},
"aggregations": {
"daily_intensity": {
"buckets": [
{
"key_as_string": "2017-05-01T00:00:00.000Z",
"key": 1493596800000,
"doc_count": 3,
"count_of_events": {
"value": 3
}
},
{
"key_as_string": "2017-05-02T00:00:00.000Z",
"key": 1493683200000,
"doc_count": 1,
"count_of_events": {
"value": 1
}
},
{
"key_as_string": "2017-05-03T00:00:00.000Z",
"key": 1493769600000,
"doc_count": 4,
"count_of_events": {
"value": 4
}
},
{
"key_as_string": "2017-05-04T00:00:00.000Z",
"key": 1493856000000,
"doc_count": 6,
"count_of_events": {
"value": 6
}
},
{
"key_as_string": "2017-05-05T00:00:00.000Z",
"key": 1493942400000,
"doc_count": 0,
"count_of_events": {
"value": 0
}
},
{
"key_as_string": "2017-05-06T00:00:00.000Z",
"key": 1494028800000,
"doc_count": 1,
"count_of_events": {
"value": 1
}
},
{
"key_as_string": "2017-05-07T00:00:00.000Z",
"key": 1494115200000,
"doc_count": 5,
"count_of_events": {
"value": 5
}
},
{
"key_as_string": "2017-05-08T00:00:00.000Z",
"key": 1494201600000,
"doc_count": 6,
"count_of_events": {
"value": 6
}
},
{
"key_as_string": "2017-05-09T00:00:00.000Z",
"key": 1494288000000,
"doc_count": 2,
"count_of_events": {
"value": 2
}
},
{
"key_as_string": "2017-05-10T00:00:00.000Z",
"key": 1494374400000,
"doc_count": 3,
"count_of_events": {
"value": 3
}
},
{
"key_as_string": "2017-05-11T00:00:00.000Z",
"key": 1494460800000,
"doc_count": 0,
"count_of_events": {
"value": 0
}
},
{
"key_as_string": "2017-05-12T00:00:00.000Z",
"key": 1494547200000,
"doc_count": 3,
"count_of_events": {
"value": 3
}
},
{
"key_as_string": "2017-05-13T00:00:00.000Z",
"key": 1494633600000,
"doc_count": 0,
"count_of_events": {
"value": 0
}
},
{
"key_as_string": "2017-05-14T00:00:00.000Z",
"key": 1494720000000,
"doc_count": 1,
"count_of_events": {
"value": 1
}
},
{
"key_as_string": "2017-05-15T00:00:00.000Z",
"key": 1494806400000,
"doc_count": 3,
"count_of_events": {
"value": 3
}
},
{
"key_as_string": "2017-05-16T00:00:00.000Z",
"key": 1494892800000,
"doc_count": 0,
"count_of_events": {
"value": 0
}
},
{
"key_as_string": "2017-05-17T00:00:00.000Z",
"key": 1494979200000,
"doc_count": 1,
"count_of_events": {
"value": 1
}
},
{
"key_as_string": "2017-05-18T00:00:00.000Z",
"key": 1495065600000,
"doc_count": 3,
"count_of_events": {
"value": 3
}
},
{
"key_as_string": "2017-05-19T00:00:00.000Z",
"key": 1495152000000,
"doc_count": 2,
"count_of_events": {
"value": 2
}
},
{
"key_as_string": "2017-05-20T00:00:00.000Z",
"key": 1495238400000,
"doc_count": 1,
"count_of_events": {
"value": 1
}
},
{
"key_as_string": "2017-05-21T00:00:00.000Z",
"key": 1495324800000,
"doc_count": 1,
"count_of_events": {
"value": 1
}
},
{
"key_as_string": "2017-05-22T00:00:00.000Z",
"key": 1495411200000,
"doc_count": 5,
"count_of_events": {
"value": 5
}
},
{
"key_as_string": "2017-05-23T00:00:00.000Z",
"key": 1495497600000,
"doc_count": 16,
"count_of_events": {
"value": 16
}
},
{
"key_as_string": "2017-05-24T00:00:00.000Z",
"key": 1495584000000,
"doc_count": 4,
"count_of_events": {
"value": 4
}
},
{
"key_as_string": "2017-05-25T00:00:00.000Z",
"key": 1495670400000,
"doc_count": 6,
"count_of_events": {
"value": 6
}
},
{
"key_as_string": "2017-05-26T00:00:00.000Z",
"key": 1495756800000,
"doc_count": 1,
"count_of_events": {
"value": 1
}
},
{
"key_as_string": "2017-05-27T00:00:00.000Z",
"key": 1495843200000,
"doc_count": 5,
"count_of_events": {
"value": 5
}
},
{
"key_as_string": "2017-05-28T00:00:00.000Z",
"key": 1495929600000,
"doc_count": 4,
"count_of_events": {
"value": 4
}
},
{
"key_as_string": "2017-05-29T00:00:00.000Z",
"key": 1496016000000,
"doc_count": 5,
"count_of_events": {
"value": 5
}
},
{
"key_as_string": "2017-05-30T00:00:00.000Z",
"key": 1496102400000,
"doc_count": 2,
"count_of_events": {
"value": 2
}
},
{
"key_as_string": "2017-05-31T00:00:00.000Z",
"key": 1496188800000,
"doc_count": 4,
"count_of_events": {
"value": 4
}
}
]
}
}
}
You can use a Bucket Script Aggregation and a Sum Bucket Aggregation to achieve this. Try the query below.
GET myindex/_search?
{
"size": 0,
"query": {
"bool": {
"must": [
{
"match": {
"PlateNumber": "111"
}
}
]
}
},
"aggs": {
"daily_intensity": {
"date_histogram": {
"field": "Datetime",
"interval": "day"
},
"aggs": {
"count_of_events": {
"value_count": {
"field": "Monthday"
}
},
"check": {
"bucket_script": {
"buckets_path": {
"count": "count_of_events"
},
"script": "return (params.count > 0 ? 1 : 0)"
}
}
}
},
"bucket_count": {
"sum_bucket": {
"buckets_path": "daily_intensity>check"
}
}
}
}

Resources