Why am I getting NaN from an Elasticsearch aggregation query?

In the query below, occasionally I receive a "NaN" response (see the response below the query).
I'm assuming that, occasionally, some invalid data gets into the "amount" field (the one being aggregated). If that is a valid assumption, how can I find the documents with invalid "amount" values so I can troubleshoot them?
If that's not a valid assumption, how do I troubleshoot the occasional "NaN" value being returned?
REQUEST:
POST /_msearch
{
"search_type": "query_then_fetch",
"ignore_unavailable": true,
"index": [
"view-2017-10-22",
"view-2017-10-23"
]
}
{
"size": 0,
"query": {
"bool": {
"filter": [
{
"range": {
"handling-time": {
"gte": "1508706273585",
"lte": "1508792673586",
"format": "epoch_millis"
}
}
},
{
"query_string": {
"analyze_wildcard": true,
"query": "+page:\"checkout order confirmation\" +pageType:\"d\""
}
}
]
}
},
"aggs": {
"2": {
"date_histogram": {
"interval": "1h",
"field": "time",
"min_doc_count": 0,
"extended_bounds": {
"min": "1508706273585",
"max": "1508792673586"
},
"format": "epoch_millis"
},
"aggs": {
"1": {
"sum": {
"field": "amount"
}
}
}
}
}
}
RESPONSE:
{
"responses": [
{
"took": 12,
"timed_out": false,
"_shards": {
"total": 10,
"successful": 10,
"failed": 0
},
"hits": {
"total": 44587,
"max_score": 0,
"hits": []
},
"aggregations": {
"2": {
"buckets": [
{
"1": {
"value": "NaN"
},
"key_as_string": "1508706000000",
"key": 1508706000000,
"doc_count": 2915
},
{
"1": {
"value": 300203.74
},
"key_as_string": "1508709600000",
"key": 1508709600000,
"doc_count": 2851
},
{
"1": {
"value": 348139.5600000001
},
"key_as_string": "1508713200000",
"key": 1508713200000,
"doc_count": 3197
},
{
"1": {
"value": "NaN"
},
"key_as_string": "1508716800000",
"key": 1508716800000,
"doc_count": 3449
},
{
"1": {
"value": "NaN"
},
"key_as_string": "1508720400000",
"key": 1508720400000,
"doc_count": 3482
},
{
"1": {
"value": 364449.60999999987
},
"key_as_string": "1508724000000",
"key": 1508724000000,
"doc_count": 3103
},
{
"1": {
"value": 334914.68
},
"key_as_string": "1508727600000",
"key": 1508727600000,
"doc_count": 2722
},
{
"1": {
"value": 315368.09000000014
},
"key_as_string": "1508731200000",
"key": 1508731200000,
"doc_count": 2161
},
{
"1": {
"value": 102244.34
},
"key_as_string": "1508734800000",
"key": 1508734800000,
"doc_count": 742
},
{
"1": {
"value": 37178.63
},
"key_as_string": "1508738400000",
"key": 1508738400000,
"doc_count": 333
},
{
"1": {
"value": 25345.68
},
"key_as_string": "1508742000000",
"key": 1508742000000,
"doc_count": 233
},
{
"1": {
"value": 85454.47000000002
},
"key_as_string": "1508745600000",
"key": 1508745600000,
"doc_count": 477
},
{
"1": {
"value": 24102.719999999994
},
"key_as_string": "1508749200000",
"key": 1508749200000,
"doc_count": 195
},
{
"1": {
"value": 23352.309999999994
},
"key_as_string": "1508752800000",
"key": 1508752800000,
"doc_count": 294
},
{
"1": {
"value": 44353.409999999996
},
"key_as_string": "1508756400000",
"key": 1508756400000,
"doc_count": 450
},
{
"1": {
"value": 80129.89999999998
},
"key_as_string": "1508760000000",
"key": 1508760000000,
"doc_count": 867
},
{
"1": {
"value": 122797.11
},
"key_as_string": "1508763600000",
"key": 1508763600000,
"doc_count": 1330
},
{
"1": {
"value": 157442.29000000004
},
"key_as_string": "1508767200000",
"key": 1508767200000,
"doc_count": 1872
},
{
"1": {
"value": 198831.71
},
"key_as_string": "1508770800000",
"key": 1508770800000,
"doc_count": 2251
},
{
"1": {
"value": 218384.08000000002
},
"key_as_string": "1508774400000",
"key": 1508774400000,
"doc_count": 2305
},
{
"1": {
"value": 229829.22000000006
},
"key_as_string": "1508778000000",
"key": 1508778000000,
"doc_count": 2381
},
{
"1": {
"value": 217157.56000000006
},
"key_as_string": "1508781600000",
"key": 1508781600000,
"doc_count": 2433
},
{
"1": {
"value": 208877.13
},
"key_as_string": "1508785200000",
"key": 1508785200000,
"doc_count": 2223
},
{
"1": {
"value": "NaN"
},
"key_as_string": "1508788800000",
"key": 1508788800000,
"doc_count": 2166
},
{
"1": {
"value": 18268.14
},
"key_as_string": "1508792400000",
"key": 1508792400000,
"doc_count": 155
}
]
}
},
"status": 200
}
]
}

You can do a search for <fieldName>:NaN (on numeric fields) to find numbers that are set to NaN.
Obviously, once you find those, you can either fix the root cause of the field being set to NaN, or you can exclude those records from the aggregation by adding a -<fieldName>:NaN to the query.
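For example, here is a quick sketch using the field and indices from the question above (a hypothetical request; adjust the index list and field name as needed):
POST /view-2017-10-22,view-2017-10-23/_search
{
  "size": 10,
  "query": {
    "query_string": {
      "query": "amount:NaN"
    }
  }
}
Any hits are the documents to inspect. Alternatively, appending -amount:NaN to the existing query_string (e.g. +page:"checkout order confirmation" +pageType:"d" -amount:NaN) keeps those documents out of the sum aggregation.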
(It turns out that the input was feeding in some garbage characters once in every few million documents.)

Related

Using Vega with Elasticsearch data containing nested aggregations (or dividing one aggregation by another in Elasticsearch)

I'm trying to do something with Elasticsearch that should be quite simple. I have an index which contains documents of the shape: {"timestamp": int, "pricePerUnit": int, "units": int}. I want to visualize the average price over time in a histogram.
Note that I don't want the average of "pricePerUnit"; I want the average price paid per unit. That means finding the total value in each time bucket by multiplying "pricePerUnit" by "units" for each document and summing those totals, then dividing by the sum of the units sold in the time bucket.
A standard Kibana line chart won't work: I can get the average of "pricePerUnit * units", but I can't divide that aggregation by the sum of the total units. It also can't be done in TSVB, as that doesn't allow scripts/scripted fields, and I can't use Timelion because the "timestamp" field isn't a time field (I know, but there's nothing I can do about it). I'm therefore trying to use Vega. However, I'm running into a problem with nested aggregations. Here's the ES query I'm running:
{
"$schema": "https://vega.github.io/schema/vega/v3.json",
"data": {
"name": "vals",
"url": {
"index": "index_name",
"body": {
"aggs": {
"2": {
"histogram": {
"field": "timestamp",
"interval": 2000,
"min_doc_count": 1
},
"aggs": {
"1": {
"avg": {
"field": "pricePerUnit",
"script": {
"inline": "doc['pricePerUnit'].value * doc['units'].value",
"lang": "painless"
}
}
}
}
}
},
"size": 0,
"stored_fields": [
"*"
],
"script_fields": {
"spend": {
"script": {
"source": "doc['pricePerUnit'].value * doc['units'].value",
"lang": "painless"
}
}
},
"docvalue_fields": [],
"_source": {
"excludes": []
},
"query": {
"bool": {
"must": [],
"filter": [
{
"match_all": {}
},
{
"range": {
"timeslot.startTime": {
"gte": 1621292400,
"lt": 1621428349
}
}
}
],
"should": [],
"must_not": []
}
}
},
"format": {"property": "aggregations.2.buckets"}
}
}
,
"scales": [
{
"name": "yscale",
"type": "linear",
"zero": true,
"domain": {"data": "vals", "field": "1.value"},
"range": "height"
},
{
"name": "xscale",
"type": "time",
"range": "width"
}
],
"axes": [
{"scale": "yscale", "orient": "left"},
{"scale": "xscale", "orient": "bottom"}
],
"marks": [
{
"type": "line",
"encode": {
"update": {
"x": {"scale": "xscale", "field": "key"},
"y": {"scale": "yscale", "field": "1.value"}
}
}
}
]
}
It gives me the following result set:
"took": 1,
"timed_out": false,
"_shards": {
"total": 4,
"successful": 4,
"skipped": 0,
"failed": 0
},
"hits": {
"total": 401,
"max_score": null,
"hits": []
},
"aggregations": {
"2": {
"buckets": [
{
"1": {
"value": 86340
},
"key": 1621316000,
"doc_count": 7
},
{
"1": {
"value": 231592.92307692306
},
"key": 1621318000,
"doc_count": 13
},
{
"1": {
"value": 450529.23529411765
},
"key": 1621320000,
"doc_count": 17
},
{
"1": {
"value": 956080.0555555555
},
"key": 1621322000,
"doc_count": 18
},
{
"1": {
"value": 1199865.5714285714
},
"key": 1621324000,
"doc_count": 14
},
{
"1": {
"value": 875300.7368421053
},
"key": 1621326000,
"doc_count": 19
},
{
"1": {
"value": 926738.8
},
"key": 1621328000,
"doc_count": 20
},
{
"1": {
"value": 3239475.3333333335
},
"key": 1621330000,
"doc_count": 18
},
{
"1": {
"value": 3798063.714285714
},
"key": 1621332000,
"doc_count": 21
},
{
"1": {
"value": 482089.5
},
"key": 1621334000,
"doc_count": 4
},
{
"1": {
"value": 222952.33333333334
},
"key": 1621336000,
"doc_count": 12
},
{
"1": {
"value": 742225.75
},
"key": 1621338000,
"doc_count": 8
},
{
"1": {
"value": 204203.25
},
"key": 1621340000,
"doc_count": 4
},
{
"1": {
"value": 294886
},
"key": 1621342000,
"doc_count": 4
},
{
"1": {
"value": 284393.75
},
"key": 1621344000,
"doc_count": 4
},
{
"1": {
"value": 462800.5
},
"key": 1621346000,
"doc_count": 4
},
{
"1": {
"value": 233321.2
},
"key": 1621348000,
"doc_count": 5
},
{
"1": {
"value": 436757.8
},
"key": 1621350000,
"doc_count": 5
},
{
"1": {
"value": 4569021
},
"key": 1621352000,
"doc_count": 1
},
{
"1": {
"value": 368489.5
},
"key": 1621354000,
"doc_count": 4
},
{
"1": {
"value": 208359.4
},
"key": 1621356000,
"doc_count": 5
},
{
"1": {
"value": 7827146.375
},
"key": 1621358000,
"doc_count": 8
},
{
"1": {
"value": 63873.5
},
"key": 1621360000,
"doc_count": 6
},
{
"1": {
"value": 21300
},
"key": 1621364000,
"doc_count": 1
},
{
"1": {
"value": 138500
},
"key": 1621366000,
"doc_count": 2
},
{
"1": {
"value": 5872400
},
"key": 1621372000,
"doc_count": 1
},
{
"1": {
"value": 720200
},
"key": 1621374000,
"doc_count": 1
},
{
"1": {
"value": 208634.33333333334
},
"key": 1621402000,
"doc_count": 3
},
{
"1": {
"value": 306248.5
},
"key": 1621404000,
"doc_count": 10
},
{
"1": {
"value": 328983.77777777775
},
"key": 1621406000,
"doc_count": 18
},
{
"1": {
"value": 1081724
},
"key": 1621408000,
"doc_count": 10
},
{
"1": {
"value": 2451076.785714286
},
"key": 1621410000,
"doc_count": 14
},
{
"1": {
"value": 1952910.2857142857
},
"key": 1621412000,
"doc_count": 14
},
{
"1": {
"value": 2294818.1875
},
"key": 1621414000,
"doc_count": 16
},
{
"1": {
"value": 2841910.388888889
},
"key": 1621416000,
"doc_count": 18
},
{
"1": {
"value": 2401278.9523809524
},
"key": 1621418000,
"doc_count": 21
},
{
"1": {
"value": 4311845.4
},
"key": 1621420000,
"doc_count": 5
},
{
"1": {
"value": 617102.5333333333
},
"key": 1621422000,
"doc_count": 15
},
{
"1": {
"value": 590469.7142857143
},
"key": 1621424000,
"doc_count": 14
},
{
"1": {
"value": 391918.85714285716
},
"key": 1621426000,
"doc_count": 14
},
{
"1": {
"value": 202163.66666666666
},
"key": 1621428000,
"doc_count": 3
}
]
}
}
}
The problem is that I can't extract the "value" field from the "1" sub-aggregation. I've tried using a flatten transform, but it doesn't seem to work. If anyone can either:
a) Tell me how to solve this specific problem with Vega; or
b) Tell me another way to solve my original problem
I'd be much obliged!
Your DSL query is looking great. If I've read this correctly, I believe what you are looking for is a project transform. This can make life a lot easier when dealing with nested variables, since there are certain instances where they just don't behave as expected.
You also need to reference the data within the marks, otherwise nothing will be plotted.
Below is how to fix this; you'll just need to fill in your url parameter.
{
$schema: https://vega.github.io/schema/vega/v3.json
data: [
{
name: vals
url: ... // fill this in
transform: [
{
type: project
fields: [
1.value
doc_count
key
]
as: [
val
doc_count
key
]
}
]
}
]
scales: [
{
name: yscale
type: linear
zero: true
domain: {
data: vals
field: val
}
range: height
}
{
name: xscale
type: time
domain: {
data: vals
field: key
}
range: width
}
]
axes: [
{
scale: yscale
orient: left
}
{
scale: xscale
orient: bottom
}
]
marks: [
{
type: line
from: {
data: vals
}
encode: {
update: {
x: {
scale: xscale
field: key
}
y: {
scale: yscale
field: val
}
}
}
}
]
}
In the future, if you are having issues, look at the examples in the Vega Gallery. They also have extensive documentation. These two combined are all you need.
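As a side note on option (b) from the question (a sketch only, not part of the answer above, and the aggregation names per_time, total_spend, total_units, and avg_price_per_unit are invented): the true weighted average can also be computed entirely in Elasticsearch by summing pricePerUnit * units and units separately per histogram bucket, then dividing the two with a bucket_script pipeline aggregation:
{
  "size": 0,
  "aggs": {
    "per_time": {
      "histogram": { "field": "timestamp", "interval": 2000, "min_doc_count": 1 },
      "aggs": {
        "total_spend": {
          "sum": {
            "script": {
              "lang": "painless",
              "source": "doc['pricePerUnit'].value * doc['units'].value"
            }
          }
        },
        "total_units": { "sum": { "field": "units" } },
        "avg_price_per_unit": {
          "bucket_script": {
            "buckets_path": { "spend": "total_spend", "units": "total_units" },
            "script": "params.spend / params.units"
          }
        }
      }
    }
  }
}
The Vega spec would then read avg_price_per_unit.value from each bucket instead of 1.value.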

date.getHourOfDay() is giving strange results in aggregation

I am indexing some events and trying to get the unique hours, but the terms aggregation is giving a weird response. I have the following query.
{
"size": 0,
"query": {
"bool": {
"must": [
{
"terms": {
"City": [
"Chicago"
]
}
},
{
"range": {
"eventDate": {
"gte": "2018-06-22",
"lte": "2018-06-22"
}
}
}
]
}
},
"aggs": {
"Hours": {
"terms": {
"script": "doc['eventDate'].date.getHourOfDay()"
}
}
}
}
This query produces the following response.
"buckets": [
{
"key": "19",
"doc_count": 12
},
{
"key": "9",
"doc_count": 7
},
{
"key": "15",
"doc_count": 4
},
{
"key": "16",
"doc_count": 4
},
{
"key": "20",
"doc_count": 4
},
{
"key": "12",
"doc_count": 2
},
{
"key": "6",
"doc_count": 2
},
{
"key": "8",
"doc_count": 2
},
{
"key": "10",
"doc_count": 1
},
{
"key": "11",
"doc_count": 1
}
]
Now I changed the range to get the events for the past month
{
"range": {
"eventDate": {
"gte": "2018-05-22",
"lte": "2018-06-22"
}
}
}
and the response I got was
"Hours": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 1319,
"buckets": [
{
"key": "22",
"doc_count": 805
},
{
"key": "14",
"doc_count": 370
},
{
"key": "15",
"doc_count": 250
},
{
"key": "21",
"doc_count": 248
},
{
"key": "16",
"doc_count": 195
},
{
"key": "0",
"doc_count": 191
},
{
"key": "13",
"doc_count": 176
},
{
"key": "3",
"doc_count": 168
},
{
"key": "20",
"doc_count": 159
},
{
"key": "11",
"doc_count": 148
}
]
}
As you can see, I got buckets with keys 6, 8, 9, 10 and 12 in the response to the first query but not in the second, which is very strange since the documents returned by the first query are a small subset of those returned by the second. Is this a bug or am I missing something obvious?
Thanks
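One thing worth noting here (an observation based on the responses shown, not a verified diagnosis): a terms aggregation returns only the top 10 buckets by document count by default, and the second response's "sum_other_doc_count": 1319 indicates that more hour buckets were computed but trimmed from the output. Requesting enough buckets to cover every hour, e.g. by setting a size on the terms aggregation, should make the "missing" keys reappear:
"aggs": {
  "Hours": {
    "terms": {
      "script": "doc['eventDate'].date.getHourOfDay()",
      "size": 24
    }
  }
}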

Elasticsearch sort on derivative aggregation to find trending topics

I am collecting twitter data and want to find the account that received the highest number of new followers during a period of 48 hours.
The index is populated regularly with account data and a count of followers, together with a datestamp.
I got as far as computing a derivative value, but I can't figure out how to sort on the derivative so as to return the accounts with the highest derivatives (the accounts that gained or lost the most followers during the last 48 hours).
The mapping:
{
"twfollowers" : {
"mappings" : {
"twfollowers" : {
"properties" : {
"followers" : {
"type" : "long"
},
"logDate" : {
"type" : "date"
},
"screen_name" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"user_id" : {
"type" : "long"
}
}
}
}
}
}
And here is the query:
GET /twfollowers/twfollowers/_search
{
"query": {
"bool" : {
"must" : {
"range": {"logDate": {
"gte" : "now-2d/d",
"lt" : "now/d"
}}
}
}
},
"size": 0,
"aggs": {
"users": {
"terms": {
"field": "screen_name.keyword",
"size": 10
},
"aggs": {
"my_date_histo": {
"date_histogram": {
"field": "logDate",
"interval": "day"
},
"aggs": {
"the_sum": {
"max": {
"field": "followers"
}
},
"the_diff": {
"derivative": {
"buckets_path": "the_sum"
}
}
}
}
}
}
}
}
And finally the results:
{
"took": 12,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"skipped": 0,
"failed": 0
},
"hits": {
"total": 129621,
"max_score": 0,
"hits": []
},
"aggregations": {
"users": {
"doc_count_error_upper_bound": 356,
"sum_other_doc_count": 122394,
"buckets": [
{
"key": "Sero83954560",
"doc_count": 968,
"my_date_histo": {
"buckets": [
{
"key_as_string": "2018-09-03T00:00:00.000Z",
"key": 1535932800000,
"doc_count": 268,
"the_sum": {
"value": 870
}
},
{
"key_as_string": "2018-09-04T00:00:00.000Z",
"key": 1536019200000,
"doc_count": 700,
"the_sum": {
"value": 873
},
"the_diff": {
"value": 3
}
}
]
}
},
{
"key": "Hajk",
"doc_count": 913,
"my_date_histo": {
"buckets": [
{
"key_as_string": "2018-09-03T00:00:00.000Z",
"key": 1535932800000,
"doc_count": 287,
"the_sum": {
"value": 1529
}
},
{
"key_as_string": "2018-09-04T00:00:00.000Z",
"key": 1536019200000,
"doc_count": 626,
"the_sum": {
"value": 1532
},
"the_diff": {
"value": 3
}
}
]
}
},
{
"key": "idagoraSE",
"doc_count": 831,
"my_date_histo": {
"buckets": [
{
"key_as_string": "2018-09-03T00:00:00.000Z",
"key": 1535932800000,
"doc_count": 173,
"the_sum": {
"value": 1165
}
},
{
"key_as_string": "2018-09-04T00:00:00.000Z",
"key": 1536019200000,
"doc_count": 658,
"the_sum": {
"value": 1166
},
"the_diff": {
"value": 1
}
}
]
}
},
{
"key": "Dodgeman2ever",
"doc_count": 779,
"my_date_histo": {
"buckets": [
{
"key_as_string": "2018-09-03T00:00:00.000Z",
"key": 1535932800000,
"doc_count": 235,
"the_sum": {
"value": 2017
}
},
{
"key_as_string": "2018-09-04T00:00:00.000Z",
"key": 1536019200000,
"doc_count": 544,
"the_sum": {
"value": 2031
},
"the_diff": {
"value": 14
}
}
]
}
},
{
"key": "KettilsMead",
"doc_count": 743,
"my_date_histo": {
"buckets": [
{
"key_as_string": "2018-09-03T00:00:00.000Z",
"key": 1535932800000,
"doc_count": 453,
"the_sum": {
"value": 2860
}
},
{
"key_as_string": "2018-09-04T00:00:00.000Z",
"key": 1536019200000,
"doc_count": 290,
"the_sum": {
"value": 2862
},
"the_diff": {
"value": 2
}
}
]
}
},
{
"key": "BWhalbergarvid",
"doc_count": 683,
"my_date_histo": {
"buckets": [
{
"key_as_string": "2018-09-03T00:00:00.000Z",
"key": 1535932800000,
"doc_count": 87,
"the_sum": {
"value": 200
}
},
{
"key_as_string": "2018-09-04T00:00:00.000Z",
"key": 1536019200000,
"doc_count": 596,
"the_sum": {
"value": 203
},
"the_diff": {
"value": 3
}
}
]
}
},
{
"key": "MorKarins",
"doc_count": 601,
"my_date_histo": {
"buckets": [
{
"key_as_string": "2018-09-03T00:00:00.000Z",
"key": 1535932800000,
"doc_count": 86,
"the_sum": {
"value": 3874
}
},
{
"key_as_string": "2018-09-04T00:00:00.000Z",
"key": 1536019200000,
"doc_count": 515,
"the_sum": {
"value": 3875
},
"the_diff": {
"value": 1
}
}
]
}
},
{
"key": "erlhel",
"doc_count": 593,
"my_date_histo": {
"buckets": [
{
"key_as_string": "2018-09-03T00:00:00.000Z",
"key": 1535932800000,
"doc_count": 218,
"the_sum": {
"value": 730
}
},
{
"key_as_string": "2018-09-04T00:00:00.000Z",
"key": 1536019200000,
"doc_count": 375,
"the_sum": {
"value": 730
},
"the_diff": {
"value": 0
}
}
]
}
},
{
"key": "SaveSweden2018",
"doc_count": 560,
"my_date_histo": {
"buckets": [
{
"key_as_string": "2018-09-03T00:00:00.000Z",
"key": 1535932800000,
"doc_count": 157,
"the_sum": {
"value": 711
}
},
{
"key_as_string": "2018-09-04T00:00:00.000Z",
"key": 1536019200000,
"doc_count": 403,
"the_sum": {
"value": 732
},
"the_diff": {
"value": 21
}
}
]
}
},
{
"key": "MXCartoons",
"doc_count": 556,
"my_date_histo": {
"buckets": [
{
"key_as_string": "2018-09-03T00:00:00.000Z",
"key": 1535932800000,
"doc_count": 40,
"the_sum": {
"value": 4313
}
},
{
"key_as_string": "2018-09-04T00:00:00.000Z",
"key": 1536019200000,
"doc_count": 516,
"the_sum": {
"value": 4315
},
"the_diff": {
"value": 2
}
}
]
}
}
]
}
}
}
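This related question appears here without an answer. One possible approach (a sketch only, not validated against this index; the aggregation names growth and top_growth are invented) is to pull each user's derivative up to the terms level with a max_bucket pipeline aggregation and then reorder the user buckets with a bucket_sort pipeline aggregation (available from Elasticsearch 6.1):
"aggs": {
  "users": {
    "terms": { "field": "screen_name.keyword", "size": 500 },
    "aggs": {
      "my_date_histo": {
        "date_histogram": { "field": "logDate", "interval": "day" },
        "aggs": {
          "the_sum": { "max": { "field": "followers" } },
          "the_diff": { "derivative": { "buckets_path": "the_sum" } }
        }
      },
      "growth": { "max_bucket": { "buckets_path": "my_date_histo>the_diff" } },
      "top_growth": {
        "bucket_sort": {
          "sort": [ { "growth": { "order": "desc" } } ],
          "size": 10
        }
      }
    }
  }
}
One caveat: the terms aggregation still selects its candidate buckets by doc_count, so its size has to be large enough to include the accounts with the biggest follower changes, otherwise they can be missed entirely.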

How to calculate the total number of buckets that have "value" greater than 0?

I have this query that calculates the number of events per bucket. How can I calculate the total number of buckets that have a value greater than 0?
GET myindex/_search?
{
"size": 0,
"query": {
"bool": {
"must": [
{
"match": {
"PlateNumber": "111"
}
}
]
}
},
"aggs": {
"daily_intensity": {
"date_histogram": {
"field": "Datetime",
"interval": "day"
},
"aggs": {
"count_of_events": {
"value_count": {
"field": "Monthday"
}
}
}
}
}
}
This is the output that I get. The expected answer is 26, because there are 26 buckets in total with a value greater than 0. Basically, I don't need the output of all the buckets; I only need this total number.
{
"took": 237,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"failed": 0
},
"hits": {
"total": 98,
"max_score": 0,
"hits": []
},
"aggregations": {
"daily_intensity": {
"buckets": [
{
"key_as_string": "2017-05-01T00:00:00.000Z",
"key": 1493596800000,
"doc_count": 3,
"count_of_events": {
"value": 3
}
},
{
"key_as_string": "2017-05-02T00:00:00.000Z",
"key": 1493683200000,
"doc_count": 1,
"count_of_events": {
"value": 1
}
},
{
"key_as_string": "2017-05-03T00:00:00.000Z",
"key": 1493769600000,
"doc_count": 4,
"count_of_events": {
"value": 4
}
},
{
"key_as_string": "2017-05-04T00:00:00.000Z",
"key": 1493856000000,
"doc_count": 6,
"count_of_events": {
"value": 6
}
},
{
"key_as_string": "2017-05-05T00:00:00.000Z",
"key": 1493942400000,
"doc_count": 0,
"count_of_events": {
"value": 0
}
},
{
"key_as_string": "2017-05-06T00:00:00.000Z",
"key": 1494028800000,
"doc_count": 1,
"count_of_events": {
"value": 1
}
},
{
"key_as_string": "2017-05-07T00:00:00.000Z",
"key": 1494115200000,
"doc_count": 5,
"count_of_events": {
"value": 5
}
},
{
"key_as_string": "2017-05-08T00:00:00.000Z",
"key": 1494201600000,
"doc_count": 6,
"count_of_events": {
"value": 6
}
},
{
"key_as_string": "2017-05-09T00:00:00.000Z",
"key": 1494288000000,
"doc_count": 2,
"count_of_events": {
"value": 2
}
},
{
"key_as_string": "2017-05-10T00:00:00.000Z",
"key": 1494374400000,
"doc_count": 3,
"count_of_events": {
"value": 3
}
},
{
"key_as_string": "2017-05-11T00:00:00.000Z",
"key": 1494460800000,
"doc_count": 0,
"count_of_events": {
"value": 0
}
},
{
"key_as_string": "2017-05-12T00:00:00.000Z",
"key": 1494547200000,
"doc_count": 3,
"count_of_events": {
"value": 3
}
},
{
"key_as_string": "2017-05-13T00:00:00.000Z",
"key": 1494633600000,
"doc_count": 0,
"count_of_events": {
"value": 0
}
},
{
"key_as_string": "2017-05-14T00:00:00.000Z",
"key": 1494720000000,
"doc_count": 1,
"count_of_events": {
"value": 1
}
},
{
"key_as_string": "2017-05-15T00:00:00.000Z",
"key": 1494806400000,
"doc_count": 3,
"count_of_events": {
"value": 3
}
},
{
"key_as_string": "2017-05-16T00:00:00.000Z",
"key": 1494892800000,
"doc_count": 0,
"count_of_events": {
"value": 0
}
},
{
"key_as_string": "2017-05-17T00:00:00.000Z",
"key": 1494979200000,
"doc_count": 1,
"count_of_events": {
"value": 1
}
},
{
"key_as_string": "2017-05-18T00:00:00.000Z",
"key": 1495065600000,
"doc_count": 3,
"count_of_events": {
"value": 3
}
},
{
"key_as_string": "2017-05-19T00:00:00.000Z",
"key": 1495152000000,
"doc_count": 2,
"count_of_events": {
"value": 2
}
},
{
"key_as_string": "2017-05-20T00:00:00.000Z",
"key": 1495238400000,
"doc_count": 1,
"count_of_events": {
"value": 1
}
},
{
"key_as_string": "2017-05-21T00:00:00.000Z",
"key": 1495324800000,
"doc_count": 1,
"count_of_events": {
"value": 1
}
},
{
"key_as_string": "2017-05-22T00:00:00.000Z",
"key": 1495411200000,
"doc_count": 5,
"count_of_events": {
"value": 5
}
},
{
"key_as_string": "2017-05-23T00:00:00.000Z",
"key": 1495497600000,
"doc_count": 16,
"count_of_events": {
"value": 16
}
},
{
"key_as_string": "2017-05-24T00:00:00.000Z",
"key": 1495584000000,
"doc_count": 4,
"count_of_events": {
"value": 4
}
},
{
"key_as_string": "2017-05-25T00:00:00.000Z",
"key": 1495670400000,
"doc_count": 6,
"count_of_events": {
"value": 6
}
},
{
"key_as_string": "2017-05-26T00:00:00.000Z",
"key": 1495756800000,
"doc_count": 1,
"count_of_events": {
"value": 1
}
},
{
"key_as_string": "2017-05-27T00:00:00.000Z",
"key": 1495843200000,
"doc_count": 5,
"count_of_events": {
"value": 5
}
},
{
"key_as_string": "2017-05-28T00:00:00.000Z",
"key": 1495929600000,
"doc_count": 4,
"count_of_events": {
"value": 4
}
},
{
"key_as_string": "2017-05-29T00:00:00.000Z",
"key": 1496016000000,
"doc_count": 5,
"count_of_events": {
"value": 5
}
},
{
"key_as_string": "2017-05-30T00:00:00.000Z",
"key": 1496102400000,
"doc_count": 2,
"count_of_events": {
"value": 2
}
},
{
"key_as_string": "2017-05-31T00:00:00.000Z",
"key": 1496188800000,
"doc_count": 4,
"count_of_events": {
"value": 4
}
}
]
}
}
}
You can use a Bucket Script Aggregation and a Sum Bucket Aggregation to achieve this. Try the query below.
GET myindex/_search?
{
"size": 0,
"query": {
"bool": {
"must": [
{
"match": {
"PlateNumber": "111"
}
}
]
}
},
"aggs": {
"daily_intensity": {
"date_histogram": {
"field": "Datetime",
"interval": "day"
},
"aggs": {
"count_of_events": {
"value_count": {
"field": "Monthday"
}
},
"check": {
"bucket_script": {
"buckets_path": {
"count": "count_of_events"
},
"script": "return (params.count > 0 ? 1 : 0)"
}
}
}
},
"bucket_count": {
"sum_bucket": {
"buckets_path": "daily_intensity>check"
}
}
}
}
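If this works as intended, the total comes back as a sibling of daily_intensity in the aggregations section of the response, roughly in this shape (value illustrative), so the per-day buckets can be ignored and only bucket_count.value read off:
"aggregations": {
  "daily_intensity": {
    "buckets": [ ... ]
  },
  "bucket_count": {
    "value": 26
  }
}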

ElasticSearch Facets Terms Query float precision

I have a problem with a facets/terms query (on ElasticSearch 1.7.0). All float values are stored correctly in the database and in the Elasticsearch bulk file, but when I request the data I get values like "2.99000000954".
The strange thing is that when I query with "2.99000000954", the engine finds the correct Article, i.e. the article with the "2.99" value.
Please have a look at the mapping, the bulk file, and the curl request below:
Mapping (from _plugin/head)
"pvi": {
"include_in_all": false,
"type": "float",
"fields": {
"raw": {
"type": "float"
},
"sort": {
"type": "float"
}
}
}
elastic_bulk_Article_en_XXX.json0
{
"pvi": [
"2.99"
],
}
The curl call
curl -XGET 'http://elasticsearch:9200/entrepriseName_search_index_fr_fr/Article/_search' -d '{"query":{"filtered":{"query":{"match_all":{}},"filter":[]}},"aggs":{"pvi":{"filter":{"query":{"query_string":{"query":"*","fuzzy_prefix_length":1,"fields":["pvi"]}}},"aggs":{"pvi":{"terms":{"field":"pvi","size":25,"order":{"_term":"asc"}}}}}},"size":0}'
The curl call results
{
"aggregations": {
"pvi": {
"doc_count": 1007,
"pvi": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": 1,
"doc_count": 1
},
{
"key": 2.99000000954,
"doc_count": 1
},
{
"key": 3.99000000954,
"doc_count": 6
},
{
"key": 4.98999977112,
"doc_count": 33
},
{
"key": 5.98999977112,
"doc_count": 46
},
{
"key": 6.98999977112,
"doc_count": 11
},
{
"key": 7.98999977112,
"doc_count": 69
},
{
"key": 9.98999977112,
"doc_count": 78
},
{
"key": 12.9899997711,
"doc_count": 107
},
{
"key": 15.9899997711,
"doc_count": 135
},
{
"key": 17.9899997711,
"doc_count": 60
},
{
"key": 19.9899997711,
"doc_count": 158
},
{
"key": 22.9899997711,
"doc_count": 17
},
{
"key": 25.9899997711,
"doc_count": 143
},
{
"key": 27.9899997711,
"doc_count": 2
},
{
"key": 29.9899997711,
"doc_count": 70
},
{
"key": 35.9900016785,
"doc_count": 25
},
{
"key": 39,
"doc_count": 1
},
{
"key": 39.9900016785,
"doc_count": 28
},
{
"key": 49.9900016785,
"doc_count": 12
},
{
"key": 59.9900016785,
"doc_count": 3
},
{
"key": 69.9899978638,
"doc_count": 1
}
]
}
}
},
"query": null,
"checked": "{}"
}
I've found the solution: I changed the datatype from float to long and everything works!
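A side note (not from the original poster): trailing digits such as 2.99000000954 are an artifact of single-precision float storage. If the decimal places need to be preserved rather than converting to integers, re-mapping the field as double (available in 1.7) is another option, e.g.:
"pvi": {
  "include_in_all": false,
  "type": "double",
  "fields": {
    "raw": { "type": "double" },
    "sort": { "type": "double" }
  }
}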
