ElasticSearch difference aggregation - elasticsearch

This is a piece of my data stored :
[
{
"name": "disk.device.write.requests",
"type": "cumulative",
"unit": "request",
"volume": 0,
"user_id": "b0407ee332f6474c87d1e666262d4783",
"project_id": "75ebb9556f8c4e36b0d3e722a57ff3bb",
"resource_id": "7837ab92-5eb7-4cdc-9da3-5f1d2a385841-hda",
"timestamp": "2021-11-14T13:28:45.873289",
"resource_metadata": {
"display_name": "ali",
"name": "instance-00000004",
"instance_id": "7837ab92-5eb7-4cdc-9da3-5f1d2a385841",
"instance_type": "Tochal",
"host": "b34b47c6129603ae3d0387bfa8bf8fe487b0a8424d7e3debb6c69b6d",
"instance_host": "os",
"flavor": {
"id": "t1",
"name": "Tochal",
"vcpus": 4,
"ram": 4096,
"disk": 40,
"ephemeral": 0,
"swap": 0
},
"status": "active",
"state": "running",
"task_state": "",
"image": {
"id": "f77ec16e-1c4e-4ed7-b340-b537ab008367"
},
"image_ref": "f77ec16e-1c4e-4ed7-b340-b537ab008367",
"image_ref_url": null,
"architecture": "x86_64",
"os_type": "hvm",
"vcpus": 4,
"memory_mb": 4096,
"disk_gb": 40,
"ephemeral_gb": 0,
"root_gb": 40,
"disk_name": "hda"
},
"source": "openstack",
"id": "cafd91ab-454e-11ec-b8ba-3b125e027b37",
"monotonic_time": null
}
...]
I've written an aggregation to group data in one hour by name field for a project:
{
index: 'cm',
size: 0,
pretty: true,
body: {
query: {
bool: {
must: [
{
match: {
project_id: '75ebb9556f8c4e36b0d3e722a57ff3bb',
},
},
{
range: {
timestamp: {
gte: 'now-1H',
},
},
},
],
},
},
aggs: {
names: {
terms: { field: 'name' },
aggs: {
myvalue: { sum: { field: 'volume' } },
},
},
},
},
}
And it's output is:
"aggregations": {
"names": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 4,
"buckets": [
{
"key": "disk.device.read.bytes",
"doc_count": 8,
"hatprice": {
"value": 2311073040
}
},
{
"key": "disk.device.read.requests",
"doc_count": 8,
"hatprice": {
"value": 68796
}
},
{
"key": "disk.device.write.bytes",
"doc_count": 8,
"hatprice": {
"value": 13117853696
}
},
{
"key": "disk.device.write.requests",
"doc_count": 8,
"hatprice": {
"value": 776618
}
},
{
"key": "cpu",
"doc_count": 4,
"hatprice": {
"value": 4541150000000
}
},
{
"key": "memory.usage",
"doc_count": 4,
"hatprice": {
"value": 1741
}
},
{
"key": "network.incoming.bytes",
"doc_count": 4,
"hatprice": {
"value": 532735722
}
},
{
"key": "network.incoming.packets",
"doc_count": 4,
"hatprice": {
"value": 864945
}
},
{
"key": "network.outgoing.bytes",
"doc_count": 4,
"hatprice": {
"value": 58562803
}
},
{
"key": "network.outgoing.packets",
"doc_count": 4,
"hatprice": {
"value": 439204
}
}
]
}
}
I need to aggregate base on volume field minus previous hour volume value.
Is it possible?
e.g: network.incoming.bytes in current hour. (not from first).

I think I've solved the problem using SERIAL_DIFF but not sure:
{
index: 'cm',
size: 0,
pretty: true,
body: {
query: {
bool: {
must: [
{
match: {
project_id: '75ebb9556f8c4e36b0d3e722a57ff3bb',
},
},
{
range: {
timestamp: {
gte: 'now-2H',
},
},
},
],
},
},
aggs: {
names: {
terms: { field: 'name' },
aggs: {
mydateh: {
date_histogram: {
field: 'timestamp',
calendar_interval: 'hour',
},
aggs: {
volrate: { max: { field: 'volume' } },
diff: {
serial_diff: {
buckets_path: 'volrate',
lag: 1,
},
},
},
},
},
},
},
},
}

Related

Using Vega with Elasticsearch data containing nested aggregations (or dividing one aggregation by another in Elasticsearch)

I'm trying to do something with Elasticsearch that should be quite simple. I have an index which contains documents of the shape: {"timestamp": int, "pricePerUnit": int, "units": int}. I want to visualize the average price over time in a histogram. Note that I don't want the average of the "pricePerUnit", I want the average price paid per unit, which means finding the total value in each time bucket by multiplying the "pricePerUnit" by the "units" for each document, and summing the total value sold in each document, then dividing by the sum of the total units sold in the time bucket to get the average price paid per unit. A standard Kibana line chart won't work. I can get the average "pricePerUnit * units", but can't divide this aggregation by the sum of the total units. Also can't be done in TSVB, as this doesn't allow for scripts/scripted fields. Can't use timelion, because the "timestamp" field isn't a time field (I know, but there's nothing I can do about it). I'm therefore trying to use Vega. However, I'm running into a problem with nested aggregations. Here's the ES query I'm running:
{
"$schema": "https://vega.github.io/schema/vega/v3.json",
"data": {
"name": "vals",
"url": {
"index": "index_name",
"body": {
"aggs": {
"2": {
"histogram": {
"field": "timestamp",
"interval": 2000,
"min_doc_count": 1
},
"aggs": {
"1": {
"avg": {
"field": "pricePerUnit",
"script": {
"inline": "doc['pricePerUnit'].value * doc['units'].value",
"lang": "painless"
}
}
}
}
}
},
"size": 0,
"stored_fields": [
"*"
],
"script_fields": {
"spend": {
"script": {
"source": "doc['pricePerUnit'].value * doc['units'].value",
"lang": "painless"
}
}
},
"docvalue_fields": [],
"_source": {
"excludes": []
},
"query": {
"bool": {
"must": [],
"filter": [
{
"match_all": {}
},
{
"range": {
"timeslot.startTime": {
"gte": 1621292400,
"lt": 1621428349
}
}
}
],
"should": [],
"must_not": []
}
}
},
"format": {"property": "aggregations.2.buckets"}
}
}
,
"scales": [
{
"name": "yscale",
"type": "linear",
"zero": true,
"domain": {"data": "vals", "field": "1.value"},
"range": "height"
},
{
"name": "xscale",
"type": "time",
"range": "width"
}
],
"axes": [
{"scale": "yscale", "orient": "left"},
{"scale": "xscale", "orient": "bottom"}
],
"marks": [
{
"type": "line",
"encode": {
"update": {
"x": {"scale": "xscale", "field": "key"},
"y": {"scale": "yscale", "field": "1.value"}
}
}
}
]
}
It gives me the following result set:
"took": 1,
"timed_out": false,
"_shards": {
"total": 4,
"successful": 4,
"skipped": 0,
"failed": 0
},
"hits": {
"total": 401,
"max_score": null,
"hits": []
},
"aggregations": {
"2": {
"buckets": [
{
"1": {
"value": 86340
},
"key": 1621316000,
"doc_count": 7
},
{
"1": {
"value": 231592.92307692306
},
"key": 1621318000,
"doc_count": 13
},
{
"1": {
"value": 450529.23529411765
},
"key": 1621320000,
"doc_count": 17
},
{
"1": {
"value": 956080.0555555555
},
"key": 1621322000,
"doc_count": 18
},
{
"1": {
"value": 1199865.5714285714
},
"key": 1621324000,
"doc_count": 14
},
{
"1": {
"value": 875300.7368421053
},
"key": 1621326000,
"doc_count": 19
},
{
"1": {
"value": 926738.8
},
"key": 1621328000,
"doc_count": 20
},
{
"1": {
"value": 3239475.3333333335
},
"key": 1621330000,
"doc_count": 18
},
{
"1": {
"value": 3798063.714285714
},
"key": 1621332000,
"doc_count": 21
},
{
"1": {
"value": 482089.5
},
"key": 1621334000,
"doc_count": 4
},
{
"1": {
"value": 222952.33333333334
},
"key": 1621336000,
"doc_count": 12
},
{
"1": {
"value": 742225.75
},
"key": 1621338000,
"doc_count": 8
},
{
"1": {
"value": 204203.25
},
"key": 1621340000,
"doc_count": 4
},
{
"1": {
"value": 294886
},
"key": 1621342000,
"doc_count": 4
},
{
"1": {
"value": 284393.75
},
"key": 1621344000,
"doc_count": 4
},
{
"1": {
"value": 462800.5
},
"key": 1621346000,
"doc_count": 4
},
{
"1": {
"value": 233321.2
},
"key": 1621348000,
"doc_count": 5
},
{
"1": {
"value": 436757.8
},
"key": 1621350000,
"doc_count": 5
},
{
"1": {
"value": 4569021
},
"key": 1621352000,
"doc_count": 1
},
{
"1": {
"value": 368489.5
},
"key": 1621354000,
"doc_count": 4
},
{
"1": {
"value": 208359.4
},
"key": 1621356000,
"doc_count": 5
},
{
"1": {
"value": 7827146.375
},
"key": 1621358000,
"doc_count": 8
},
{
"1": {
"value": 63873.5
},
"key": 1621360000,
"doc_count": 6
},
{
"1": {
"value": 21300
},
"key": 1621364000,
"doc_count": 1
},
{
"1": {
"value": 138500
},
"key": 1621366000,
"doc_count": 2
},
{
"1": {
"value": 5872400
},
"key": 1621372000,
"doc_count": 1
},
{
"1": {
"value": 720200
},
"key": 1621374000,
"doc_count": 1
},
{
"1": {
"value": 208634.33333333334
},
"key": 1621402000,
"doc_count": 3
},
{
"1": {
"value": 306248.5
},
"key": 1621404000,
"doc_count": 10
},
{
"1": {
"value": 328983.77777777775
},
"key": 1621406000,
"doc_count": 18
},
{
"1": {
"value": 1081724
},
"key": 1621408000,
"doc_count": 10
},
{
"1": {
"value": 2451076.785714286
},
"key": 1621410000,
"doc_count": 14
},
{
"1": {
"value": 1952910.2857142857
},
"key": 1621412000,
"doc_count": 14
},
{
"1": {
"value": 2294818.1875
},
"key": 1621414000,
"doc_count": 16
},
{
"1": {
"value": 2841910.388888889
},
"key": 1621416000,
"doc_count": 18
},
{
"1": {
"value": 2401278.9523809524
},
"key": 1621418000,
"doc_count": 21
},
{
"1": {
"value": 4311845.4
},
"key": 1621420000,
"doc_count": 5
},
{
"1": {
"value": 617102.5333333333
},
"key": 1621422000,
"doc_count": 15
},
{
"1": {
"value": 590469.7142857143
},
"key": 1621424000,
"doc_count": 14
},
{
"1": {
"value": 391918.85714285716
},
"key": 1621426000,
"doc_count": 14
},
{
"1": {
"value": 202163.66666666666
},
"key": 1621428000,
"doc_count": 3
}
]
}
}
}
The problem is that I can't extract the "value" field from the "1" sub-aggregation. I've tried using a flatten transform, but it doesn't seem to work. If anyone can either:
a) Tell me how to solve this specific problem with Vega; or
b) Tell me another way to solve my original problem
I'd be much obliged!
Your DSL query is looking great. If I've read this correctly I believe what you are looking for is a project transform. This can make life a lot easier when dealing with nested variables, as there are certain instances where they just don't function as expected.
You also need to reference data within marks otherwise it will plot nothing.
Below is how to fix this, you'll just need to add your url parameter in.
{
$schema: https://vega.github.io/schema/vega/v3.json
data: [
{
name: vals
url: ... // fill this in
transform: [
{
type: project
fields: [
1.value
doc_count
key
]
as: [
val
doc_count
key
]
}
]
}
]
scales: [
{
name: yscale
type: linear
zero: true
domain: {
data: vals
field: val
}
range: height
}
{
name: xscale
type: time
domain: {
data: vals
field: key
}
range: width
}
]
axes: [
{
scale: yscale
orient: left
}
{
scale: xscale
orient: bottom
}
]
marks: [
{
type: line
from: {
data: vals
}
encode: {
update: {
x: {
scale: xscale
field: key
}
y: {
scale: yscale
field: val
}
}
}
}
]
}
In future if you are having issues, look at the examples found on the Vega Gallery. They also have extensive documentation. These two combined is all you need.

Elasticsearch nested aggregation got too slow result

I have an index with a billions document in the future, for now it's around 20mil documents. It took over 10s to get the result while I need a query around 3-4s for billion documents. Is my structure was wrong or need to improve the query or server configuration? Im using amazon elasticsearch service.
This query will return amount/transactions/items of every station in every area
Query:
{
"size" : 0,
"query": {
"bool": {
"must":
[
{
"range": {
"date_sec": {
"gte": "1483228800",
"lte": "1525046400"
}
}
},
{
"range": {
"time_sec": {
"gte": "32400",
"lte": "75600"
}
}
}
]
}
},
"aggs": {
"numstoreamountclient" : {
"filter" : { "range" : { "amount" : { "gt" : 0 } } },
"aggs": {
"numstore_amountclient": {
"cardinality" : {
"field" : "id_station"
}
}
}
},
"id_station": {
"terms": {
"field": "id_station"
},
"aggs": {
"area_type": {
"terms": {
"field": "area_type"
},
"aggs": {
"max_time" : { "max" : { "field" : "time_sec" } },
"min_time" : { "min" : { "field" : "time_sec" } },
"amountclient": {
"sum": {
"field": "amount"
}
},
"itemclient": {
"sum": {
"field": "items"
}
},
"transactionclient" : {
"value_count" :
{
"field" : "id"
}
},
"inwatchinghour": {
"filter" : { "term" : { "in_watchinghour" : 1 } },
"aggs" : {
"amountclientwatch": {
"sum": {
"field": "amount"
}
},
"itemclient": {
"sum": {
"field": "items"
}
},
"transactionclientwatch" : {
"value_count" :
{
"field" : "id"
}
}
}
},
"saleclient": {
"filter" : {
"bool": {
"must":
[
{
"term" : { "in_watchinghour" : 1 }
},
{
"range": {
"items": {
"gt": "0"
}
}
},
{
"range": {
"amount": {
"gt": "0"
}
}
}
]
}
},
"aggs" : {
"sale_client" : {
"value_count" :
{
"field" : "id"
}
}
}
}
}
}
}
}
}
}
Result:
{
"took": 10757,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"skipped": 0,
"failed": 0
},
"hits": {
"total": 19778330,
"max_score": 0,
"hits": []
},
"aggregations": {
"numstoreamountclient": {
"doc_count": 19677164,
"numstore_amountclient": {
"value": 35
}
},
"id_station": {
"doc_count_error_upper_bound": 437877,
"sum_other_doc_count": 11401869,
"buckets": [
{
"key": 2209,
"doc_count": 1456505,
"area_type": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": 1,
"doc_count": 1456505,
"saleclient": {
"doc_count": 708499,
"sale_client": {
"value": 708499
}
},
"inwatchinghour": {
"doc_count": 711435,
"transactionclientwatch": {
"value": 711435
},
"amountclientwatch": {
"value": 210203295816
},
"itemclient": {
"value": 4105206
}
},
"amountclient": {
"value": 427392789897
},
"max_time": {
"value": 75600
},
"transactionclient": {
"value": 1456505
},
"itemclient": {
"value": 8402911
},
"min_time": {
"value": 32400
}
}
]
}
},
{
"key": 2210,
"doc_count": 890590,
"area_type": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": 1,
"doc_count": 890590,
"saleclient": {
"doc_count": 357520,
"sale_client": {
"value": 357520
}
},
"inwatchinghour": {
"doc_count": 358900,
"transactionclientwatch": {
"value": 358900
},
"amountclientwatch": {
"value": 89792941442
},
"itemclient": {
"value": 2146312
}
},
"amountclient": {
"value": 222577251265
},
"max_time": {
"value": 75600
},
"transactionclient": {
"value": 890590
},
"itemclient": {
"value": 5346273
},
"min_time": {
"value": 32400
}
}
]
}
},
{
"key": 2226,
"doc_count": 844491,
"area_type": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": 1,
"doc_count": 844491,
"saleclient": {
"doc_count": 346801,
"sale_client": {
"value": 346801
}
},
"inwatchinghour": {
"doc_count": 347730,
"transactionclientwatch": {
"value": 347730
},
"amountclientwatch": {
"value": 90585228756
},
"itemclient": {
"value": 1817412
}
},
"amountclient": {
"value": 219008246857
},
"max_time": {
"value": 75600
},
"transactionclient": {
"value": 844491
},
"itemclient": {
"value": 4409412
},
"min_time": {
"value": 32400
}
}
]
}
},
{
"key": 2317,
"doc_count": 812409,
"area_type": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": 1,
"doc_count": 812409,
"saleclient": {
"doc_count": 292933,
"sale_client": {
"value": 292933
}
},
"inwatchinghour": {
"doc_count": 294866,
"transactionclientwatch": {
"value": 294866
},
"amountclientwatch": {
"value": 105661613404
},
"itemclient": {
"value": 2144352
}
},
"amountclient": {
"value": 290725384084
},
"max_time": {
"value": 75600
},
"transactionclient": {
"value": 812409
},
"itemclient": {
"value": 5925558
},
"min_time": {
"value": 32400
}
}
]
}
},
{
"key": 2211,
"doc_count": 811198,
"area_type": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": 1,
"doc_count": 811198,
"saleclient": {
"doc_count": 262617,
"sale_client": {
"value": 262617
}
},
"inwatchinghour": {
"doc_count": 265515,
"transactionclientwatch": {
"value": 265515
},
"amountclientwatch": {
"value": 70763222934
},
"itemclient": {
"value": 1783073
}
},
"amountclient": {
"value": 213071496626
},
"max_time": {
"value": 75600
},
"transactionclient": {
"value": 811198
},
"itemclient": {
"value": 5476443
},
"min_time": {
"value": 32400
}
}
]
}
},
{
"key": 2331,
"doc_count": 806670,
"area_type": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": 1,
"doc_count": 806670,
"saleclient": {
"doc_count": 349472,
"sale_client": {
"value": 349472
}
},
"inwatchinghour": {
"doc_count": 350285,
"transactionclientwatch": {
"value": 350285
},
"amountclientwatch": {
"value": 82784018110
},
"itemclient": {
"value": 2079211
}
},
"amountclient": {
"value": 192804137579
},
"max_time": {
"value": 75600
},
"transactionclient": {
"value": 806670
},
"itemclient": {
"value": 4834069
},
"min_time": {
"value": 32400
}
}
]
}
},
{
"key": 2323,
"doc_count": 749161,
"area_type": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": 1,
"doc_count": 749161,
"saleclient": {
"doc_count": 280928,
"sale_client": {
"value": 280928
}
},
"inwatchinghour": {
"doc_count": 282498,
"transactionclientwatch": {
"value": 282498
},
"amountclientwatch": {
"value": 62082735118
},
"itemclient": {
"value": 1588445
}
},
"amountclient": {
"value": 162365212278
},
"max_time": {
"value": 75600
},
"transactionclient": {
"value": 749161
},
"itemclient": {
"value": 4231490
},
"min_time": {
"value": 32400
}
}
]
}
},
{
"key": 2345,
"doc_count": 727589,
"area_type": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": 1,
"doc_count": 727589,
"saleclient": {
"doc_count": 340141,
"sale_client": {
"value": 340141
}
},
"inwatchinghour": {
"doc_count": 341590,
"transactionclientwatch": {
"value": 341590
},
"amountclientwatch": {
"value": 107492036777
},
"itemclient": {
"value": 2421158
}
},
"amountclient": {
"value": 228611232646
},
"max_time": {
"value": 75600
},
"transactionclient": {
"value": 727589
},
"itemclient": {
"value": 5138628
},
"min_time": {
"value": 32400
}
}
]
}
},
{
"key": 2329,
"doc_count": 663856,
"area_type": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": 1,
"doc_count": 663856,
"saleclient": {
"doc_count": 163358,
"sale_client": {
"value": 163358
}
},
"inwatchinghour": {
"doc_count": 164339,
"transactionclientwatch": {
"value": 164339
},
"amountclientwatch": {
"value": 55298080357
},
"itemclient": {
"value": 1209514
}
},
"amountclient": {
"value": 211070998632
},
"max_time": {
"value": 75600
},
"transactionclient": {
"value": 663856
},
"itemclient": {
"value": 4875689
},
"min_time": {
"value": 32400
}
}
]
}
},
{
"key": 2355,
"doc_count": 613992,
"area_type": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": 1,
"doc_count": 613992,
"saleclient": {
"doc_count": 113575,
"sale_client": {
"value": 113575
}
},
"inwatchinghour": {
"doc_count": 114038,
"transactionclientwatch": {
"value": 114038
},
"amountclientwatch": {
"value": 30494132488
},
"itemclient": {
"value": 563628
}
},
"amountclient": {
"value": 140705052880
},
"max_time": {
"value": 75600
},
"transactionclient": {
"value": 613992
},
"itemclient": {
"value": 2920908
},
"min_time": {
"value": 32400
}
}
]
}
}
]
}
}
}

elasticsearch nested aggregation inside a reverse nested aggregation

Elasticsearch version: 2.3.1
JVM version: 1.8.0_66 / 25.66-b17
OS version: Mac OS X 10.11.4
I am having trouble getting the correct values to show up in a 4 level deep aggregation scenario where the first two levels are nested, the third is reverse_nested, and the fourth is nested again.
Here is my index mapping:
curl -XDELETE localhost:9200/orders-d
curl -XPUT localhost:9200/orders-d
curl -XPUT localhost:9200/orders-d/order-d/_mapping -d '{
"order-d": {
"properties": {
"id": {
"type": "string"
},
"orderNumber": {
"type": "string"
},
"groupId": {
"type": "string"
},
"groupOrderNumber": {
"type": "string"
},
"dateCreated": {
"type": "date"
},
"dateUpdated": {
"type": "date"
},
"location": {
"type": "object"
},
"orderSubmitter": {
"type": "object"
},
"distributor": {
"type": "object"
},
"salesRep": {
"type": "object"
},
"status": {
"type": "string"
},
"total": {
"type": "double"
},
"isTTOrder": {
"type": "boolean"
},
"lineItems": {
"type": "nested",
"include_in_parent": true,
"properties": {
"product": {
"type": "object"
},
"category": {
"type": "object"
},
"subCategory": {
"type": "object"
},
"quantity": {
"type": "double"
},
"unitPrice": {
"type": "double"
},
"totalPrice": {
"type": "double"
},
"pricedByUnitPrice": {
"type": "double"
}
}
}
}
}
}'
Here are the documents:
curl -XPUT localhost:9200/orders-d/order-d/0 -d '{
"id": "571652632a19085c008b4577",
"orderNumber": "1617590686",
"groupId": "571652632a19085c008b4578",
"groupOrderNumber": "3485944627",
"dateCreated": "2016-04-19",
"dateUpdated": null,
"location": {
"id": "54e53853505eb66b008b4569",
"name": "Andrews Diner"
},
"orderSubmitter": {
"id": "54e53853505eb66b008b4567",
"name": "Kostantino Plaitis"
},
"distributor": {
"id": "55c3879459ad0c63008b4569",
"name": "Performance Foodservice Metro NY"
},
"salesRep": null,
"status": "pending",
"total": 5410.21,
"isTTOrder": true,
"lineItems": [{
"product": {
"id": "55bfb445c440b26a008b4571",
"name": "Sabrett Sauerkraut 12 x 2 lb bags"
},
"category": {
"id": "53df845b3b8e77710e7b23ec",
"name": "Groceries & Dry Food"
},
"subCategory": {
"id": "53e1e8723b8e77a52b8b4586",
"name": "Other Sauces Dipping\/Condiments\/Savoury Toppings\/Savoury Spreads\/Marinades (Perishable)"
},
"quantity": 1,
"unitPrice": 25.24,
"totalPrice": 25.24,
"pricedByUnitPrice": 0
}, {
"product": {
"id": "55bc219238c0376e008b4570",
"name": "Franks Red Hot Cayenne Pepper Sauce 4 x 1 gallon"
},
"category": {
"id": "53df845b3b8e77710e7b23ec",
"name": "Groceries & Dry Food"
},
"subCategory": {
"id": "53e1e8723b8e77a52b8b4606",
"name": "Other Sauces Dipping\/Condiments\/Savoury Toppings\/Savoury Spreads\/Marinades (Shelf Stable)"
},
"quantity": 1,
"unitPrice": 45.06,
"totalPrice": 45.06,
"pricedByUnitPrice": 0
}, {
"product": {
"id": "56d76c41bd821fda008b459a",
"name": "Cereal, Classic Variety Pack, Kelloggs 1\/60 ct."
},
"category": {
"id": "53df845b3b8e77710e7b23ec",
"name": "Groceries & Dry Food"
},
"subCategory": {
"id": "53e1e8723b8e77a52b8b462d",
"name": "Grains\/Cereal - Ready to Eat - (Shelf Stable)"
},
"quantity": 1,
"unitPrice": 56.03,
"totalPrice": 56.03,
"pricedByUnitPrice": 0
}]
}'
curl -XPUT localhost:9200/orders-d/order-d/0 -d '{
"id": "571652632a19085c008b4576",
"orderNumber": "2041063294",
"groupId": "571652632a19085c008b4578",
"groupOrderNumber": "3485944627",
"dateCreated": "2016-04-19",
"dateUpdated": null,
"location": {
"id": "54e53853505eb66b008b4569",
"name": "Andrews Diner"
},
"orderSubmitter": {
"id": "54e53853505eb66b008b4567",
"name": "Kostantino Plaitis"
},
"distributor": {
"id": "55cdeece0a41216c008b4583",
"name": "Driscoll Foods"
},
"salesRep": null,
"status": "pending",
"total": 7575.27,
"isTTOrder": true,
"lineItems": [{
"product": {
"id": "55ad05e08d28c36b008b456c",
"name": "Pepper 3000 pcs"
},
"category": {
"id": "53df845b3b8e77710e7b23ec",
"name": "Groceries & Dry Food"
},
"subCategory": {
"id": "53e1e8723b8e77a52b8b4582",
"name": "Herbs\/Spices (Shelf Stable)"
},
"quantity": 3,
"unitPrice": 8.95,
"totalPrice": 26.85,
"pricedByUnitPrice": 0
}, {
"product": {
"id": "55b3a12f6b415c68008b4568",
"name": "Venice Maid Deluxe Corned Beef Hash 6 x 6 lb 10 oz"
},
"category": {
"id": "53df846c3b8e77710e7b23f7",
"name": "Meat"
},
"subCategory": {
"id": "54d8c56a279871b9078b4581",
"name": "Beef - Prepared\/Processed"
},
"quantity": 1,
"unitPrice": 59.75,
"totalPrice": 59.75,
"pricedByUnitPrice": 0
}, {
"product": {
"id": "55b145798c26dc69008b4568",
"name": "Aladdin Bakers Sesame Bread Sticks 150 x 2 packs"
},
"category": {
"id": "53df845b3b8e77710e7b23ec",
"name": "Groceries & Dry Food"
},
"subCategory": {
"id": "53e1e8723b8e77a52b8b45b0",
"name": "Dried Breads (Shelf Stable)"
},
"quantity": 8,
"unitPrice": 15.5,
"totalPrice": 124,
"pricedByUnitPrice": 0
}, {
"product": {
"id": "55ad074a8d28c36f008b456d",
"name": "Smuckers Breakfast Syrup 100 cups"
},
"category": {
"id": "53df845b3b8e77710e7b23ec",
"name": "Groceries & Dry Food"
},
"subCategory": {
"id": "53e1e8723b8e77a52b8b457d",
"name": "Syrup\/Treacle\/Molasses (Shelf Stable)"
},
"quantity": 10,
"unitPrice": 8.95,
"totalPrice": 89.5,
"pricedByUnitPrice": 0
}]
}'
Here is my query:
curl -XPOST localhost:9200/orders-d/_search -d '{
"from": 0,
"size": 0,
"aggregations": {
"totalLineItems": {
"aggs": {
"totalLineItems": {
"terms": {
"field": "lineItems.category.id",
"size": 0
},
"aggs": {
"totalLineItems": {
"terms": {
"field": "lineItems.product.id",
"size": 0
},
"aggs": {
"totalLineItems": {
"aggs": {
"totalLineItems": {
"terms": {
"field": "distributor.id",
"size": 0
},
"aggs": {
"totalLineItems": {
"aggs": {
"totalLineItems": {
"sum": {
"field": "lineItems.totalPrice"
}
}
},
"nested": {
"path": "lineItems"
}
}
}
}
},
"reverse_nested": {}
}
}
}
}
}
},
"nested": {
"path": "lineItems"
}
}
},
"query": {
"bool": {
"must": [{
"range": {
"dateCreated": {
"format": "yyyy-MM-dd",
"gte": "2016-01-01",
"lte": "2016-04-30"
}
}
}]
}
}
}'
...and here are my results:
{
"took": 8,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"failed": 0
},
"hits": {
"total": 1,
"max_score": 0.0,
"hits": []
},
"aggregations": {
"totalLineItems": {
"doc_count": 4,
"totalLineItems": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [{
"key": "53df845b3b8e77710e7b23ec",
"doc_count": 3,
"totalLineItems": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [{
"key": "55ad05e08d28c36b008b456c",
"doc_count": 1,
"totalLineItems": {
"doc_count": 1,
"totalLineItems": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [{
"key": "55cdeece0a41216c008b4583",
"doc_count": 1,
"totalLineItems": {
"doc_count": 4,
"totalLineItems": {
"value": 300.1
}
}
}]
}
}
}, {
"key": "55ad074a8d28c36f008b456d",
"doc_count": 1,
"totalLineItems": {
"doc_count": 1,
"totalLineItems": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [{
"key": "55cdeece0a41216c008b4583",
"doc_count": 1,
"totalLineItems": {
"doc_count": 4,
"totalLineItems": {
"value": 300.1
}
}
}]
}
}
}, {
"key": "55b145798c26dc69008b4568",
"doc_count": 1,
"totalLineItems": {
"doc_count": 1,
"totalLineItems": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [{
"key": "55cdeece0a41216c008b4583",
"doc_count": 1,
"totalLineItems": {
"doc_count": 4,
"totalLineItems": {
"value": 300.1
}
}
}]
}
}
}]
}
}, {
"key": "53df846c3b8e77710e7b23f7",
"doc_count": 1,
"totalLineItems": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [{
"key": "55b3a12f6b415c68008b4568",
"doc_count": 1,
"totalLineItems": {
"doc_count": 1,
"totalLineItems": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [{
"key": "55cdeece0a41216c008b4583",
"doc_count": 1,
"totalLineItems": {
"doc_count": 4,
"totalLineItems": {
"value": 300.1
}
}
}]
}
}
}]
}
}]
}
}
}
}
As you can see from the results, all the aggregated values for each drilldown of totalLineItems have the same exact value. This is obviously incorrect.
Did I do something wrong, is it a bug, or is nesting inside a reverse nesting unsupported?

elasticsearch terms and sum aggregation

I have documents in elasticsearch (1.5) that looks like:
{
"gender": [
{
"name": "unknown",
"value": 12
},
{
"name": "male",
"value": 89
},
{
"name": "female",
"value": 84
}
]
}
not all of the documents contains the three options (male/female/unknown)
i would like to get the sum of all values per each gender name. like that:
{
"buckets": [
{
"key": "unknown",
"doc_count": 112,
"gender_a": {
"value": 462
}
},
{
"key": "male",
"doc_count": 107,
"gender_a": {
"value": 438
}
},
{
"key": "female",
"doc_count": 36,
"gender_a": {
"value": 186
}
}
]
}
i tried this query:
{
"aggs": {
"gender_name": {
"terms": {
"field": "gender.name"
},
"aggs": {
"gender_sum": {
"sum": {
"field": "gender.value"
}
}
}
}
}
}
but something weird is going on, and i don't get the right values.
any idea what i am missing ?
You will probably need to make sure that your "gender" property has type "nested". With that, I was able to make the following do what I think you're asking.
First I set up a simple index:
PUT /test_index
{
"mappings": {
"doc": {
"properties": {
"gender": {
"type": "nested",
"properties": {
"name": {
"type": "string"
},
"value": {
"type": "long"
}
}
}
}
}
}
}
Then added a couple of docs:
PUT /test_index/doc/1
{
"gender": [
{
"name": "unknown",
"value": 12
},
{
"name": "male",
"value": 89
},
{
"name": "female",
"value": 84
}
]
}
PUT /test_index/doc/2
{
"gender": [
{
"name": "male",
"value": 8
},
{
"name": "female",
"value": 4
}
]
}
Then I was able to get total counts by gender name as follows:
POST /test_index/_search?search_type=count
{
"aggs": {
"genders": {
"nested": {
"path": "gender"
},
"aggs": {
"gender_terms": {
"terms": {
"field": "gender.name"
},
"aggs": {
"gender_name_value_sums": {
"sum": {
"field": "gender.value"
}
}
}
}
}
}
}
}
...
{
"took": 1,
"timed_out": false,
"_shards": {
"total": 1,
"successful": 1,
"failed": 0
},
"hits": {
"total": 2,
"max_score": 0,
"hits": []
},
"aggregations": {
"genders": {
"doc_count": 5,
"gender_terms": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "female",
"doc_count": 2,
"gender_name_value_sums": {
"value": 88,
"value_as_string": "88.0"
}
},
{
"key": "male",
"doc_count": 2,
"gender_name_value_sums": {
"value": 97,
"value_as_string": "97.0"
}
},
{
"key": "unknown",
"doc_count": 1,
"gender_name_value_sums": {
"value": 12,
"value_as_string": "12.0"
}
}
]
}
}
}
}
Here is the code I used to test it:
http://sense.qbox.io/gist/d4533215806b858aa2cc1565546d167fdec3c973

Find min value of field in nested array object after aggregation

I would like to find the minimum value of a field in a nested array object after aggregation.
Data example:
[
{
"id": "i1",
"version": 1,
"entries": [
{
"name": "n1",
"position": 1
}, {
"name": "n2",
"position": 2
}
]
}, {
"id": "i1"
"version": 2,
"entries": [
{
"name": "n2",
"position": 3
}, {
"name": "n3",
"position": 4
}
]
},
{
"id": "i2",
"version": 1,
"entries": [
{
"name": "n1",
"position": 8
}, {
"name": "n2",
"position": 7
}
]
}, {
"id": "i2"
"version": 2,
"entries": [
{
"name": "n2",
"position": 6
}, {
"name": "n3",
"position": 5
}
]
}
]
Pseudo Query:
SELECT min(entries["n2"].position) WHERE entries.name="n2" GROUP BY id;
Expected Result:
[
{
"id": "i1",
"min(position)": 2
}, {
"id": "i2",
"min(position)": 6
}
]
I can do this in code, but it's not performant, as I need to return the document sources which can be quite large.
I am thinking of denormalizing the data, but would like to first know if this request is not possible at all.
You can do it by nesting several aggregations like this:
terms agg -> nested agg -> filter agg -> min agg
To test it I set up an index:
PUT /test_index
{
"settings": {
"number_of_shards": 1
},
"mappings": {
"doc": {
"properties": {
"entries": {
"type": "nested",
"properties": {
"name": {
"type": "string"
},
"position": {
"type": "long"
}
}
},
"id": {
"type": "string"
},
"version": {
"type": "long"
}
}
}
}
}
And indexed your docs:
PUT /test_index/doc/_bulk
{"index":{"_id":1}}
{"id":"i1","version":1,"entries":[{"name":"n1","position":1},{"name":"n2","position":2}]}
{"index":{"_id":2}}
{"id":"i1","version":2,"entries":[{"name":"n2","position":3},{"name":"n3","position":4}]}
{"index":{"_id":3}}
{"id":"i2","version":1,"entries":[{"name":"n1","position":8},{"name":"n2","position":7}]}
{"index":{"_id":4}}
{"id":"i2","version":2,"entries":[{"name":"n2","position":6},{"name":"n3","position":5}]}
Here is the query:
POST /test_index/_search?search_type=count
{
"aggs": {
"id_terms": {
"terms": {
"field": "id"
},
"aggs": {
"nested_entries": {
"nested": {
"path": "entries"
},
"aggs": {
"filter_name": {
"filter": {
"term": {
"entries.name": "n2"
}
},
"aggs": {
"min_position": {
"min": {
"field": "position"
}
}
}
}
}
}
}
}
}
}
and the result:
{
"took": 2,
"timed_out": false,
"_shards": {
"total": 1,
"successful": 1,
"failed": 0
},
"hits": {
"total": 4,
"max_score": 0,
"hits": []
},
"aggregations": {
"id_terms": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "i1",
"doc_count": 2,
"nested_entries": {
"doc_count": 4,
"filter_name": {
"doc_count": 2,
"min_position": {
"value": 2,
"value_as_string": "2.0"
}
}
}
},
{
"key": "i2",
"doc_count": 2,
"nested_entries": {
"doc_count": 4,
"filter_name": {
"doc_count": 2,
"min_position": {
"value": 6,
"value_as_string": "6.0"
}
}
}
}
]
}
}
}
Here is the code I used all together:
http://sense.qbox.io/gist/34a013099ef07fb527d9d7cf8490ad1bbafa718b

Resources