Elasticsearch - Count duplicated and unique values for a nested field

Elasticsearch - Count duplicated and unique values for a nested field - elasticsearch

Elasticsearch - Count duplicated and unique values
I need also same kind of count but that field is in nested properties as
[{
"firstname": "john",
"lastname": "doe",
"addressList": [{
"addressId": 39640,
"txt": "sdf",
}, {
"addressId": 39641,
"txt": "NEW",
}, {
"addressId": 39640,
"txt": "sdf",
}, {
"addressId": 39641,
"txt": "NEW"
}
]
}, {
"firstname": "jane",
"lastname": "smith",
"addressList": [{
"addressId": 39644,
"txt": "sdf",
}, {
"addressId": 39642,
"txt": "NEW",
}, {
"addressId": 39644,
"txt": "sdf",
}, {
"addressId": 39642,
"txt": "NEW"
}
]
}
]
what would be the query for addressId duplicate counts ? Need you help on this user:3838328

I got the answer for nested field duplicate counts as
POST <your_index_name>/_search
{
"size": 0,
"aggs": {
"prop_counts": {
"nested": {
"path": "addressList"
},
"aggs": {
"duplicate_aggs": {
"terms": {
"field": "addressList.addressId",
"min_doc_count": 2,
"size": 100 <----- Note this
}
},
"duplicate_bucketcount": {
"stats_bucket": {
"buckets_path": "duplicate_aggs._count"
}
},
"nonduplicate_aggs": {
"terms": {
"field": "addressList.addressId",
"size": 100 <---- Note this
},
"aggs": {
"equal_one": {
"bucket_selector": {
"buckets_path": {
"count": "_count"
},
"script": "params.count == 1"
}
}
}
},
"nonduplicate_bucketcount": {
"sum_bucket": {
"buckets_path": "nonduplicate_aggs._count"
}
}
}
}
}
}
Response as
{
"took": 4,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"skipped": 0,
"failed": 0
},
"hits": {
"total": 2,
"max_score": 0,
"hits": []
},
"aggregations": {
"prop_counts": {
"doc_count": 8,
"duplicate_aggs": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [{
"key": 39640,
"doc_count": 2
}, {
"key": 39641,
"doc_count": 2
}, {
"key": 39644,
"doc_count": 2
}, {
"key": 39642,
"doc_count": 2
}
]
},
"nonduplicate_aggs": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": []
},
"duplicate_bucketcount": {
"count": 4,
"min": 2,
"max": 2,
"avg": 2,
"sum": 8
},
"nonduplicate_bucketcount": {
"value": 0
}
}
}
}

Related

Elasticsearch - Count number of occurrence perd field per document

Is it possible to calculate the number of occurence of distinct values in a list field.
For example, let the following data:
[
{
"page":1,
"colors":[
{
"color": red
},
{
"color": white
},
{
"color": red
}
]
},
{
"page":2,
"colors":[
{
"color": yellow
},
{
"color": yellow
}
]
}
]
Is it possible to get a result as the follwing:
{
"page":1,
"colors_count":[
{
"Key": red,
"Count": 2
},
{
"Key": white,
"Count": 1
},
]
},
{
"page":2,
"colors_count":[
{
"Key": yellow,
"Count": 2
}
]
}
I tried using term aggregation but I got the number of distinct values, so for page:1 i got red:1 and white:1.

Yes, you can do it. you will have to use nested_field type and nested_Agg
Mapping:
PUT colors
{
"mappings": {
"properties": {
"page" : { "type": "keyword" },
"colors": {
"type": "nested",
"properties": {
"color": {
"type": "keyword"
}
}
}
}
}
}
Insert Documents:
PUT colors/_doc/1
{
"page": 1,
"colors": [
{
"color": "red"
},
{
"color": "white"
},
{
"color": "red"
}
]
}
PUT colors/_doc/2
{
"page": 2,
"colors": [
{
"color": "yellow"
},
{
"color": "yellow"
}
]
}
Query:
GET colors/_search
{
"size" :0,
"aggs": {
"groupByPage": {
"terms": {
"field": "page"
},
"aggs": {
"colors": {
"nested": {
"path": "colors"
},
"aggs": {
"genres": {
"terms": {
"field": "colors.color"
}
}
}
}
}
}
}
}
Output:
{
"took": 3,
"timed_out": false,
"_shards": {
"total": 1,
"successful": 1,
"skipped": 0,
"failed": 0
},
"hits": {
"total": {
"value": 2,
"relation": "eq"
},
"max_score": null,
"hits": []
},
"aggregations": {
"groupByPage": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "1", // page field value
"doc_count": 1,
"colors": {
"doc_count": 3,
"genres": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "red",
"doc_count": 2
},
{
"key": "white",
"doc_count": 1
}
]
}
}
},
{
"key": "2", // page field value
"doc_count": 1,
"colors": {
"doc_count": 2,
"genres": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "yellow",
"doc_count": 2
}
]
}
}
}
]
}
}
}

Convert sql query to elasticsearch

I need to convert this query into elastic search, but I am facing the problem that in elastic search (having) is not supported yet.
Select sum(count) as count,prop1
from
(
SELECT Count(*) as count,prop1 FROM [table1] group by prop1,prop2
having count = 1
)
group by prop1
order by count desc limit 10
I try this query in elastic search:
`GET /analytics_data/_search
{
"size": 0,
"query": {
"bool": {
"filter": [
{
"term":
{
"field": "test"
}
}
]
}
},
"aggs": {
"aggregation": {
"terms": {
"field": "prop1"
},
"aggs": {
"subaggregation": {
"terms": {
"field": "prop2",
"order": {
"_count": "desc"
}
}
},
"test":{
"bucket_selector": {
"buckets_path":
{
"test1": "_count"
},
"script":"params.test1 == 1"
}
}
}
}
}
}`
Here is the mapping that I use:
PUT /index
{
"mappings" : {
"timeline" : {
"properties" : {
"prop1" : {
"type" : "keyword"
},
"prop2" : {
"type" : "keyword"
}
}
}
}
}
but I cannot get the sub-aggregation buckets who have count == 1
Here is the output of the suggested answer :
{
"took": 344,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"failed": 0
},
"hits": {
"total": 852146,
"max_score": 0,
"hits": []
},
"aggregations": {
"prop1": {
"doc_count_error_upper_bound": 646,
"sum_other_doc_count": 37299,
"buckets": [
{
"key": "porp1-key",
"doc_count": 348178,
"prop2": {
"doc_count_error_upper_bound": 130,
"sum_other_doc_count": 345325,
"buckets": [
{
"key": "e1552d2d-da84-4588-9b65-16c33848bb94_1",
"doc_count": 558,
"prop2_count": {
"value": 0
},
"prop2_check": {
"value": 0
}
},
{
"key": "04b1a8eb-f876-459b-af9b-855493318dca_426",
"doc_count": 383,
"prop2_count": {
"value": 0
},
"prop2_check": {
"value": 0
}
},
{
"key": "b165d2c7-6a23-4a4d-adbb-3b2a79d4c627_80",
"doc_count": 344,
"prop2_count": {
"value": 0
},
"prop2_check": {
"value": 0
}
},
{
"key": "c4ea55dc-c3b3-492b-98a2-1ad004212c3d_99",
"doc_count": 297,
"prop2_count": {
"value": 0
},
"prop2_check": {
"value": 0
}
},
{
"key": "dfc1ae22-5c7f-49ab-8488-207661b43716_294",
"doc_count": 264,
"prop2_count": {
"value": 0
},
"prop2_check": {
"value": 0
}
},
{
"key": "28815490-e7ce-420b-bab8-57a6ffc3f56a_572",
"doc_count": 239,
"prop2_count": {
"value": 0
},
"prop2_check": {
"value": 0
}
},
{
"key": "c3c56ec8-e0ff-46ea-841d-cc22b2dc65f6_574",
"doc_count": 217,
"prop2_count": {
"value": 0
},
"prop2_check": {
"value": 0
}
},
{
"key": "473289b8-fb73-4cbb-b8d7-a5386846745f_34",
"doc_count": 187,
"prop2_count": {
"value": 0
},
"prop2_check": {
"value": 0
}
},
{
"key": "670cb862-7976-4fd5-ba3f-3f8b7c03d615_11",
"doc_count": 185,
"prop2_count": {
"value": 0
},
"prop2_check": {
"value": 0
}
},
{
"key": "41870755-96dd-4a00-ab76-632a1dfaecb5_341",
"doc_count": 179,
"prop2_count": {
"value": 0
},
"prop2_check": {
"value": 0
}
}
]
},
"final": {
"value": 0
}
} ]
}
}
}

Try this. Aggregation final will give you the desired output.
GET /analytics_data/_search
{
"size": 0,
"query": {
"bool": {
"filter": [
{
"term": {
"field": "test"
}
}
]
}
},
"aggs": {
"prop1": {
"terms": {
"field": "prop1",
"size": 10
},
"aggs": {
"prop2": {
"terms": {
"field": "prop2",
"size": 10
},
"aggs": {
"prop2_count": {
"value_count": {
"field": "prop2"
}
},
"prop2_check": {
"bucket_script": {
"buckets_path": {
"count": "prop2_count.value"
},
"script": "(params.count == 1) ? 1 : 0"
}
}
}
},
"final": {
"sum_bucket": {
"buckets_path": "prop2>prop2_check"
}
}
}
}
}
}
Working code :
PUT prop
{
"mappings": {
"prop": {
"properties": {
"prop1": {
"type": "keyword"
},
"prop2": {
"type": "keyword"
}
}
}
}
}
POST _bulk
{"index":{"_index":"prop","_type":"prop"}}
{"prop1":"p1","prop2":"q1"}
{"index":{"_index":"prop","_type":"prop"}}
{"prop1":"p1","prop2":"q2"}
{"index":{"_index":"prop","_type":"prop"}}
{"prop1":"p1","prop2":"q2"}
{"index":{"_index":"prop","_type":"prop"}}
{"prop1":"p2","prop2":"q5"}
{"index":{"_index":"prop","_type":"prop"}}
{"prop1":"p2","prop2":"q6"}
GET prop/prop/_search
{
"size": 0,
"aggs": {
"prop1": {
"terms": {
"field": "prop1",
"size": 10
},
"aggs": {
"prop2": {
"terms": {
"field": "prop2",
"size": 10
},
"aggs": {
"prop2_count": {
"value_count": {
"field": "prop2"
}
},
"prop2_check": {
"bucket_script": {
"buckets_path": {
"count": "prop2_count.value"
},
"script": "(params.count == 1) ? 1 : 0"
}
}
}
},
"final":{
"sum_bucket": {
"buckets_path": "prop2>prop2_check"
}
}
}
}
}
}
Output :
{
"took": 6,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"failed": 0
},
"hits": {
"total": 5,
"max_score": 0,
"hits": []
},
"aggregations": {
"prop1": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "p1",
"doc_count": 3,
"prop2": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "q2",
"doc_count": 2,
"prop2_count": {
"value": 2
},
"prop2_check": {
"value": 0
}
},
{
"key": "q1",
"doc_count": 1,
"prop2_count": {
"value": 1
},
"prop2_check": {
"value": 1
}
}
]
},
"final": {
"value": 1
}
},
{
"key": "p2",
"doc_count": 2,
"prop2": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "q5",
"doc_count": 1,
"prop2_count": {
"value": 1
},
"prop2_check": {
"value": 1
}
},
{
"key": "q6",
"doc_count": 1,
"prop2_count": {
"value": 1
},
"prop2_check": {
"value": 1
}
}
]
},
"final": {
"value": 2
}
}
]
}
}
}

query for elasticsearch returning count

I am struggling to create the query/rule that will help me create an alerting script. I want to query the elasticsearch API for counts on a specific index so that I can get alerted when the count reaches a certain threshold.
The following query is an attempt as I have no experience with this:
{
"query": {
"filtered": {
"query": {
"query_string": {
"analyze_wildcard": true,
"query": "*"
}
},
"filter": {
"bool": {
"must": [
{
"query": {
"match": {
"PStream": {
"query": "*",
"type": "phrase"
}
}
}
},
{
"range": {
"#timestamp": {
"gte": 1447789445320,
"lte": 1447793045320
}
}
}
],
"must_not": []
}
}
}
},
"highlight": {
"pre_tags": [
"#kibana-highlighted-field#"
],
"post_tags": [
"#/kibana-highlighted-field#"
],
"fields": {
"*": {}
},
"fragment_size": 2147483647
},
"size": 500,
"sort": [
{
"#timestamp": {
"order": "desc",
"unmapped_type": "boolean"
}
}
],
"aggs": {
"2": {
"date_histogram": {
"field": "#timestamp",
"interval": "1m",
"pre_zone": "-05:00",
"pre_zone_adjust_large_interval": true,
"min_doc_count": 0,
"extended_bounds": {
"min": 1447789445317,
"max": 1447793045317
}
}
}
},
The field PStream is the field that I am focused on
EDIT:
An example of the data going to the index:
{
"_index": "logstash-2015.11.17",
"_type": "logs",
"_id": "AVEXMKu2YVnF1NOjr9YT",
"_score": null,
"_source": {
"authorUrl": "",
"postUrl": "",
"pubDate": "2015-11-17T15:18:24",
"scrapeDate": "2015-11-17T15:44:03",
"clientId": "136902834",
"query": "Jenny Balatsinou",
"PType": "post",
"tLatency": 1539,
"PLang": "en",
"PStream": "864321",
"PName": "xStackOverflow",
"#version": "1",
"#timestamp": "2015-11-17T20:44:03.400Z"
},
"fields": {
"#timestamp": [
1447793043400
],
"pubDate": [
1447773504000
],
"scrapeDate": [
1447775043000
]
},
"sort": [
1447793043400
]
there are about 20 million of these messages getting indexed daily into Elasticsearch. I have created a dashboard in Kibana where I view this data and stats. I would like to write the proper query that I can use in a java program that periodically runs and checks this index using this query. It should return the hourly total count grouped by the PStream variable which has multiple values. So anytime the value is 0 it will send an alert.
Eg. Output:
"result": {
"total": 74,
"successful": 63,
"failed": 11,
{
{
"index": "logstash-2015.11.08",
"PStream": "37647338933",
"Count": 1234532
},
{
"index": "logstash-2015.11.08",
"PStream": "45345343566",
"Count": 156532
},

As a quick example (per comments above), I just set up a trivial index:
DELETE /test_index
PUT /test_index
added some (simplified) data:
PUT /test_index/doc/_bulk
{"index":{"_id":1}}
{"PStream": "864321","#timestamp": "2015-11-17T20:44:03.400Z"}
{"index":{"_id":2}}
{"PStream": "864321","#timestamp": "2015-11-17T21:44:03.400Z"}
{"index":{"_id":3}}
{"PStream": "864321","#timestamp": "2015-11-17T20:44:03.400Z"}
{"index":{"_id":4}}
{"PStream": "864322","#timestamp": "2015-11-17T21:44:03.400Z"}
And now I can get the "PStream" terms inside an hour histogram:
POST /test_index/_search
{
"size": 0,
"aggs" : {
"timestamp_histogram" : {
"date_histogram" : {
"field" : "#timestamp",
"interval" : "hour"
},
"aggs": {
"pstream_terms": {
"terms": {
"field": "PStream"
}
}
}
}
}
}
...
{
"took": 6,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"failed": 0
},
"hits": {
"total": 4,
"max_score": 0,
"hits": []
},
"aggregations": {
"timestamp_histogram": {
"buckets": [
{
"key_as_string": "2015-11-17T20:00:00.000Z",
"key": 1447790400000,
"doc_count": 2,
"pstream_terms": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "864321",
"doc_count": 2
}
]
}
},
{
"key_as_string": "2015-11-17T21:00:00.000Z",
"key": 1447794000000,
"doc_count": 2,
"pstream_terms": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "864321",
"doc_count": 1
},
{
"key": "864322",
"doc_count": 1
}
]
}
}
]
}
}
}
or the other way around:
POST /test_index/_search
{
"size": 0,
"aggs": {
"pstream_terms": {
"terms": {
"field": "PStream"
},
"aggs": {
"timestamp_histogram": {
"date_histogram": {
"field": "#timestamp",
"interval": "hour"
}
}
}
}
}
}
...
{
"took": 5,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"failed": 0
},
"hits": {
"total": 4,
"max_score": 0,
"hits": []
},
"aggregations": {
"pstream_terms": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "864321",
"doc_count": 3,
"timestamp_histogram": {
"buckets": [
{
"key_as_string": "2015-11-17T20:00:00.000Z",
"key": 1447790400000,
"doc_count": 2
},
{
"key_as_string": "2015-11-17T21:00:00.000Z",
"key": 1447794000000,
"doc_count": 1
}
]
}
},
{
"key": "864322",
"doc_count": 1,
"timestamp_histogram": {
"buckets": [
{
"key_as_string": "2015-11-17T21:00:00.000Z",
"key": 1447794000000,
"doc_count": 1
}
]
}
}
]
}
}
}
Here's the code I used:
http://sense.qbox.io/gist/6c0c30db1cf0fb8529bcfec21c0ce5c02a5ae94c

Elasticsearch include field in result set of aggregation

How can field of type string be included in the result set of an aggregation?
For example given the following mapping:
{
"sport": {
"mappings": {
"runners": {
"properties": {
"name": {
"type": "string"
},
"city": {
"type": "string"
},
"region": {
"type": "string"
},
"sport": {
"type": "string"
}
}
}
}
}
}
Sample data:
curl -XPOST "http://localhost:9200/sport/_bulk" -d'
{"index":{"_index":"sport","_type":"runner"}}
{"name":"Gary", "city":"New York","region":"A","sport":"Soccer"}
{"index":{"_index":"sport","_type":"runner"}}
{"name":"Bob", "city":"New York","region":"A","sport":"Tennis"}
{"index":{"_index":"sport","_type":"runner"}}
{"name":"Mike", "city":"Atlanta","region":"B","sport":"Soccer"}
'
How can the field name be included in result set of the aggregation:
{
"size": 0,
"aggregations": {
"agg": {
"terms": {
"field": "city"}
}
}
}

This seems to do what you want, if I'm understanding you correctly:
POST /sport/_search
{
"size": 0,
"aggregations": {
"city_terms": {
"terms": {
"field": "city"
},
"aggs": {
"name_terms": {
"terms": {
"field": "name"
}
}
}
}
}
}
With the data you provided, it returns:
{
"took": 43,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"failed": 0
},
"hits": {
"total": 3,
"max_score": 0,
"hits": []
},
"aggregations": {
"city_terms": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "new",
"doc_count": 2,
"name_terms": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "bob",
"doc_count": 1
},
{
"key": "gary",
"doc_count": 1
}
]
}
},
{
"key": "york",
"doc_count": 2,
"name_terms": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "bob",
"doc_count": 1
},
{
"key": "gary",
"doc_count": 1
}
]
}
},
{
"key": "atlanta",
"doc_count": 1,
"name_terms": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "mike",
"doc_count": 1
}
]
}
}
]
}
}
}
(You may want to add "index":"not_analyzed" to one or both fields in your mapping, if these results are not what you were expecting.)
Here's the code I used to test it:
http://sense.qbox.io/gist/07735aadc082c1c60409931c279f3fd85a340dbb

Elasticsearch, Nested Aggregations

Im writing dynamic query generation which allows to aggregate by any fields combination in the mapping. As the mapping(truncated) below, there are fields in nested type. e.g aggregate by [activities.activity,duration], or [activities.activity, activities.duration] or [applicationName, duration]
Mapping:
{
nested: {
properties: {
#timestamp: {
type: "date",
format: "dateOptionalTime"
},
activities: {
type: "nested",
include_in_parent: true,
properties: {
activity: {
type: "string",
index: "not_analyzed"
},
duration: {
type: "long"
},
entry: {
properties: {
blockName: {
type: "string",
index: "not_analyzed"
},
blockid: {
type: "string"
},
time: {
type: "date",
format: "dateOptionalTime"
}
}
},
exit: {
properties: {
blockName: {
type: "string",
index: "not_analyzed"
},
blockid: {
type: "string"
},
time: {
type: "date",
format: "dateOptionalTime"
}
}
},
seq: {
type: "integer"
}
}
},
applicationName: {
type: "string",
index: "not_analyzed"
},
duration: {
type: "long"
}
}
}}
Sample document:
{
"#timestamp": "2015-09-15T17:35:24.020Z",
"duration": "37616",
"applicationName": "my application name",
"activities": [{
"duration": "20362",
"entry": {
"blockid": "2",
"time": "2015-09-15T17:35:24.493Z",
"blockName": "My Self Service"
},
"exit": {
"blockid": "2",
"time": "2015-09-15T17:35:44.855Z",
"blockName": "My Self Service"
},
"seq": 1,
"activity": "Prompter v2.3"
}, {
"duration": "96",
"entry": {
"blockid": "2",
"time": "2015-09-15T17:35:45.268Z",
"blockName": "My Self Service"
},
"exit": {
"blockid": "2",
"time": "2015-09-15T17:35:45.364Z",
"blockName": "My Self Service"
},
"seq": 2,
"activity": "Start v2.5"
}, {
"duration": "15931",
"entry": {
"blockid": "2",
"time": "2015-09-15T17:35:45.669Z",
"blockName": "My Self Service"
},
"exit": {
"blockid": "2",
"time": "2015-09-15T17:36:01.600Z",
"blockName": "My Self Service"
},
"seq": 3,
"activity": "System v2.3"
}]}
Sample query:
{
"size": 0,
"aggs": {
"dim0": {
"nested" : {
"path": "activities"
},
"aggs": {
"dim1": {
"terms": {
"field": "activities.activity"
},
"aggs": {
"dim_reverse":{
"reverse_nested":{},
"aggs":{
"avg_duration": {
"avg": {
"field": "duration"
}
}
}
}
}
}
}
}
}}
Question,
as you can see in the query, when averaging on a root level field under a nested field. reverse_nested must be included so that the root level field "duration" can be seen. That means when generating the query, we need to check the combination of fields to see if the parent/child fields are the cases of fields are nested, nested under the same path or at the root level, then generate the proper query. This may be more complicated when aggregating on more fields, for example, aggregate by [applicationName, activities.duration, duration,activities.activity]. Does anyone know more elegant way to do that? the logic may be simpler if we can specify absolute path

Not real an answer to my question but adding more examples as it may help others to understand nested aggregation better.
aggs field average field
case1 yes yes
case2 yes no
case3 no yes
case4 no no
yes->nested type, no->not nested type
Case1 with same path
Query
{
"size": 0,
"aggs": {
"dim0": {
"nested" : {
"path": "activities"
},
"aggs": {
"dim1": {
"terms": {
"field": "activities.activity"
},
"aggs":{
"avg_duration": {
"avg": {
"field": "activities.duration"
}
}
}
}
}
}
}}
Result:
{
"took": 1,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"failed": 0
},
"hits": {
"total": 1,
"max_score": 0.0,
"hits": []
},
"aggregations": {
"dim0": {
"doc_count": 3,
"dim1": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [{
"key": "Prompter v2.3",
"doc_count": 1,
"avg_duration": {
"value": 20362.0
}
}, {
"key": "Start v2.5",
"doc_count": 1,
"avg_duration": {
"value": 96.0
}
}, {
"key": "System v2.3",
"doc_count": 1,
"avg_duration": {
"value": 15931.0
}
}]
}
}
}}
case1, both fields are nested, but reverse_nested to have the same average value on all the "activities.duration"
query
{
"size": 0,
"aggs": {
"dim0": {
"nested" : {
"path": "activities"
},
"aggs": {
"dim1": {
"terms": {
"field": "activities.activity"
},
"aggs": {
"dim_reverse1":{
"reverse_nested":{
},
"aggs":{
"avg_duration": {
"avg": {
"field": "activities.duration"
}
}
}
}
}
}
}
}
}}
result
{
"took": 2,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"failed": 0
},
"hits": {
"total": 1,
"max_score": 0.0,
"hits": []
},
"aggregations": {
"dim0": {
"doc_count": 3,
"dim1": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [{
"key": "Prompter v2.3",
"doc_count": 1,
"dim_reverse1": {
"doc_count": 1,
"avg_duration": {
"value": 12129.666666666666
}
}
}, {
"key": "Start v2.5",
"doc_count": 1,
"dim_reverse1": {
"doc_count": 1,
"avg_duration": {
"value": 12129.666666666666
}
}
}, {
"key": "System v2.3",
"doc_count": 1,
"dim_reverse1": {
"doc_count": 1,
"avg_duration": {
"value": 12129.666666666666
}
}
}]
}
}
}}
Case3
Query
{
"size": 0,
"aggs": {
"dim1": {
"terms": {
"field": "applicationName"
},
"aggs":{
"avg_duration": {
"avg": {
"field": "activities.duration"
}
}
}
}
}}
Result
{
"took": 2,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"failed": 0
},
"hits": {
"total": 1,
"max_score": 0.0,
"hits": []
},
"aggregations": {
"dim1": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [{
"key": "my application name",
"doc_count": 1,
"avg_duration": {
"value": 12129.666666666666
}
}]
}
}}
Case2 includes reserver_nested to back to the root level
Query
{
"size": 0,
"aggs": {
"dim0": {
"nested" : {
"path": "activities"
},
"aggs": {
"dim1": {
"terms": {
"field": "activities.activity"
},
"aggs": {
"dim_reverse":{
"reverse_nested":{},
"aggs":{
"avg_duration": {
"avg": {
"field": "duration"
}
}
}
}
}
}
}
}
}}
Result:
{
"took": 2,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"failed": 0
},
"hits": {
"total": 1,
"max_score": 0.0,
"hits": []
},
"aggregations": {
"dim0": {
"doc_count": 3,
"dim1": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [{
"key": "Prompter v2.3",
"doc_count": 1,
"dim_reverse": {
"doc_count": 1,
"avg_duration": {
"value": 37616.0
}
}
}, {
"key": "Start v2.5",
"doc_count": 1,
"dim_reverse": {
"doc_count": 1,
"avg_duration": {
"value": 37616.0
}
}
}, {
"key": "System v2.3",
"doc_count": 1,
"dim_reverse": {
"doc_count": 1,
"avg_duration": {
"value": 37616.0
}
}
}]
}
}
}}
Case2, without specify the nested path
Query
{
"size": 0,
"aggs": {
"dim1": {
"terms": {
"field": "activities.activity"
},
"aggs":{
"avg_duration": {
"avg": {
"field": "duration"
}
}
}
}
}}
Result The result is identical to the previous one
{
"took": 2,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"failed": 0
},
"hits": {
"total": 1,
"max_score": 0.0,
"hits": []
},
"aggregations": {
"dim1": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [{
"key": "Prompter v2.3",
"doc_count": 1,
"avg_duration": {
"value": 37616.0
}
}, {
"key": "Start v2.5",
"doc_count": 1,
"avg_duration": {
"value": 37616.0
}
}, {
"key": "System v2.3",
"doc_count": 1,
"avg_duration": {
"value": 37616.0
}
}]
}
}
}
Case2, without specifying reserver_nested, "duration" at the root level is not seen
Query
{
"size": 0,
"aggs": {
"dim0": {
"nested" : {
"path": "activities"
},
"aggs": {
"dim1": {
"terms": {
"field": "activities.activity"
},
"aggs":{
"avg_duration": {
"avg": {
"field": "duration"
}
}
}
}
}
}
}}
Result
{
"took": 2,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"failed": 0
},
"hits": {
"total": 1,
"max_score": 0.0,
"hits": []
},
"aggregations": {
"dim0": {
"doc_count": 3,
"dim1": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [{
"key": "Prompter v2.3",
"doc_count": 1,
"avg_duration": {
"value": null
}
}, {
"key": "Start v2.5",
"doc_count": 1,
"avg_duration": {
"value": null
}
}, {
"key": "System v2.3",
"doc_count": 1,
"avg_duration": {
"value": null
}
}]
}
}
}}

Develop Reference

ruby bash windows laravel spring algorithm oracle macos go visual-studio

Elasticsearch - Count duplicated and unique values for a nested field - elasticsearch

Related

Elasticsearch - Count number of occurrence perd field per document

Convert sql query to elasticsearch

query for elasticsearch returning count

Elasticsearch include field in result set of aggregation

Elasticsearch, Nested Aggregations

Categories

Resources