elasticsearch aggregation group by null key - elasticsearch

Here is the data in my Elasticsearch server:
{"system": "aaa"},
{"system": "bbb"},
{"system": null}
I want to get statistics for the "system" field, so I ran this query:
{
"aggs" : {
"myAggrs" : {
"terms" : { "field" : "system" }
}
}
It gives me this result:
{
"key": "aaa",
"doc_count": 1
},
{
"key": "bbb",
"doc_count": 1
}
But the bucket with "key": null is not included in the result. How can I get it?
Here is my expected result:
{
"key": "aaa",
"doc_count": 1
},
{
"key": "bbb",
"doc_count": 1
},
{
"key": null,
"doc_count": 1
}

I don't think you can do this with terms. Try with another aggregation:
{
"aggs": {
"myAggrs": {
"terms": {
"field": "system"
}
},
"missing_system": {
"missing": {
"field": "system"
}
}
}
}
And the result will be:
"aggregations": {
"myAggrs": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "aaa",
"doc_count": 1
},
{
"key": "bbb",
"doc_count": 1
}
]
},
"missing_system": {
"doc_count": 1
}
}

Related

how to group by duplicate Field in Array List : ElasticSearch

I had problem with nested aggregation in Elasticsearch. I have mapping with nested field:
"Topics":{"type":"nested","properties":{
"CategoryLev1":{"type":"text","fields":{"keyword":{"type":"keyword","ignore_above":256}}},
"CategoryLev2":{"type":"text","fields":{"keyword":{"type":"keyword","ignore_above":256}}} }}
After index Document:
"Topics": [
{
"CategoryRelevancy": "1.0",
"CategoryLev2": "Money",
"CategoryLev1": "Sales"
},
{
"CategoryRelevancy": "2.0",
"CategoryLev2": "Money",
"CategoryLev1": "Sales"
},
{
"CategoryRelevancy": "1.0",
"CategoryLev2": "Electrical",
"CategoryLev1": "Product"
}
]
"Topics": [
{
"CategoryRelevancy": "1.0",
"CategoryLev2": "Money",
"CategoryLev1": "Sales"
},
{
"CategoryRelevancy": "2.0",
"CategoryLev2": "Methods",
"CategoryLev1": "Sales"
},
{
"CategoryRelevancy": "1.0",
"CategoryLev2": "Engine",
"CategoryLev1": "Product"
}
]
As you can see, the nested array contains two Topics entries with duplicate key and value fields (the same CategoryLev1 and CategoryLev2). Then I ran the following query:
{
"size": 10,
"aggregations": {
"resellers": {
"nested": {
"path": "Topics"
},
"aggregations": {
"topicGroup": {
"terms": {
"field": "Topics.CategoryLev1.keyword",
"size": 10
},
"aggregations": {
"Subtopic": {
"terms": {
"field": "Topics.CategoryLev2.keyword"
}
}
}
}
}
}
}
}
Then I get the following result, which is grouped by topic category:
{
"hits": {
"total": 2,
"max_score": 0,
"hits": []
},
"aggregations": {
"resellers": {
"doc_count": 6,
"topicGroup": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "Sales",
"doc_count": 3,
"Subtopic": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "Money",
"doc_count": 3
},
{
"key": "Method",
"doc_count": 1
}
]
}
},
{
"key": "Product",
"doc_count": 2,
"Subtopic": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "Electrical",
"doc_count": 1
},
{
"key": "Engine",
"doc_count": 1
}
]
}
}
]
}
}
}
}
But I want a result like this:
"buckets": [
{
"key": "Sales",
"doc_count": 2,
"Subtopic": {
"buckets": [
{
"key": "Money",
"doc_count": 2
},
{
"key": "Method",
"doc_count": 1
}
]
}
},
{
"key": "Product",
"doc_count": 2,
"Subtopic": {
"buckets": [
{
"key": "Electrical",
"doc_count": 1
},
{
"key": "Engine",
"doc_count": 1
}]
}
}]
Thanks in advance :)

Elasticsearch - Get aggregation key sort as number

I wrote a query that aggregates some data, and its aggregation key is a number. I tried to sort the aggregation result by key, but Elasticsearch treated the key as a string.
Since the number of result buckets is currently quite large, it is not feasible to re-sort them on the client side. Any ideas?
Here is my query.
"aggregations" : {
"startcount" : {
"terms" : {
"script" : "round(doc['startat'].value/1000)",
"size" : 1000,
"order" : { "_term" : "asc" }
}
}
}
And here are the current result buckets:
"buckets": [
{
"key": "0",
"doc_count": 68
},
{
"key": "1",
"doc_count": 21
},
{
"key": "10",
"doc_count": 6
},
{
"key": "11",
"doc_count": 16
},
This is my expected result:
"buckets": [
{
"key": "0",
"doc_count": 68
},
{
"key": "1",
"doc_count": 21
},
{
"key": "2", // not '10'
"doc_count": 6
},
{
"key": "3", // not '11'
"doc_count": 16
},
Using the value_script approach should fix the alphabetical sort issue:
Example:
{
"size": 0,
"aggregations": {
"startcount": {
"terms": {
"field": "startat",
"script": "round(_value/1000)",
"size": 1000,
"order": {
"_term": "asc"
}
}
}
}
}
This is a multiple group-by scenario where the data is sorted by key in descending order.
{
"size": 0,
"aggs": {
"categories": {
"filter": {
"exists": {
"field": "organization_industries"
}
},
"aggs": {
"names": {
"terms": {
"field": "organization_revenue_in_thousands_int.keyword",
"size": 200,
"order": {
"_key": "desc"
}
},
"aggs": {
"industry_stats": {
"terms": {
"field": "organization_industries.keyword"
}
}
}
}
}
}
}
}
Output
"aggregations": {
"categories": {
"doc_count": 195161605,
"names": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 19226983,
"buckets": [
{
"key": "99900",
"doc_count": 1742,
"industry_stats": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "internet",
"doc_count": 1605
},
{
"key": "investment management",
"doc_count": 81
},
{
"key": "biotechnology",
"doc_count": 54
},
{
"key": "computer & network security",
"doc_count": 2
}
]
}
},
{
"key": "998000",
"doc_count": 71,
"industry_stats": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "finance",
"doc_count": 48
},
{
"key": "information technology & services",
"doc_count": 23
}
]
}
}
}
]
}
}
enter code here

elasticsearch aggregation result key filter from field or fields

I am using Elasticsearch 1.7 and I need to filter out ("must not") certain key values from an aggregation result.
Below is the structure:
{"RU": "2016-06-25T15:07:46.144","zt":"bl","zi":"z101"}
{"RU": "2016-06-25T15:07:46.144","zt":"bl","zi":"z102"}
{"RU": "2016-06-25T15:07:46.144","zt":"bl","zi":"z103"}
{"RU": "2016-06-25T15:07:46.144","zt":"un","zi":"z201"}
{"RU": "2016-06-25T15:07:46.144","zt":"un","zi":"z202"}
{"RU": "2016-06-25T15:07:46.144","zt":"g1","zi":"z101"}
{"RU": "2016-06-25T15:07:46.144","zt":"g1","zi":"z502"}
{"RU": "2016-06-25T15:07:46.144","zt":"g2","zi":"z201"}
{"RU": "2016-06-25T15:07:46.144","zt":"g2","zi":"z503"}
My query :
{"size": 0,
"aggs": {
"findunique": {
"filter": {
"bool": {
"must_not": [
{
"terms": {
"zt": [
"bl",
"un"
]
}
}
],
"must": [
{
"terms": {
"zt": [
"g1",
"g2"
]
}
}
]
}
},
"aggs": {
"uniquezi": {
"terms": {
"field": "zi"
}
}
}
}
}
}
-------------------------------------------------------
output :
{"aggregations": {
"findunique": {
"doc_count": 4,
"uniquezi": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "z101",
"doc_count": 1
},
{
"key": "z201",
"doc_count": 1
},
{
"key": "z502",
"doc_count": 1
},
{
"key": "z503",
"doc_count": 1
}
]
}
}
}
}}
Now I want zi = z101 and zi = z201 to be excluded from the list, because those values also belong to zt = bl and zt = un.
Please advise. Thanks!
As a suggestion, you could try adding two aggregations with a filter set on the "zt" field.
This way you will get two sets, and later in code you can extract everything from "wanted" that is not in "unwanted".
{
"size": 0,
"aggs" : {
"messages" : {
"filters" : {
"filters" : {
"wanted" : { "terms" : { "zt" : [ "g1", "g2" ] }},
"unwanted" : { "terms" : { "zt" : [ "bl", "un" ] }}
}
},
"aggs" : {
"monthly" : {
"terms": {"field" : "zi"}
}
}
}
}
}
Response:
{
"took": 2,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"failed": 0
},
"hits": {
"total": 9,
"max_score": 0,
"hits": []
},
"aggregations": {
"messages": {
"buckets": {
"wanted": {
"doc_count": 4,
"distinctValuesAgg": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "z101",
"doc_count": 1
},
{
"key": "z201",
"doc_count": 1
},
{
"key": "z502",
"doc_count": 1
},
{
"key": "z503",
"doc_count": 1
}
]
}
},
"unwanted": {
"doc_count": 5,
"distinctValuesAgg": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "z101",
"doc_count": 1
},
{
"key": "z102",
"doc_count": 1
},
{
"key": "z103",
"doc_count": 1
},
{
"key": "z201",
"doc_count": 1
},
{
"key": "z202",
"doc_count": 1
}
]
}
}
}
}
}
}

How to use elasticsearch facet query to groupby the result

I have a json data in the below format
{
"ID": { "Color": "Black", "Product": "Car" },
"ID": { "Color": "Black", "Product": "Car" },
"ID": { "Color": "Black", "Product": "Van" },
"ID": { "Color": "Black", "Product": "Van" },
"ID": { "Color": "Ash", "Product": "Bike" }
}
I want to calculate the count of each product (e.g. car) and the corresponding color. I am using Elasticsearch facets to do this.
My query
$http.post('http://localhost:9200/product/productinfoinfo/_search?size=5', { "aggregations": { "ProductInfo": { "terms": { "field": "product" } } }, "facets": { "ProductColor": { "terms": { "field": "Color", "size": 10 } } } })
I am getting the output like below
"facets": { "ProductColor": { "_type": "terms", "missing": 0, "total": 7115, "other": 1448, "terms": [ { "term": "Black", "count": 4 }, { "term": "Ash","count":1} },
"aggregations": { "ProductInfo": { "doc_count_error_upper_bound": 94, "sum_other_doc_count": 11414, "buckets": [ { "key": "Car", "doc_count": 2 }, { "key": "Van", "doc_count": 2 }, { "key": "Bike", "doc_count": 1 } ] } } }
What I actually want is,
[ { "key": "Car", "doc_count": 2, "Color":"Black", "count":2 }, { "key": "Van", "doc_count": 2,"Color":"Black", "count":2 }, { "key": "Bike", "doc_count": 1,"Color":"Ash", "count":1 } ]
I would like to group the results. Is it possible to do this in an Elasticsearch query?
Thanks in advance
This is because you're using both aggregations and facets, which, although similar, are not meant to be used together.
Facets are deprecated and will soon be removed from Elasticsearch.
Aggregations are the way to go to make "group by"-like queries.
You just have to nest another terms aggregation in the first one, like this :
{
"aggs": {
"By_type": {
"terms": {
"field": "Product"
},
"aggs": {
"By_color": {
"terms": {
"field": "Color"
}
}
}
}
}
}
And the result will be close to what you want :
"aggregations": {
"By_type": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "bike",
"doc_count": 2,
"By_color": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "ash",
"doc_count": 1
},
{
"key": "black",
"doc_count": 1
}
]
}
},
{
"key": "car",
"doc_count": 2,
"By_color": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "black",
"doc_count": 2
}
]
}
},
{
"key": "van",
"doc_count": 1,
"By_color": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "black",
"doc_count": 1
}
]
}
}
]
}
}

How to Perform a MultiTerms Aggregation using Script?

In the elastic documentation it says I can perform multi-term aggregation if I use script (reference). It is not clear to me how this is done. Basically what I am after is count(*) ... group by logsource,pid. Without a script, it seems I can only do one group by.
Can someone show me an example?
Using script can be costly, but to answer your question,
POST /_search
{
"size": 0,
"aggs": {
"test": {
"terms": {
"script": "doc['logsource'].value+\":\"+doc['pid'].value",
"size": 0
}
}
}
}
Will do!
I think by using sub aggregations I can get the intended result, take for example:
{
"query" : {
"match": {
"message": "error"
}
},
"aggs": {
"g_logsource": {
"terms": {
"field": "logsource"
},
"aggs": {
"g_pid": {
"terms": {
"field": "pid"
},
"aggs" : {
"ts" : {
"date_histogram" : {
"field" : "#timestamp",
"interval" : "1h"
}
}
}
}
}
}
}
}
Returns:
"aggregations": {
"g_logsource": {
"doc_count_error_upper_bound": 0,
"buckets": [
{
"key": "nyhq",
"doc_count": 2129,
"g_pid": {
"doc_count_error_upper_bound": 5,
"buckets": [
{
"key": "5641",
"doc_count": 9,
"ts": {
"buckets": [
{
"key_as_string": "2014-12-07T04:00:00.000Z",
"key": 1417924800000,
"doc_count": 2
},
{
"key_as_string": "2014-12-07T08:00:00.000Z",
"key": 1417939200000,
"doc_count": 4
},
{
"key_as_string": "2014-12-07T18:00:00.000Z",
"key": 1417975200000,
"doc_count": 1
},
{
"key_as_string": "2014-12-07T20:00:00.000Z",
"key": 1417982400000,
"doc_count": 2
}
]
}
},
{
"key": "14839",
"doc_count": 3,
"ts": {
"buckets": [
{
"key_as_string": "2014-12-07T09:00:00.000Z",
"key": 1417942800000,
"doc_count": 1
},
{
"key_as_string": "2014-12-07T20:00:00.000Z",
"key": 1417982400000,
"doc_count": 2
}
]
}
}
In my code, I can then combine the groups into {logsource: nyhq, pid: 5641} as the identifier for each time series. I think this is the same as GROUP BY in SQL. I would appreciate any comments confirming this.

Resources