NEST aggregation results are null, however there is data in the debugger - elasticsearch

I'm working on aggregations in NEST, so far everything has worked well, but now when I try to access nested fields through .children the result is null, however the debugger is showing the data correctly.
If I post this query through postman I get the following results:
{
"took": 50,
"timed_out": false,
"_shards": {
"total": 1,
"successful": 1,
"skipped": 0,
"failed": 0
},
"hits": {
"total": {
"value": 9,
"relation": "eq"
},
"max_score": null,
"hits": []
},
"aggregations": {
"filter#CollarSize": {
"meta": {},
"doc_count": 9,
"nested#VariantsProperties": {
"doc_count": 53,
"sterms#CollarSize": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "CollarSize",
"doc_count": 39,
"sterms#banana": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "15",
"doc_count": 7
},
{
"key": "16",
"doc_count": 7
},
{
"key": "17",
"doc_count": 6
},
{
"key": "18",
"doc_count": 6
},
{
"key": "LAR",
"doc_count": 2
},
{
"key": "MED",
"doc_count": 2
},
{
"key": "SML",
"doc_count": 2
},
{
"key": "X.L",
"doc_count": 2
},
{
"key": "XXL",
"doc_count": 2
},
{
"key": "15.5",
"doc_count": 1
},
{
"key": "16.5",
"doc_count": 1
},
{
"key": "XXXL",
"doc_count": 1
}
]
}
},
{
"key": "Colour",
"doc_count": 14,
"sterms#banana": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "Blue",
"doc_count": 7
},
{
"key": "White",
"doc_count": 7
}
]
}
}
]
}
},
"sterms#CollarSize": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": []
}
}
}
}
Is there a way to get inside the child "CollarSize" ? I've tried different combinations with .nested, .children, .terms, .filter however none of these seems to work.

You can get "CollarSize" terms and "banana" terms for each with
// Run the search; the document type can be object since only aggregations are needed.
var response = client.Search<object>(/** your query here **/);
// The raw response keys are typed ("filter#CollarSize", "nested#VariantsProperties",
// "sterms#CollarSize"); NEST exposes them by name only, so navigate
// Filter -> Nested -> Terms with the bare aggregation names.
var collarSizeSignificantTermsAgg = response.Aggregations.Filter("CollarSize").Nested("VariantsProperties").Terms("CollarSize");
foreach(var bucket in collarSizeSignificantTermsAgg.Buckets)
{
Console.WriteLine(bucket.Key);
// Each outer terms bucket carries its own sub-aggregations; read the
// "sterms#banana" sub-aggregation through bucket.Terms("banana").
var bananaSigTerms = bucket.Terms("banana");
foreach(var subBucket in bananaSigTerms.Buckets)
{
Console.WriteLine($"key: {subBucket.Key}, doc_count: {subBucket.DocCount}");
}
}
which prints
CollarSize
key: 15, doc_count: 7
key: 16, doc_count: 7
key: 17, doc_count: 6
key: 18, doc_count: 6
key: LAR, doc_count: 2
key: MED, doc_count: 2
key: SML, doc_count: 2
key: X.L, doc_count: 2
key: XXL, doc_count: 2
key: 15.5, doc_count: 1
key: 16.5, doc_count: 1
key: XXXL, doc_count: 1
Colour
key: Blue, doc_count: 7
key: White, doc_count: 7
Here's a full example, using InMemoryConnection to stub the response
// Demonstrates reading a filter -> nested -> terms -> terms aggregation tree with NEST,
// stubbing the Elasticsearch response through InMemoryConnection so the example is
// runnable without a live cluster.
private static void Main()
{
    var defaultIndex = "my_index";
    var pool = new SingleNodeConnectionPool(new Uri("http://localhost:9200"));
    // NOTE: the stubbed response must be a C# verbatim string literal, which is
    // introduced with @" (the original #" is a compile error). Quotes inside a
    // verbatim string are escaped by doubling them.
    var json = @"{
""took"": 50,
""timed_out"": false,
""_shards"": {
""total"": 1,
""successful"": 1,
""skipped"": 0,
""failed"": 0
},
""hits"": {
""total"": {
""value"": 9,
""relation"": ""eq""
},
""max_score"": null,
""hits"": []
},
""aggregations"": {
""filter#CollarSize"": {
""meta"": { },
""doc_count"": 9,
""nested#VariantsProperties"": {
""doc_count"": 53,
""sterms#CollarSize"": {
""doc_count_error_upper_bound"": 0,
""sum_other_doc_count"": 0,
""buckets"": [
{
""key"": ""CollarSize"",
""doc_count"": 39,
""sterms#banana"": {
""doc_count_error_upper_bound"": 0,
""sum_other_doc_count"": 0,
""buckets"": [
{
""key"": ""15"",
""doc_count"": 7
},
{
""key"": ""16"",
""doc_count"": 7
},
{
""key"": ""17"",
""doc_count"": 6
},
{
""key"": ""18"",
""doc_count"": 6
},
{
""key"": ""LAR"",
""doc_count"": 2
},
{
""key"": ""MED"",
""doc_count"": 2
},
{
""key"": ""SML"",
""doc_count"": 2
},
{
""key"": ""X.L"",
""doc_count"": 2
},
{
""key"": ""XXL"",
""doc_count"": 2
},
{
""key"": ""15.5"",
""doc_count"": 1
},
{
""key"": ""16.5"",
""doc_count"": 1
},
{
""key"": ""XXXL"",
""doc_count"": 1
}
]
}
},
{
""key"": ""Colour"",
""doc_count"": 14,
""sterms#banana"": {
""doc_count_error_upper_bound"": 0,
""sum_other_doc_count"": 0,
""buckets"": [
{
""key"": ""Blue"",
""doc_count"": 7
},
{
""key"": ""White"",
""doc_count"": 7
}
]
}
}
]
}
},
""sterms#CollarSize"": {
""doc_count_error_upper_bound"": 0,
""sum_other_doc_count"": 0,
""buckets"": []
}
}
}
}
";
    // InMemoryConnection returns the canned bytes above for every request.
    var settings = new ConnectionSettings(pool, new InMemoryConnection(Encoding.UTF8.GetBytes(json)))
        .DefaultIndex(defaultIndex);
    var client = new ElasticClient(settings);
    var response = client.Search<object>(s => s);
    // The typed keys ("filter#...", "nested#...", "sterms#...") in the raw JSON are
    // addressed in NEST by bare aggregation name: Filter -> Nested -> Terms.
    var collarSizeSignificantTermsAgg = response.Aggregations.Filter("CollarSize").Nested("VariantsProperties").Terms("CollarSize");
    foreach (var bucket in collarSizeSignificantTermsAgg.Buckets)
    {
        Console.WriteLine(bucket.Key);
        // Sub-aggregation "banana" lives on each outer terms bucket.
        var bananaSigTerms = bucket.Terms("banana");
        foreach (var subBucket in bananaSigTerms.Buckets)
        {
            Console.WriteLine($"key: {subBucket.Key}, doc_count: {subBucket.DocCount}");
        }
    }
}

Related

Nested Aggregation in Nest Elastic Search

In my Elastic document I have CityId, RootId, RootName, and Price. Now I have to find the top 7 roots in a city with the following conditions.
The name and ID of the root which has the minimum price in a city.
Top 7 roots: the roots that have the maximum number of entries in a city.
for Example :-
CityId RootId RootName Price
11 1 ABC 90
11 1 ABC 100
11 2 DEF 80
11 2 DEF 90
11 2 DEF 60
answer for CityId =11:-
RootId RootName Price
2 DEF 60
1 ABC 90
I am not aware of the NEST syntax. Adding a working example in JSON format.
Index Mapping:
{
"mappings":{
"properties":{
"listItems":{
"type":"nested"
}
}
}
}
Index Data:
{
"RootId": 2,
"CityId": 11,
"RootName": "DEF",
"listItems": [
{
"Price": 60
},
{
"Price": 90
},
{
"Price": 80
}
]
}
{
"RootId": 1,
"CityId": 11,
"RootName": "ABC",
"listItems": [
{
"Price": 100
},
{
"Price": 90
}
]
}
Search Query:
{
"size": 0,
"aggs": {
"id_terms": {
"terms": {
"field": "RootId"
},
"aggs": {
"nested_entries": {
"nested": {
"path": "listItems"
},
"aggs": {
"min_position": {
"min": {
"field": "listItems.Price"
}
}
}
}
}
}
}
}
Search Result:
"aggregations": {
"id_terms": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": 1,
"doc_count": 1,
"nested_entries": {
"doc_count": 2,
"min_position": {
"value": 90.0
}
}
},
{
"key": 2,
"doc_count": 1,
"nested_entries": {
"doc_count": 3,
"min_position": {
"value": 60.0
}
}
}
]
}
}
.Query(query => query.Bool(bQuery => bQuery.Filter(
fQuery => fQuery.Terms(ter => ter.Field(f => f.CityId).Terms(cityId))
)))
.Aggregations(agg => agg.Terms("group_by_rootId", st => st.Field(o => o.RootId)
.Order(TermsOrder.CountDescending)
.Aggregations(childAgg => childAgg.Min("min_price_in_group", m =>m.Field(p=>p.Price))
.TopHits("stocks", t11 => t11
.Source(sfd => sfd.Includes(fd => fd.Fields(Constants.IncludedFieldsFromElastic)))
.Size(1)
)
)
)
)
.Size(_popularStocksCount)
.From(0)
.Take(0);

Elasticsearch painless query exception

I am using Elasticsearch 6.2 which uses painless for the inline scripting. One of the fields in my doc has mapping:
"gap_days": {"type": "integer"}
And I have a painless script for search and the few lines are:
int gap = 10; //initialize to a default value
if (doc.containsKey('gap_days')) {
if (doc['gap_days'].value != null) {
gap = doc['gap_days'].value;
}
}
But this keeps throwing an error:
script_stack: [
"gap = doc['gap_days'].value; } } ",
" ^---- HERE"
],
caused_by: {
reason: "cannot convert MethodHandle(Longs)long to (Object)int",
type: "wrong_method_type_exception"
},
reason: "runtime error"
I tried to look into all unique doc['gap_days'] values in the index, and you can see all of them are integers in all documents:
"aggregations": {
"uniq_gaps": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 342,
"buckets": [
{
"key": 0,
"doc_count": 31607
},
{
"key": 365,
"doc_count": 15119
},
{
"key": 5,
"doc_count": 2639
},
{
"key": 21,
"doc_count": 1784
},
{
"key": 14,
"doc_count": 1229
},
{
"key": 3,
"doc_count": 1073
},
{
"key": 7,
"doc_count": 979
},
{
"key": 2,
"doc_count": 728
},
{
"key": 4,
"doc_count": 291
},
{
"key": 10,
"doc_count": 170
}
]
}
}
Then why does it throw an exception saying cannot convert MethodHandle(Longs)long to (Object)int and my script stops working. Any idea how to fix this problem?

How to get the count of a pair of field values?

I need to build a heatmap from the data I have in elasticsearch. The heatmap is the count of cases where two specific fields have the same value. For the data
{'name': 'john', 'age': '10', 'car': 'peugeot'}
{'name': 'john', 'age': '10', 'car': 'audi'}
{'name': 'john', 'age': '12', 'car': 'fiat'}
{'name': 'mary', 'age': '3', 'car': 'mercedes'}
I would like to get the number of unique pairs for the values of name and age. That would be
john, 10, 2
john, 12, 1
mary, 3, 1
I could get all the events and make the count myself but I was hoping that there would be some magical aggregation which could provide that.
It would not be a problem to have it in a nested form, such as
{
'john':
{
'10': 2,
'12': 1
},
'mary':
{
'3': 1
},
}
or whatever is practical.
You can use Inner aggregation. Use query like
POST count-test/_search
{
"size": 0,
"aggs": {
"group By Name": {
"terms": {
"field": "name"
},
"aggs": {
"group By age": {
"terms": {
"field": "age"
}
}
}
}
}
}
The output won't be exactly in the shape you mentioned, but like this:
"aggregations": {
"group By Name": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "john",
"doc_count": 3,
"group By age": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "10",
"doc_count": 2
},
{
"key": "12",
"doc_count": 1
}
]
}
},
{
"key": "mary",
"doc_count": 1,
"group By age": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "3",
"doc_count": 1
}
]
}
}
]
}
}
Hope this helps!!
You can use a term aggregation with a script:
https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-bucket-terms-aggregation.html#_multi_field_terms_aggregation
Like this you can "concat" what you want such as :
{
"aggs" : {
"data" : {
"terms" : {
"script" : {
"source": "doc['name'].value + doc['age'].value",
"lang": "painless"
}
}
}
}
}
(Not sure about the string concat syntax).

Elasticsearch count doc_count occurrences on aggs

I have an elasticsearch aggregation query like this.
{
"size":0,
"aggs": {
"Domains": {
"terms": {
"field": "domains",
"size": 0
},
"aggs":{
"Identifier": {
"terms": {
"field":"alertIdentifier",
"size": 0
}
}
}
}
}
}
And it results in bucket aggregation like following:
"aggregations": {
"Domains": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "IT",
"doc_count": 147,
"Identifier": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "-2623493027134706869",
"doc_count": 7
},
{
"key": "-6590617724257725266",
"doc_count": 7
},
{
"key": "1106147277275983835",
"doc_count": 4
},
{
"key": "-3070527890944301111",
"doc_count": 4
},
{
"key": "-530975388352676402",
"doc_count": 3
},
{
"key": "-6225620509938623294",
"doc_count": 2
},
{
"key": "1652134630535374656",
"doc_count": 1
},
{
"key": "4191687133126999365",
"doc_count": 8
},
{
"key": "6882920925888555081",
"doc_count": 2
}
]
}
}
What I need is to count the number of doc_counts occurrences like this:
1 times: 0
2 times: 2
3 times: 1
equal or more than 4 times: 5
any idea how to build the ES query to count the occurrences of doc_count?
Thanks in advance.
below the ES query:
POST /xt-history*/_search
{
"query": {
"filtered": {"query": {"match_all": {} },
"filter": {
"and": [
{"term": {"type": "10"}}
]
}
}
},
"size": 0,
"aggs": {
"repetitions": {
"scripted_metric": {
"init_script" : "_agg['all'] = []; _agg['all2'] = [];",
"map_script" : "_agg['all'].add(_source['alert']['alertIdentifier'])",
"combine_script" : "for (alertId in _agg['all']) { _agg['all2'].add(alertId); }; return _agg['all2']",
"reduce_script" : "all3 = []; answer = {}; answer['one'] = []; answer['two'] = []; answer['three'] = []; answer['four'] = []; answer['five'] = []; answer['five_plus'] = []; for (alertIds in _aggs) { for (alertId1 in alertIds) { all3.add(alertId1); }; }; for (alertId in all3) { if (answer['five_plus'].contains(alertId)) { } else if(answer['five'].contains(alertId)) {answer['five'].remove(alertId); answer['five_plus'].add(alertId);} else if(answer['four'].contains(alertId)) {answer['four'].remove(alertId); answer['five'].add(alertId);} else if(answer['three'].contains(alertId)) {answer['three'].remove(alertId); answer['four'].add(alertId);} else if(answer['two'].contains(alertId)) {answer['two'].remove(alertId); answer['three'].add(alertId);} else if(answer['one'].contains(alertId)) {answer['one'].remove(alertId); answer['two'].add(alertId);} else {answer['one'].add(alertId);}; }; fans = []; fans.add(answer['one'].size()); fans.add(answer['two'].size()); fans.add(answer['three'].size()); fans.add(answer['four'].size()); fans.add(answer['five'].size()); fans.add(answer['five_plus'].size()); return fans"
}
}
}
}
query output:
{
"took": 4770,
"timed_out": false,
"_shards": {
"total": 190,
"successful": 189,
"failed": 0
},
"hits": {
"total": 334,
"max_score": 0,
"hits": []
},
"aggregations": {
"repetitions": {
"value": [
63,
39,
3,
10,
2,
13
]
}
}
}
where first value is the number of repetitions for doc_count=1, second value is the number of repetitions for doc_count=2, ... last value is the number of repetition for doc_count >=5

Elasticsearch: find documents with distinct values and then aggregate over them

My index has a log-like structure: I insert a version of a document whenever an event occurs. For example, here are documents in the index:
{ "key": "a", subkey: 0 }
{ "key": "a", subkey: 0 }
{ "key": "a", subkey: 1 }
{ "key": "a", subkey: 1 }
{ "key": "b", subkey: 0 }
{ "key": "b", subkey: 0 }
{ "key": "b", subkey: 1 }
{ "key": "b", subkey: 1 }
I'm trying to construct a query in ElasticSearch which is basically equivalent to the following SQL query:
SELECT COUNT(*), key, subkey
FROM (SELECT DISTINCT key, subkey FROM t)
The answer to this query would obviously be
(1, a, 0)
(1, a, 1)
(1, b, 0)
(1, b, 1)
How would I replicate this query in Elasticsearch? I came up with the following:
GET test_index/test_type/_search?search_type=count
{
"aggregations": {
"count_aggr": {
"terms": {
"field": "concatenated_key"
},
"aggs": {
"sample_doc": {
"top_hits": {
"size": 1
}
}
}
}
}
}
concatenated_key is a concatenation of key and subkey. This query would create a bucket for each (key, subkey) combination and return a sample document from each bucket. However, I don't know how can I aggregate over the fields of _source.
Would appreciate any ideas. Thanks!
If you don't have the possibility to re-index the documents and to add your own concatenated key field, this is a way of doing it:
GET /my_index/my_type/_search?search_type=count
{
"aggs": {
"key_agg": {
"terms": {
"field": "key",
"size": 10
},
"aggs": {
"sub_key_agg": {
"terms": {
"field": "subkey",
"size": 10
}
}
}
}
}
}
It will give you something like this:
"buckets": [
{
"key": "a",
"doc_count": 4,
"sub_key_agg": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": 0,
"doc_count": 2
},
{
"key": 1,
"doc_count": 2
}
]
}
},
{
"key": "b",
"doc_count": 4,
"sub_key_agg": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": 0,
"doc_count": 2
},
{
"key": 1,
"doc_count": 2
}
]
}
}
]
where you have the key - "key": "a" - and then each combination with this key and the number of docs that match key=a and subkey=0 or key=a and subkey=1:
"buckets": [
{
"key": 0,
"doc_count": 2
},
{
"key": 1,
"doc_count": 2
}
]
Same goes for the other key.

Resources