I'm working on aggregations in NEST. So far everything has worked well, but now when I try to access nested fields through .children the result is null, even though the debugger shows the data correctly.
If I post this query through postman I get the following results:
{
"took": 50,
"timed_out": false,
"_shards": {
"total": 1,
"successful": 1,
"skipped": 0,
"failed": 0
},
"hits": {
"total": {
"value": 9,
"relation": "eq"
},
"max_score": null,
"hits": []
},
"aggregations": {
"filter#CollarSize": {
"meta": {},
"doc_count": 9,
"nested#VariantsProperties": {
"doc_count": 53,
"sterms#CollarSize": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "CollarSize",
"doc_count": 39,
"sterms#banana": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "15",
"doc_count": 7
},
{
"key": "16",
"doc_count": 7
},
{
"key": "17",
"doc_count": 6
},
{
"key": "18",
"doc_count": 6
},
{
"key": "LAR",
"doc_count": 2
},
{
"key": "MED",
"doc_count": 2
},
{
"key": "SML",
"doc_count": 2
},
{
"key": "X.L",
"doc_count": 2
},
{
"key": "XXL",
"doc_count": 2
},
{
"key": "15.5",
"doc_count": 1
},
{
"key": "16.5",
"doc_count": 1
},
{
"key": "XXXL",
"doc_count": 1
}
]
}
},
{
"key": "Colour",
"doc_count": 14,
"sterms#banana": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "Blue",
"doc_count": 7
},
{
"key": "White",
"doc_count": 7
}
]
}
}
]
}
},
"sterms#CollarSize": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": []
}
}
}
}
Is there a way to get inside the child "CollarSize" ? I've tried different combinations with .nested, .children, .terms, .filter however none of these seems to work.
You can get "CollarSize" terms and "banana" terms for each with
// Execute the search; substitute the placeholder with the actual query.
var response = client.Search<object>(/** your query here **/);

// Descend through the aggregations in the same order they are nested in the
// JSON response: filter "CollarSize" -> nested "VariantsProperties" -> terms "CollarSize".
var outerTerms = response.Aggregations
    .Filter("CollarSize")
    .Nested("VariantsProperties")
    .Terms("CollarSize");

foreach (var outerBucket in outerTerms.Buckets)
{
    Console.WriteLine(outerBucket.Key);

    // Every outer bucket carries its own "banana" terms sub-aggregation.
    foreach (var innerBucket in outerBucket.Terms("banana").Buckets)
    {
        Console.WriteLine($"key: {innerBucket.Key}, doc_count: {innerBucket.DocCount}");
    }
}
which prints
CollarSize
key: 15, doc_count: 7
key: 16, doc_count: 7
key: 17, doc_count: 6
key: 18, doc_count: 6
key: LAR, doc_count: 2
key: MED, doc_count: 2
key: SML, doc_count: 2
key: X.L, doc_count: 2
key: XXL, doc_count: 2
key: 15.5, doc_count: 1
key: 16.5, doc_count: 1
key: XXXL, doc_count: 1
Colour
key: Blue, doc_count: 7
key: White, doc_count: 7
Here's a full example, using InMemoryConnection to stub the response
/// <summary>
/// Self-contained example: builds an <c>ElasticClient</c> over an
/// <c>InMemoryConnection</c> that returns the canned JSON below, runs a search,
/// and prints the keys and document counts of the "CollarSize" terms buckets and
/// their "banana" sub-buckets.
/// </summary>
private static void Main()
{
    var defaultIndex = "my_index";
    var pool = new SingleNodeConnectionPool(new Uri("http://localhost:9200"));

    // Verbatim string literal (@"..."): embedded double quotes are escaped by doubling them.
    var json = @"{
""took"": 50,
""timed_out"": false,
""_shards"": {
""total"": 1,
""successful"": 1,
""skipped"": 0,
""failed"": 0
},
""hits"": {
""total"": {
""value"": 9,
""relation"": ""eq""
},
""max_score"": null,
""hits"": []
},
""aggregations"": {
""filter#CollarSize"": {
""meta"": { },
""doc_count"": 9,
""nested#VariantsProperties"": {
""doc_count"": 53,
""sterms#CollarSize"": {
""doc_count_error_upper_bound"": 0,
""sum_other_doc_count"": 0,
""buckets"": [
{
""key"": ""CollarSize"",
""doc_count"": 39,
""sterms#banana"": {
""doc_count_error_upper_bound"": 0,
""sum_other_doc_count"": 0,
""buckets"": [
{
""key"": ""15"",
""doc_count"": 7
},
{
""key"": ""16"",
""doc_count"": 7
},
{
""key"": ""17"",
""doc_count"": 6
},
{
""key"": ""18"",
""doc_count"": 6
},
{
""key"": ""LAR"",
""doc_count"": 2
},
{
""key"": ""MED"",
""doc_count"": 2
},
{
""key"": ""SML"",
""doc_count"": 2
},
{
""key"": ""X.L"",
""doc_count"": 2
},
{
""key"": ""XXL"",
""doc_count"": 2
},
{
""key"": ""15.5"",
""doc_count"": 1
},
{
""key"": ""16.5"",
""doc_count"": 1
},
{
""key"": ""XXXL"",
""doc_count"": 1
}
]
}
},
{
""key"": ""Colour"",
""doc_count"": 14,
""sterms#banana"": {
""doc_count_error_upper_bound"": 0,
""sum_other_doc_count"": 0,
""buckets"": [
{
""key"": ""Blue"",
""doc_count"": 7
},
{
""key"": ""White"",
""doc_count"": 7
}
]
}
}
]
}
},
""sterms#CollarSize"": {
""doc_count_error_upper_bound"": 0,
""sum_other_doc_count"": 0,
""buckets"": []
}
}
}
}
";

    // The in-memory connection is handed the canned JSON bytes to use as the response.
    var settings = new ConnectionSettings(pool, new InMemoryConnection(Encoding.UTF8.GetBytes(json)))
        .DefaultIndex(defaultIndex);
    var client = new ElasticClient(settings);

    var response = client.Search<object>(s => s);

    // Follow the nesting of the response: filter -> nested -> terms.
    var collarSizeTermsAgg = response.Aggregations
        .Filter("CollarSize")
        .Nested("VariantsProperties")
        .Terms("CollarSize");

    foreach (var bucket in collarSizeTermsAgg.Buckets)
    {
        Console.WriteLine(bucket.Key);

        // Each outer bucket has its own "banana" terms sub-aggregation.
        var bananaTermsAgg = bucket.Terms("banana");
        foreach (var subBucket in bananaTermsAgg.Buckets)
        {
            Console.WriteLine($"key: {subBucket.Key}, doc_count: {subBucket.DocCount}");
        }
    }
}
Related
In my Elasticsearch document I have CityId, RootId, RootName and Price. Now I have to find the top 7 roots in a city with the following conditions.
Name and id of root which has minimum price in a City.
Top 7 roots: the roots that have the largest number of entries in a city.
for Example :-
CityId RootId RootName Price
11 1 ABC 90
11 1 ABC 100
11 2 DEF 80
11 2 DEF 90
11 2 DEF 60
answer for CityId =11:-
RootId RootName Price
2 DEF 60
1 ABC 90
I am not familiar with the NEST syntax, so I am adding a working example in JSON format.
Index Mapping:
{
"mappings":{
"properties":{
"listItems":{
"type":"nested"
}
}
}
}
Index Data:
{
"RootId": 2,
"CityId": 11,
"RootName": "DEF",
"listItems": [
{
"Price": 60
},
{
"Price": 90
},
{
"Price": 80
}
]
}
{
"RootId": 1,
"CityId": 11,
"RootName": "ABC",
"listItems": [
{
"Price": 100
},
{
"Price": 90
}
]
}
Search Query:
{
"size": 0,
"aggs": {
"id_terms": {
"terms": {
"field": "RootId"
},
"aggs": {
"nested_entries": {
"nested": {
"path": "listItems"
},
"aggs": {
"min_position": {
"min": {
"field": "listItems.Price"
}
}
}
}
}
}
}
}
Search Result:
"aggregations": {
"id_terms": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": 1,
"doc_count": 1,
"nested_entries": {
"doc_count": 2,
"min_position": {
"value": 90.0
}
}
},
{
"key": 2,
"doc_count": 1,
"nested_entries": {
"doc_count": 3,
"min_position": {
"value": 60.0
}
}
}
]
}
}
.Query(query => query.Bool(bQuery => bQuery.Filter(
fQuery => fQuery.Terms(ter => ter.Field(f => f.CityId).Terms(cityId))
)))
.Aggregations(agg => agg.Terms("group_by_rootId", st => st.Field(o => o.RootId)
.Order(TermsOrder.CountDescending)
.Aggregations(childAgg => childAgg.Min("min_price_in_group", m =>m.Field(p=>p.Price))
.TopHits("stocks", t11 => t11
.Source(sfd => sfd.Includes(fd => fd.Fields(Constants.IncludedFieldsFromElastic)))
.Size(1)
)
)
)
)
.Size(_popularStocksCount)
.From(0)
.Take(0);
I am using Elasticsearch 6.2 which uses painless for the inline scripting. One of the fields in my doc has mapping:
"gap_days": {"type": "integer"}
And I have a painless script for search and the few lines are:
int gap = 10; //initialize to a default value
if (doc.containsKey('gap_days')) {
if (doc['gap_days'].value != null) {
gap = doc['gap_days'].value;
}
}
But this keeps throwing an error:
script_stack: [
"gap = doc['gap_days'].value; } } ",
" ^---- HERE"
],
caused_by: {
reason: "cannot convert MethodHandle(Longs)long to (Object)int",
type: "wrong_method_type_exception"
},
reason: "runtime error"
I tried to look into all unique doc['gap_days'] values in the index, and you can see all of them are integer in all documents
"aggregations": {
"uniq_gaps": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 342,
"buckets": [
{
"key": 0,
"doc_count": 31607
},
{
"key": 365,
"doc_count": 15119
},
{
"key": 5,
"doc_count": 2639
},
{
"key": 21,
"doc_count": 1784
},
{
"key": 14,
"doc_count": 1229
},
{
"key": 3,
"doc_count": 1073
},
{
"key": 7,
"doc_count": 979
},
{
"key": 2,
"doc_count": 728
},
{
"key": 4,
"doc_count": 291
},
{
"key": 10,
"doc_count": 170
}
]
}
}
Then why does it throw an exception saying cannot convert MethodHandle(Longs)long to (Object)int and my script stops working. Any idea how to fix this problem?
I need to build a heatmap from the data I have in elasticsearch. The heatmap is the count of cases where two specific fields have the same value. For the data
{'name': 'john', 'age': '10', 'car': 'peugeot'}
{'name': 'john', 'age': '10', 'car': 'audi'}
{'name': 'john', 'age': '12', 'car': 'fiat'}
{'name': 'mary', 'age': '3', 'car': 'mercedes'}
I would like to get the number of unique pairs for the values of name and age. That would be
john, 10, 2
john, 12, 1
mary, 3, 1
I could get all the events and make the count myself but I was hoping that there would be some magical aggregation which could provide that.
It would not be a problem to have it in a nested form, such as
{
'john':
{
'10': 2,
'12': 1
},
'mary':
{
'3': 1
},
}
or whatever is practical.
You can use a sub-aggregation (a terms aggregation inside another terms aggregation). Use a query like:
POST count-test/_search
{
"size": 0,
"aggs": {
"group By Name": {
"terms": {
"field": "name"
},
"aggs": {
"group By age": {
"terms": {
"field": "age"
}
}
}
}
}
}
The output won't be exactly in the form you mentioned, but it will look like this:
"aggregations": {
"group By Name": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "john",
"doc_count": 3,
"group By age": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "10",
"doc_count": 2
},
{
"key": "12",
"doc_count": 1
}
]
}
},
{
"key": "mary",
"doc_count": 1,
"group By age": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "3",
"doc_count": 1
}
]
}
}
]
}
}
Hope this helps!!
You can use a terms aggregation with a script:
https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-bucket-terms-aggregation.html#_multi_field_terms_aggregation
Like this you can "concat" what you want such as :
{
"aggs" : {
"data" : {
"terms" : {
"script" : {
"source": "doc['name'].value + doc['age'].value",
"lang": "painless"
}
}
}
}
}
(Not sure about the string concat syntax).
I have an elasticsearch aggregation query like this.
{
"size":0,
"aggs": {
"Domains": {
"terms": {
"field": "domains",
"size": 0
},
"aggs":{
"Identifier": {
"terms": {
"field":"alertIdentifier",
"size": 0
}
}
}
}
}
}
And it results in bucket aggregation like following:
"aggregations": {
"Domains": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "IT",
"doc_count": 147,
"Identifier": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "-2623493027134706869",
"doc_count": 7
},
{
"key": "-6590617724257725266",
"doc_count": 7
},
{
"key": "1106147277275983835",
"doc_count": 4
},
{
"key": "-3070527890944301111",
"doc_count": 4
},
{
"key": "-530975388352676402",
"doc_count": 3
},
{
"key": "-6225620509938623294",
"doc_count": 2
},
{
"key": "1652134630535374656",
"doc_count": 1
},
{
"key": "4191687133126999365",
"doc_count": 8
},
{
"key": "6882920925888555081",
"doc_count": 2
}
]
}
}
What I need is to count the number of doc_counts occurrences like this:
1 times: 0
2 times: 2
3 times: 1
equal or more than 4 times: 5
any idea how to build the ES query to count the occurrences of doc_count?
Thanks in advance.
below the ES query:
POST /xt-history*/_search
{
"query": {
"filtered": {"query": {"match_all": {} },
"filter": {
"and": [
{"term": {"type": "10"}}
]
}
}
},
"size": 0,
"aggs": {
"repetitions": {
"scripted_metric": {
"init_script" : "_agg['all'] = []; _agg['all2'] = [];",
"map_script" : "_agg['all'].add(_source['alert']['alertIdentifier'])",
"combine_script" : "for (alertId in _agg['all']) { _agg['all2'].add(alertId); }; return _agg['all2']",
"reduce_script" : "all3 = []; answer = {}; answer['one'] = []; answer['two'] = []; answer['three'] = []; answer['four'] = []; answer['five'] = []; answer['five_plus'] = []; for (alertIds in _aggs) { for (alertId1 in alertIds) { all3.add(alertId1); }; }; for (alertId in all3) { if (answer['five_plus'].contains(alertId)) { } else if(answer['five'].contains(alertId)) {answer['five'].remove(alertId); answer['five_plus'].add(alertId);} else if(answer['four'].contains(alertId)) {answer['four'].remove(alertId); answer['five'].add(alertId);} else if(answer['three'].contains(alertId)) {answer['three'].remove(alertId); answer['four'].add(alertId);} else if(answer['two'].contains(alertId)) {answer['two'].remove(alertId); answer['three'].add(alertId);} else if(answer['one'].contains(alertId)) {answer['one'].remove(alertId); answer['two'].add(alertId);} else {answer['one'].add(alertId);}; }; fans = []; fans.add(answer['one'].size()); fans.add(answer['two'].size()); fans.add(answer['three'].size()); fans.add(answer['four'].size()); fans.add(answer['five'].size()); fans.add(answer['five_plus'].size()); return fans"
}
}
}
}
query output:
{
"took": 4770,
"timed_out": false,
"_shards": {
"total": 190,
"successful": 189,
"failed": 0
},
"hits": {
"total": 334,
"max_score": 0,
"hits": []
},
"aggregations": {
"repetitions": {
"value": [
63,
39,
3,
10,
2,
13
]
}
}
}
where the first value is the number of alert identifiers with doc_count=1, the second value is the number with doc_count=2, and so on up to the fifth value for doc_count=5; the last value is the number of alert identifiers with doc_count > 5
My index has a log-like structure: I insert a version of a document whenever an event occurs. For example, here are documents in the index:
{ "key": "a", subkey: 0 }
{ "key": "a", subkey: 0 }
{ "key": "a", subkey: 1 }
{ "key": "a", subkey: 1 }
{ "key": "b", subkey: 0 }
{ "key": "b", subkey: 0 }
{ "key": "b", subkey: 1 }
{ "key": "b", subkey: 1 }
I'm trying to construct a query in ElasticSearch which is basically equivalent to the following SQL query:
SELECT COUNT(*), key, subkey
FROM (SELECT DISTINCT key, subkey FROM t)
The answer to this query would obviously be
(1, a, 0)
(1, a, 1)
(1, b, 0)
(1, b, 1)
How would I replicate this query in Elasticsearch? I came up with the following:
GET test_index/test_type/_search?search_type=count
{
"aggregations": {
"count_aggr": {
"terms": {
"field": "concatenated_key"
},
"aggs": {
"sample_doc": {
"top_hits": {
"size": 1
}
}
}
}
}
}
concatenated_key is a concatenation of key and subkey. This query would create a bucket for each (key, subkey) combination and return a sample document from each bucket. However, I don't know how can I aggregate over the fields of _source.
Would appreciate any ideas. Thanks!
If you don't have the possibility to re-index the documents and to add your own concatenated key field, this is a way of doing it:
GET /my_index/my_type/_search?search_type=count
{
"aggs": {
"key_agg": {
"terms": {
"field": "key",
"size": 10
},
"aggs": {
"sub_key_agg": {
"terms": {
"field": "subkey",
"size": 10
}
}
}
}
}
}
It will give you something like this:
"buckets": [
{
"key": "a",
"doc_count": 4,
"sub_key_agg": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": 0,
"doc_count": 2
},
{
"key": 1,
"doc_count": 2
}
]
}
},
{
"key": "b",
"doc_count": 4,
"sub_key_agg": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": 0,
"doc_count": 2
},
{
"key": 1,
"doc_count": 2
}
]
}
}
]
where you have the key - "key": "a" - and then each combination with this key and the number of docs that match key=a and subkey=0 or key=a and subkey=1:
"buckets": [
{
"key": 0,
"doc_count": 2
},
{
"key": 1,
"doc_count": 2
}
]
Same goes for the other key.