Elastic query with an aggregation on a nested field and a sub aggregation on field (from root) returns empty array of buckets - elasticsearch

I wrote a query with an aggregation on a nested field and a sub-aggregation on a field that is not nested but lives at the root of the document. I expected to get a sum for each ownerId, but instead I got an empty bucket array.
The following query returns an empty array of buckets - though there are results and a positive sum.
GET my-index/_search
{
"size": 0,
"aggs": {
"agg_owner": {
"nested": {
"path": "owner_fields"
},
"aggs": {
"raw_names": {
"terms": {
"field": "owner_fields.id.keyword",
"size": 10
},
"aggs": {
"total_amount": {
"reverse_nested": {},
"aggs": {
"total_inner_amount": {
"terms": {
"field": "amount",
"size": 10
}
}
}
}
}
}
}
}
}
}
returns:
{
"took" : 4,
"timed_out" : false,
"_shards" : {
"total" : 2,
"successful" : 2,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : 45430,
"max_score" : 0.0,
"hits" : [ ]
},
"aggregations" : {
"agg_owner" : {
"doc_count" : 15494,
"raw_names" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [ ]
}
}
}
}
I expected to see a sum for each ownerId, but that didn't happen.

I had to remove the keyword sub-field from the aggregation field:
"owner_fields.id.keyword" => "owner_fields.id"

Related

Nested Aggregation for AND Query Not Working

Can someone please help with the question below?
https://discuss.elastic.co/t/nested-aggregation-with-and-always-return-0-match/315722?u=chattes
I have used following aggregations
1. Terms aggregation
2. Bucket selector
3. Nested aggregation
First, I grouped by user id using a terms aggregation, then further grouped by skill id. Using a bucket selector, I filtered for the users that have documents under two skills.
Query
GET index5/_search
{
"size": 0,
"aggs": {
"users": {
"terms": {
"field": "id",
"size": 10
},
"aggs": {
"skills": {
"nested": {
"path": "skills"
},
"aggs": {
"filter_skill": {
"terms": {
"field": "skills.id",
"size": 10,
"include": [
553,
426
]
}
}
}
},
"bucket_count": {
"bucket_selector": {
"buckets_path": {
"skill_count": "skills>filter_skill._bucket_count"
},
"script": "params.skill_count ==2"
}
}
}
}
}
}
Results
"aggregations" : {
"users" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : 1,
"doc_count" : 1,
"skills" : {
"doc_count" : 3,
"filter_skill" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : "426",
"doc_count" : 1
},
{
"key" : "553",
"doc_count" : 1
}
]
}
}
},
{
"key" : 2,
"doc_count" : 1,
"skills" : {
"doc_count" : 2,
"filter_skill" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : "426",
"doc_count" : 1
},
{
"key" : "553",
"doc_count" : 1
}
]
}
}
}
]
}

Finding sum of the "key" values in bucket aggregations in Elasticsearch

I have the following ES query:
GET database/_search
{
"from": 0,
"size": 0,
"query": {
"bool": {
"must": [
{
"nested": {
"query": {
"term": {
"colleges.institution_full_name": {
"value": "Academy of Sciences",
"boost": 1.0
}
}
},
"path": "colleges"
}
}
]
}
},
"_source": false,
"aggs": {
"publication_years": {
"terms": {
"field": "publication_year"
}
}
}
}
And I got the following response:
{
"took" : 3,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 232,
"relation" : "eq"
},
"max_score" : null,
"hits" : [ ]
},
"aggregations" : {
"publication_years" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : 2016,
"doc_count" : 119
},
{
"key" : 2017,
"doc_count" : 90
},
{
"key" : 2018,
"doc_count" : 22
},
{
"key" : 2019,
"doc_count" : 1
}
]
}
}
}
Now I want to calculate the average of the key values of the publication years, i.e., the average of 2016, 2017, 2018 and 2019. How can I modify my ES query to get the average of the publication years instead of getting every year individually? I tried using the "avg" aggregation, but it's also taking "doc_count" into consideration while calculating the average.
Try this:
POST database/_search
{
"size": 0,
"aggs": {
"groupByYear": {
"terms": {
"field": "publication_year"
},
"aggs": {
"avgYear": {
"avg": {
"field": "publication_year"
}
}
}
},
"avg_year": {
"avg_bucket": {
"buckets_path": "groupByYear>avgYear"
}
}
}
}
It's not clear what you want. Do you want the average of 2016, 2017, 2018 and 2019?
That is, do you want 2017.5?

How to use composite aggregation with a single bucket

The following composite aggregation query
{
"query": {
"range": {
"orderedAt": {
"gte": 1591315200000,
"lte": 1591438881000
}
}
},
"size": 0,
"aggs": {
"my_buckets": {
"composite": {
"sources": [
{
"aggregation_target": {
"terms": {
"field": "supplierId"
}
}
}
]
},
"aggs": {
"aggregated_hits": {
"top_hits": {}
},
"filter": {
"bucket_selector": {
"buckets_path": {
"doc_count": "_count"
},
"script": "params.doc_count > 2"
}
}
}
}
}
}
returns something like below.
{
"took" : 67,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 34,
"relation" : "eq"
},
"max_score" : null,
"hits" : [ ]
},
"aggregations" : {
"my_buckets" : {
"after_key" : {
"aggregation_target" : "0HQI2G2HG00100G8"
},
"buckets" : [
{
"key" : {
"aggregation_target" : "0HQI2G0K000100G8"
},
"doc_count" : 4,
"aggregated_hits" : {...}
},
{
"key" : {
"aggregation_target" : "0HQI2G18G00100G8"
},
"doc_count" : 11,
"aggregated_hits" : {...}
},
{
"key" : {
"aggregation_target" : "0HQI2G2HG00100G8"
},
"doc_count" : 16,
"aggregated_hits" : {...}
}
]
}
}
}
The aggregated results are put into buckets based on the condition set in the query.
Is there any way to put them in a single bucket and paginate through the whole result (i.e. 31 documents in this case)?
I don't think you can. A doc's context doesn't include information about other docs unless you perform a cardinality, scripted_metric or terms aggregation. Also, once you bucket your docs based on the supplierId, it'd sort of defeat the purpose of aggregating in the first place...
What you wrote above is as good as it gets and you'll have to combine the aggregated_hits within some post processing step.

Is it possible with aggregation to amalgamate all values of an array property from all grouped documents into the coalesced document?

I have documents with the format similar to the following:
[
{
"name": "fred",
"title": "engineer",
"division_id": 20,
"skills": [
"walking",
"talking"
]
},
{
"name": "ed",
"title": "ticket-taker",
"division_id": 20,
"skills": [
"smiling"
]
}
]
I would like to run an aggs query that would show the complete set of skills for the division: ie,
{
"aggs":{
"distinct_skills":{
"cardinality":{
"field":"division_id"
}
}
},
"_source":{
"includes":[
"division_id",
"skills"
]
}
}
.. so that the resulting hit would look like:
{
"division_id": 20,
"skills": [
"walking",
"talking",
"smiling"
]
}
I know I can retrieve inner_hits and iterate through the list and amalgamate the values "manually". I assume it would perform better if I could do it in a query.
Just pipe two Terms Aggregation queries as shown below:
POST <your_index_name>/_search
{
"size": 0,
"aggs": {
"my_division_ids": {
"terms": {
"field": "division_id",
"size": 10
},
"aggs": {
"my_skills": {
"terms": {
"field": "skills", <---- If it is not keyword field use `skills.keyword` field if using dynamic mapping.
"size": 10
}
}
}
}
}
}
Below is the sample response:
Response:
{
"took" : 490,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 2,
"relation" : "eq"
},
"max_score" : null,
"hits" : [ ]
},
"aggregations" : {
"my_division_ids" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : 20, <---- division_id
"doc_count" : 2,
"my_skills" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [ <---- Skills
{
"key" : "smiling",
"doc_count" : 1
},
{
"key" : "talking",
"doc_count" : 1
},
{
"key" : "walking",
"doc_count" : 1
}
]
}
}
]
}
}
}
Hope this helps!

filtering on 2 values of same field

I have a status field, which can have one of the following values,
I can filter for data which has the status completed. I can also see data which has the status ongoing.
But I want to display the data with status completed and the data with status ongoing at the same time.
But I don't know how to add filters for 2 values on a single field.
How can I achieve what I want ?
EDIT - Thanks for the answers, but that is not what I wanted.
Like here, where I have filtered for status:completed, I want to filter for 2 values in this exact way.
I know I can edit this filter and use your queries, but I need a simple way to do this (the query way is complex), as I have to show it to my marketing team and they don't have any idea about queries. I need to convince them.
If I understand your question correctly, you want to perform an aggregation on 2 values of a field.
This should be possible with a query similar to this one with a terms query:
{
"size" : 0,
"query" : {
"bool" : {
"must" : [ {
"terms" : {
"status" : [ "completed", "unpaid" ]
}
} ]
}
},
"aggs" : {
"freqs" : {
"terms" : {
"field" : "status"
}
}
}
}
This will give a result like this one:
{
"took" : 2,
"timed_out" : false,
"_shards" : {
"total" : 3,
"successful" : 3,
"failed" : 0
},
"hits" : {
"total" : 5,
"max_score" : 0.0,
"hits" : [ ]
},
"aggregations" : {
"freqs" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [ {
"key" : "unpaid",
"doc_count" : 4
}, {
"key" : "completed",
"doc_count" : 1
} ]
}
}
}
Here is my toy mapping definition:
{
"bookings" : {
"properties" : {
"status" : {
"type" : "keyword"
}
}
}
}
You need a filter in aggregation.
{
"size": 0,
"aggs": {
"agg_name": {
"filter": {
"bool": {
"should": [
{
"terms": {
"status": [
"completed",
"ongoing"
]
}
}
]
}
}
}
}
}
Use the above query to get results like this:
{
"took": 2,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"failed": 0
},
"hits": {
"total": 8,
"max_score": 0,
"hits": []
},
"aggregations": {
"agg_name": {
"doc_count": 6
}
}
}
The result you want is the doc_count.
For your reference: in an Elasticsearch bool query, should clauses work like OR conditions:
{
"query":{
"bool":{
"should":[
{"must":{"status":"completed"}},
{"must":{"status":"ongoing"}}
]
}
},
"aggs" : {
"booking_status" : {
"terms" : {
"field" : "status"
}
}
}
}

Resources