How to get the bucket count in Elasticsearch aggregations? - elasticsearch

I'm trying to get the number of buckets an aggregation produces within a specific datetime range:
{
"size": 0,
"aggs": {
"filtered_aggs": {
"filter": {
"range": {
"datetime": {
"gte": "2017-03-01T00:00:00.000Z",
"lte": "2017-06-01T00:00:00.000Z"
}
}
},
"aggs": {
"addr": {
"terms": {
"field": "region",
"size": 10000
}
}
}
}
}
}
output:
"took" : 317,
"timed_out" : false,
"num_reduce_phases" : 3,
"_shards" : {
"total" : 1118,
"successful" : 1118,
"failed" : 0
},
"hits" : {
"total" : 1899658551,
"max_score" : 0.0,
"hits" : [ ]
},
"aggregations" : {
"filtered_aggs" : {
"doc_count" : 88,
"addr" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : "NY",
"doc_count" : 36
},
{
"key" : "CA",
"doc_count" : 13
},
{
"key" : "JS",
"doc_count" : 7
..........
Is there a way to return both results (the buckets and the total bucket count) in one search?
I'm using Elasticsearch 5.5.0
Can I get all of them?

Related

How to get word count in docs as an aggregate over time in Elasticsearch?

I am trying to get word-count trends across documents as an aggregation result. Using the following approach I am able to get the doc count per month, but I cannot find any resources showing how to get the word count for the months of jan, feb, and mar.
PUT test/_doc/1
{
"description" : "one two three four",
"month" : "jan"
}
PUT test/_doc/2
{
"description" : "one one test test test",
"month" : "feb"
}
PUT test/_doc/3
{
"description" : "one one one test",
"month" : "mar"
}
GET test/_search
{
"size": 0,
"query": {
"match": {
"description": {
"query": "one"
}
}
},
"aggs": {
"monthly_count": {
"terms": {
"field": "month.keyword"
}
}
}
}
OUTPUT
{
"took" : 706,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 3,
"relation" : "eq"
},
"max_score" : null,
"hits" : [ ]
},
"aggregations" : {
"monthly_count" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : "feb",
"doc_count" : 1
},
{
"key" : "jan",
"doc_count" : 1
},
{
"key" : "mar",
"doc_count" : 1
}
]
}
}
}
EXPECTED WORD COUNT OVER MONTH
"aggregations" : {
"monthly_count" : {
"buckets" : [
{
"key" : "feb",
"word_count" : 2
},
{
"key" : "jan",
"word_count" : 1
},
{
"key" : "mar",
"word_count" : 3
}
]
}
}
Maybe this query can help you:
GET test/_search
{
"size": 0,
"aggs": {
"monthly_count": {
"terms": {
"field": "month.keyword"
},
"aggs": {
"count_word_one": {
"terms": {
"script": {
"source": """
def str = doc['description.keyword'].value;
def array = str.splitOnToken(' ');
int i = 0;
for (item in array) {
if(item == 'one'){
i++
}
}
return i;
"""
},
"size": 10
}
}
}
}
}
}
Response:
"aggregations" : {
"monthly_count" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : "feb",
"doc_count" : 1,
"count_word_one" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : "2",
"doc_count" : 1
}
]
}
},
{
"key" : "jan",
"doc_count" : 1,
"count_word_one" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : "1",
"doc_count" : 1
}
]
}
},
{
"key" : "mar",
"doc_count" : 1,
"count_word_one" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : "3",
"doc_count" : 1
}
]
}
}
]
}
}

Finding sum of the "key" values in bucket aggregations in Elasticsearch

I have the following ES query:
GET database/_search
{
"from": 0,
"size": 0,
"query": {
"bool": {
"must": [
{
"nested": {
"query": {
"term": {
"colleges.institution_full_name": {
"value": "Academy of Sciences",
"boost": 1.0
}
}
},
"path": "colleges"
}
}
]
}
},
"_source": false,
"aggs": {
"publication_years": {
"terms": {
"field": "publication_year"
}
}
}
}
And I got the following response:
{
"took" : 3,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 232,
"relation" : "eq"
},
"max_score" : null,
"hits" : [ ]
},
"aggregations" : {
"publication_years" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : 2016,
"doc_count" : 119
},
{
"key" : 2017,
"doc_count" : 90
},
{
"key" : 2018,
"doc_count" : 22
},
{
"key" : 2019,
"doc_count" : 1
}
]
}
}
}
Now I want to calculate the average of the key values of the publication years, i.e., the average of 2016, 2017, 2018, and 2019. How can I modify my ES query to get the average of the publication years instead of getting every year individually? I tried using the "avg" aggregation, but it also takes "doc_count" into consideration while calculating the average.
Try this:
POST database/_search
{
"size": 0,
"aggs": {
"groupByYear": {
"terms": {
"field": "publication_year"
},
"aggs": {
"avgYear": {
"avg": {
"field": "publication_year"
}
}
}
},
"avg_year": {
"avg_bucket": {
"buckets_path": "groupByYear>avgYear"
}
}
}
}
It's not clear what you want. Do you want the average of 2016, 2017, 2018, and 2019?
That is, do you want 2017.5?

Get an aggregate count in elasticsearch based on particular uniqueid field

I have created an index and indexed documents in Elasticsearch, and it is working fine. The challenge is that I have to get an aggregate count of the Category field for each unique ID (UserID). My sample documents are given below.
{
"UserID":"A1001",
"Category":"initiated",
"policyno":"5221"
},
{
"UserID":"A1001",
"Category":"pending",
"policyno":"5222"
},
{
"UserID":"A1001",
"Category":"pending",
"policyno":"5223"
},
{
"UserID":"A1002",
"Category":"completed",
"policyno":"5224"
}
**Sample output for UserID - "A1001"**
initiated-1
pending-2
**Sample output for UserID - "A1002"**
completed-1
How can I get the aggregate counts from the JSON documents above, in the form of the sample output shown above?
I suggest a terms aggregation as shown in the following:
{
"size": 0,
"aggs": {
"By_ID": {
"terms": {
"field": "UserID.keyword"
},
"aggs": {
"By_Category": {
"terms": {
"field": "Category.keyword"
}
}
}
}
}
}
Here is a snippet of the response:
"hits" : {
"total" : {
"value" : 4,
"relation" : "eq"
},
"max_score" : null,
"hits" : [ ]
},
"aggregations" : {
"By_ID" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : "A1001",
"doc_count" : 3,
"By_Category" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : "pending",
"doc_count" : 2
},
{
"key" : "initiated",
"doc_count" : 1
}
]
}
},
{
"key" : "A1002",
"doc_count" : 1,
"By_Category" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : "completed",
"doc_count" : 1
}
]
}
}
]
}
}

Is it possible with aggregation to amalgamate all values of an array property from all grouped documents into the coalesced document?

I have documents with the format similar to the following:
[
{
"name": "fred",
"title": "engineer",
"division_id": 20,
"skills": [
"walking",
"talking"
]
},
{
"name": "ed",
"title": "ticket-taker",
"division_id": 20,
"skills": [
"smiling"
]
}
]
I would like to run an aggs query that would show the complete set of skills for the division, i.e.,
{
"aggs":{
"distinct_skills":{
"cardinality":{
"field":"division_id"
}
}
},
"_source":{
"includes":[
"division_id",
"skills"
]
}
}
.. so that the resulting hit would look like:
{
"division_id": 20,
"skills": [
"walking",
"talking",
"smiling"
]
}
I know I can retrieve inner_hits and iterate through the list and amalgamate the values "manually". I assume it would perform better if I could do it in a query.
Just nest one Terms aggregation inside another, as shown below:
POST <your_index_name>/_search
{
"size": 0,
"aggs": {
"my_division_ids": {
"terms": {
"field": "division_id",
"size": 10
},
"aggs": {
"my_skills": {
"terms": {
"field": "skills", <---- If it is not keyword field use `skills.keyword` field if using dynamic mapping.
"size": 10
}
}
}
}
}
}
Below is the sample response:
Response:
{
"took" : 490,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 2,
"relation" : "eq"
},
"max_score" : null,
"hits" : [ ]
},
"aggregations" : {
"my_division_ids" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : 20, <---- division_id
"doc_count" : 2,
"my_skills" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [ <---- Skills
{
"key" : "smiling",
"doc_count" : 1
},
{
"key" : "talking",
"doc_count" : 1
},
{
"key" : "walking",
"doc_count" : 1
}
]
}
}
]
}
}
}
Hope this helps!

Select aggregations based on sub aggregation results doc count

I want to select only those buckets whose sub-aggregations satisfy the min_doc_count defined on them. I'm not sure if this is possible.
Basically I want to select only those buckets that have propertyid belonging to a particular import.
Here is my query.
GET properties/_search
{
"size": 0,
"query": {
"terms": {
"Agency_Id": [
"16"
]
}
},
"aggregations": {
"property_id": {
"terms": {
"field": "PropertyId",
"min_doc_count": 2,
"size": 10000
},
"aggregations": {
"import_filter": {
"filter": {
"term": {
"Import_Id": "90040"
}
},
"aggregations": {
"import_id": {
"terms": {
"field": "Import_Id",
"min_doc_count": 1,
"size": 10000
}
}
}
}
}
}
}
}
Actual result
{
"took" : 16,
"timed_out" : false,
"_shards" : {
"total" : 5,
"successful" : 5,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : 1163,
"max_score" : 0.0,
"hits" : [ ]
},
"aggregations" : {
"property_id" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : "011162330",
"doc_count" : 2,
"import_filter" : {
"doc_count" : 1,
"import_id" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : 90040,
"doc_count" : 1
}
]
}
}
},
{
"key" : "6065590",
"doc_count" : 2,
"import_filter" : {
"doc_count" : 1,
"import_id" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : 90040,
"doc_count" : 1
}
]
}
}
},
{
"key" : "6289352",
"doc_count" : 2,
"import_filter" : {
"doc_count" : 1,
"import_id" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : 90040,
"doc_count" : 1
}
]
}
}
},
{
"key" : "gd-00-022386",
"doc_count" : 2,
"import_filter" : {
"doc_count" : 0,
"import_id" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [ ]
}
}
}
]
}
}
}
Expected
{
"took" : 16,
"timed_out" : false,
"_shards" : {
"total" : 5,
"successful" : 5,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : 1163,
"max_score" : 0.0,
"hits" : [ ]
},
"aggregations" : {
"property_id" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : "011162330",
"doc_count" : 2,
"import_filter" : {
"doc_count" : 1,
"import_id" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : 90040,
"doc_count" : 1
}
]
}
}
},
{
"key" : "6065590",
"doc_count" : 2,
"import_filter" : {
"doc_count" : 1,
"import_id" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : 90040,
"doc_count" : 1
}
]
}
}
},
{
"key" : "6289352",
"doc_count" : 2,
"import_filter" : {
"doc_count" : 1,
"import_id" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : 90040,
"doc_count" : 1
}
]
}
}
}
]
}
}
}
Based on my understanding of your query, you need a bucket selector aggregation.
Query:
GET properties/_search
{
"size": 0,
"query": {
"terms": {
"Agency_Id": [
"16"
]
}
},
"aggregations": {
"property_id": {
"terms": {
"field": "PropertyId",
"min_doc_count": 2,
"size": 10000
},
"aggregations": {
"import_filter": {
"filter": {
"term": {
"Import_Id": "90040"
}
},
"aggregations": {
"import_id": {
"terms": {
"field": "Import_Id",
"min_doc_count": 1,
"size": 10000
}
}
}
},
"mybucket_selector": { ---> select product bucket if import bucket has any value
"bucket_selector": {
"buckets_path": {
"FinalCount": "import_filter>import_id._bucket_count"
},
"script": "params.FinalCount>0"
}
}
}
}
}
}

Resources