Elasticsearch get average - elasticsearch

I'm trying to compute an average over aggregated data in Elasticsearch. This is the structure of my data:
document 1
{
"groupId":"TEST_01",
"lag":10,
"detectionDate":"2021-02-26T21:42:30.010Z",
"tipo":"uno",
"topics":[
{
"name":"topic_01",
"valore":2
},
{
"name":"topic_02",
"valore":4
}
]
}
document 2
{
"groupId":"TEST_01",
"lag":10,
"detectionDate":"2021-02-26T21:42:30.010Z",
"tipo":"uno",
"topics":[
{
"name":"topic_01",
"valore":4
},
{
"name":"topic_02",
"valore":8
}
]
}
I have to aggregate by groupId and by topic name, and on that aggregation calculate the average of the valore field. But with the source code below, the computed average is wrong.
With the above data of documents one and two the expected result should be:
groupId    topicName    average
TEST_01    topic_01     3
TEST_01    topic_02     6
TermsAggregationBuilder aggregation = AggregationBuilders
.terms("groupId")
.field("groupId.keyword")
.subAggregation(AggregationBuilders
.terms("topicName")
.field("topics.name.keyword").subAggregation(AggregationBuilders
.avg("avg").field("topics.valore")));

First of all, make sure your topics field is of type "nested", because if it is "object" the topic names and valore values will be flattened. This means you will end up with a set of valore values and topic names without any relation between them.
Mappings
{
"test_ynsanity" : {
"mappings" : {
"properties" : {
"detectionDate" : {
"type" : "date"
},
"groupId" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"lag" : {
"type" : "long"
},
"tipo" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"topics" : {
"type" : "nested",
"properties" : {
"name" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"valore" : {
"type" : "long"
}
}
}
}
}
}
}
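If you also create the index from Java, here is a minimal sketch, assuming the 7.x high-level REST client and an existing RestHighLevelClient named client (only the nested topics field is shown, with name simplified to a plain keyword):
import org.elasticsearch.client.RequestOptions;
import org.elasticsearch.client.RestHighLevelClient;
import org.elasticsearch.client.indices.CreateIndexRequest;
import org.elasticsearch.common.xcontent.XContentType;

// Create the index with "topics" mapped as nested so each name/valore pair keeps its relation.
String mapping = "{\"properties\":{\"topics\":{\"type\":\"nested\","
    + "\"properties\":{\"name\":{\"type\":\"keyword\"},\"valore\":{\"type\":\"long\"}}}}}";
CreateIndexRequest request = new CreateIndexRequest("test_ynsanity")
    .mapping(mapping, XContentType.JSON);
client.indices().create(request, RequestOptions.DEFAULT);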
Ingesting data
POST test_ynsanity/_doc
{
"groupId":"TEST_01",
"lag":10,
"detectionDate":"2021-02-26T21:42:30.010Z",
"tipo":"uno",
"topics":[
{
"name":"topic_01",
"valore":2
},
{
"name":"topic_02",
"valore":4
}
]
}
POST test_ynsanity/_doc
{
"groupId":"TEST_01",
"lag":10,
"detectionDate":"2021-02-26T21:42:30.010Z",
"tipo":"uno",
"topics":[
{
"name":"topic_01",
"valore":4
},
{
"name":"topic_02",
"valore":8
}
]
}
Query
POST test_ynsanity/_search
{
"size": 0,
"aggs": {
"groups": {
"terms": {
"field": "groupId.keyword",
"size": 10
},
"aggs": {
"topics": {
"nested": {
"path": "topics"
},
"aggs": {
"topic_names": {
"terms": {
"field": "topics.name.keyword"
},
"aggs": {
"topic_avg": {
"avg": {
"field": "topics.valore"
}
}
}
}
}
}
}
}
}
}
Response
{
"took" : 1,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 2,
"relation" : "eq"
},
"max_score" : null,
"hits" : [ ]
},
"aggregations" : {
"groups" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : "TEST_01",
"doc_count" : 2,
"topics" : {
"doc_count" : 4,
"topic_names" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : "topic_01",
"doc_count" : 2,
"NAME" : {
"value" : 3.0
}
},
{
"key" : "topic_02",
"doc_count" : 2,
"NAME" : {
"value" : 6.0
}
}
]
}
}
}
]
}
}
}
I have no access to the Java DSL right now, but the query should look something like this:
TermsAggregationBuilder aggregation = AggregationBuilders
    .terms("groupId")
    .field("groupId.keyword")
    .subAggregation(AggregationBuilders
        .nested("topics", "topics")
        .subAggregation(AggregationBuilders
            .terms("topic_names")
            .field("topics.name.keyword")
            .subAggregation(AggregationBuilders
                .avg("topic_avg")
                .field("topics.valore"))));

Related

ElasticSearch - how to get aggregation of aggregation

I am very new to Elasticsearch.
I work for a dating website that has data as follows:
Single - with fields: name, signUpDate, state, and other data fields.
Encounter - with fields: state, encounterDate, singlesInvolved, and other data fields.
These are my two indexes.
Now I have to write a query that returns the following:
For every state: how many singles, how many encounters, the longest time a single has been part of our website, and the average time a single has been part of our website.
It must also return one result with those same averages computed across all states.
Like this example:
[
{ //this one is the average of all states
"singles": 45,
"dates": 18,
"minWaitingTime": 1644677979530,
"avgWaitingTime": 15603
},
{ //these are the averages of each state
"state": "MA",
"singles": 50,
"dates": 23,
"minWaitingTime": 1644677979530,
"avgWaitingTime": 15603
},
{
"state": "NY",
"singles": 39,
"dates": 13,
"minWaitingTime": 1644850558872,
"avgWaitingTime": 6033
}
]
I've been working on the query for each state individually, but I don't know how to get an average across all states.
What I have so far is this:
GET /single,encounter/_search
{
"size": 0,
"aggs": {
"bystate": {
"terms": {
"field": "state",
"size": 59
},
"aggs": {
"group-by-index": {
"terms": {
"field": "_index"
}
},
"min_date": {
"min": {
"field": "signedUpAt"
}
},
"avg_date": {
"avg": {
"field": "signedUpAt"
}
}
}
}
}
}
I don't know if there is a better way to do this, and likewise I don't know how to calculate the averages (of singles, encounters, min_date and avg_date) across all states from this result.
Every result of the previous query looks like this:
{
"key" : "MA",
"doc_count" : 164,
"avg_date" : {
"value" : 1.6457900076508965E12,
"value_as_string" : "2022-02-25T11:53:27.650"
},
"min_date" : {
"value" : 1.64467797953E12,
"value_as_string" : "2022-02-12T14:59:39.530"
},
"group-by-index" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : "single",
"doc_count" : 135
},
{
"key" : "encounter",
"doc_count" : 29
}
]
}
},
I would really appreciate help on this one.
Addition: the index mappings.
Encounter:
{
"encounter" : {
"aliases" : { },
"mappings" : {
"properties" : {
"_class" : {
"type" : "keyword",
"index" : false,
"doc_values" : false
},
"avgAge" : {
"type" : "integer",
"index" : false,
"doc_values" : false
},
"application" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"createdAt" : {
"type" : "date",
"format" : "date_hour_minute_second_millis"
},
"encounterId" : {
"type" : "keyword"
},
"locationType" : {
"type" : "keyword",
"index" : false,
"doc_values" : false
},
"singleOneId" : {
"type" : "keyword",
"index" : false,
"doc_values" : false
},
"singleTwoId" : {
"type" : "keyword",
"index" : false,
"doc_values" : false
},
"serviceLine" : {
"type" : "keyword"
},
"state" : {
"type" : "keyword"
},
"rating" : {
"type" : "keyword"
}
}
},
"settings" : {
"index" : {
"refresh_interval" : "1s",
"number_of_shards" : "1",
"provided_name" : "encounter",
"creation_date" : "1643704661932",
"number_of_replicas" : "1",
"uuid" : "MliXQL_bRBKDN7_d8G_BYw",
"version" : {
"created" : "7100299"
}
}
}
}
}
And Single:
{
"single" : {
"aliases" : { },
"mappings" : {
"properties" : {
"_class" : {
"type" : "keyword",
"index" : false,
"doc_values" : false
},
"id" : {
"type" : "keyword"
},
"singleId" : {
"type" : "keyword"
},
"state" : {
"type" : "keyword"
},
"preferedGender" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
}
}
},
"settings" : {
"index" : {
"refresh_interval" : "1s",
"number_of_shards" : "1",
"provided_name" : "single",
"creation_date" : "1643704662136",
"number_of_replicas" : "1",
"uuid" : "Js_tqZfRRx-IxbjVRRN4wQ",
"version" : {
"created" : "7100299"
}
}
}
}
}
You can use the avg bucket aggregation (a sibling pipeline aggregation): you provide a buckets_path, and based on that metric it calculates the average across all buckets of the targeted aggregation.
Below is a sample query:
{
"size": 0,
"aggs": {
"bystate": {
"terms": {
"field": "state",
"size": 59
},
"aggs": {
"group-by-index": {
"terms": {
"field": "_index"
}
},
"min_date": {
"min": {
"field": "signedUpAt"
}
},
"avg_date": {
"avg": {
"field": "signedUpAt"
}
}
}
},
"avg_all_state": {
"avg_bucket": {
"buckets_path": "bystate>avg_date"
}
}
}
}
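If you are building this request from the Java high-level REST client (an assumption, since the question only shows the JSON DSL), a hedged sketch of the same aggregations could be:
import org.elasticsearch.search.aggregations.AggregationBuilders;
import org.elasticsearch.search.aggregations.PipelineAggregatorBuilders;
import org.elasticsearch.search.aggregations.bucket.terms.TermsAggregationBuilder;
import org.elasticsearch.search.builder.SearchSourceBuilder;

// Per-state terms aggregation with the same sub-aggregations as the JSON above.
TermsAggregationBuilder byState = AggregationBuilders.terms("bystate")
    .field("state")
    .size(59)
    .subAggregation(AggregationBuilders.terms("group-by-index").field("_index"))
    .subAggregation(AggregationBuilders.min("min_date").field("signedUpAt"))
    .subAggregation(AggregationBuilders.avg("avg_date").field("signedUpAt"));

// Sibling pipeline aggregation: averages avg_date across all state buckets.
SearchSourceBuilder source = new SearchSourceBuilder()
    .size(0)
    .aggregation(byState)
    .aggregation(PipelineAggregatorBuilders.avgBucket("avg_all_state", "bystate>avg_date"));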

Elasticsearch Aggregation not giving desirable output

I have an object which contains the mils of drugs given to a patient.
More than one drug can be administered to a patient.
I am trying to sum the total individual mils of each drug administered to a patient within a specified time range.
Here is a sample of my object:
{
"_uid" : "953a4af9901847c3b206dac7cee5b298",
"_fullName" : "Test Patient",
"_created": "2021-12-18 22:48:45",
"_treatment" : {
"_created" : "2021-12-18 22:48:45",
"_drugs" : [
{
"_name" : "Another Tablet",
"_uid" : "5a09f6a9c415465a84a8661f35ac621d",
"_mils" : "500"
},
{
"_name" : "Test Drug",
"_uid" : "36c7fcf048c743078ca4c80d187d86c9",
"_mils" : "300"
}
]
}
}
In Kibana, I did the following:
{
"query": {
"bool": {
"filter": {
"range": {
"_created": {
"gte": "2021-01-01 00:00:00",
"lte": "2021-12-31 00:00:00"
}
}
}
}
},
"size": 0,
"aggs" : {
"men" : {
"terms": {
"field": "_treatment._drugs._name.keyword"
},
"aggs": {
"milsUsed": { "sum": { "field": "_treatment._drugs._mils" } }
}
}
}
}
Presently Kibana is adding all the mils together instead of separating them per drug. Below is the response from Kibana:
"aggregations" : {
"men" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : "Another Tablet",
"doc_count" : 2,
"milsUsed" : {
"value" : 1100.0
}
},
{
"key" : "Test Drug",
"doc_count" : 2,
"milsUsed" : {
"value" : 1100.0
}
}
]
}
}
Expected response I am looking to get:
"aggregations" : {
"men" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : "Another Tablet",
"doc_count" : 1,
"milsUsed" : {
"value" : 500.0
}
},
{
"key" : "Test Drug",
"doc_count" : 1,
"milsUsed" : {
"value" : 300.0
}
}
]
}
}
Index mapping
{
"patients" : {
"mappings" : {
"properties" : {
"_fullName" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"_treatment" : {
"properties": {
"_drugs": {
"properties": {
"_mils" : {
"type" : "long"
},
"_name" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"_uid" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
}
}
}
}
}
}
}
}
}
TLDR;
Have you heard about nested fields in Elasticsearch?
Internally, Elasticsearch flattens nested objects in your documents.
So if you have
{
"group" : "fans",
"user" : [
{
"first" : "John",
"last" : "Smith"
},
{
"first" : "Alice",
"last" : "White"
}
]
}
The internal representation of the JSON document in the index will be:
{
"group" : "fans",
"user.first" : [ "alice", "john" ],
"user.last" : [ "smith", "white" ]
}
In your case, the same thing happens when you perform the aggregation: because of the flattening operation, you lose the "relationship" between _drugs._name and _drugs._mils.
Below is a small example that solves your use case.
Example
Set Up
PUT /so_agg_sum_drugs/
{
"mappings": {
"properties": {
"_fullName": {
"type": "keyword"
},
"_treatment": {
"properties": {
"_drugs": {
"type": "nested", <- nested field type !!
"properties": {
"_mils": {
"type": "long"
},
"_name": {
"type": "keyword"
},
"_uid": {
"type": "keyword"
}
}
}
}
}
}
}
}
POST /so_agg_sum_drugs/_doc
{
"_fullName" : "Test Patient",
"_treatment" : {
"_drugs" : [
{
"_name" : "Another Tablet",
"_uid" : "5a09f6a9c415465a84a8661f35ac621d",
"_mils" : "500"
},
{
"_name" : "Test Drug",
"_uid" : "36c7fcf048c743078ca4c80d187d86c9",
"_mils" : "300"
}
]
}
}
POST /so_agg_sum_drugs/_doc
{
"_fullName" : "Test Patient 2",
"_treatment" : {
"_drugs" : [
{
"_name" : "Another Tablet",
"_uid" : "5a09f6a9c415465a84a8661f35ac621d",
"_mils" : "500"
},
{
"_name" : "Test Drug",
"_uid" : "36c7fcf048c743078ca4c80d187d86c9",
"_mils" : "400"
},
{
"_name" : "Test Drug",
"_uid" : "36c7fcf048c743078ca4c80d187d86c9",
"_mils" : "300"
}
]
}
}
Solution
Your aggregation was mostly right, except for the nested field type. You can find some documentation about aggregation on nested fields here. [doc]
GET /so_agg_sum_drugs/_search
{
"size": 0,
"query": {
"match_all": {}
},
"aggs": {
"patients": {
"terms": {
"field": "_fullName"
},
"aggs": {
"drugs": {
"nested": {
"path": "_treatment._drugs". <- wrap you agg on the drugs objects in a nested type agg.
},
"aggs": {
"per_drug": {
"terms": {
"field": "_treatment._drugs._name"
},
"aggs": {
"quantity": {
"sum": {
"field": "_treatment._drugs._mils"
}
}
}
}
}
}
}
}
}
}
{
"took" : 350,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 2,
"relation" : "eq"
},
"max_score" : null,
"hits" : [ ]
},
"aggregations" : {
"patients" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : "Test Patient",
"doc_count" : 1,
"drugs" : {
"doc_count" : 2,
"per_drug" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : "Another Tablet",
"doc_count" : 1,
"quantity" : {
"value" : 500.0
}
},
{
"key" : "Test Drug",
"doc_count" : 1,
"quantity" : {
"value" : 300.0
}
}
]
}
}
},
{
"key" : "Test Patient 2",
"doc_count" : 1,
"drugs" : {
"doc_count" : 3,
"per_drug" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : "Test Drug",
"doc_count" : 2,
"quantity" : {
"value" : 700.0
}
},
{
"key" : "Another Tablet",
"doc_count" : 1,
"quantity" : {
"value" : 500.0
}
}
]
}
}
}
]
}
}
}
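If you use the Java high-level REST client rather than Kibana (an assumption on my side), a hedged sketch of the same nested aggregation could be:
import org.elasticsearch.search.aggregations.AggregationBuilders;
import org.elasticsearch.search.aggregations.bucket.terms.TermsAggregationBuilder;

// Terms per patient, then a nested agg over _treatment._drugs,
// then terms per drug name with a sum of _mils inside each nested scope.
TermsAggregationBuilder perPatient = AggregationBuilders.terms("patients")
    .field("_fullName")
    .subAggregation(AggregationBuilders
        .nested("drugs", "_treatment._drugs")
        .subAggregation(AggregationBuilders
            .terms("per_drug")
            .field("_treatment._drugs._name")
            .subAggregation(AggregationBuilders
                .sum("quantity")
                .field("_treatment._drugs._mils"))));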

How to return the hit term in ES?

I'm trying to return only the terms that were actually hit instead of the whole document, but I don't know how to achieve the desired effect.
"es_episode" : {
"aliases" : { },
"mappings" : {
"properties" : {
"endTime" : {
"type" : "long"
},
"episodeId" : {
"type" : "long"
},
"startTime" : {
"type" : "long"
},
"studentIds" : {
"type" : "long"
}
}
}
This is an example:
{
"episodeId":124,
"startTime":10,
"endTime":20,
"studentIds":[200,300]
}
My query:
GET /es_episode/_search
{
"_source": ["studentIds"],
"query": {
"terms": {
"studentIds": [300,400]
}
}
}
The result is
"hits" : {
"total" : {
"value" : 1,
"relation" : "eq"
},
"max_score" : 1.0,
"hits" : [
{
"_index" : "es_episode",
"_type" : "episode",
"_id" : "2",
"_score" : 1.0,
"_source" : {
"studentIds" : [
200,
300
]
}
}
]
}
But in fact I only want to know which terms hit. For example, the result I want should be studentIds=[300] instead of all of studentIds=[200,300] from the returned document. It seems that some additional operations are required, but I don't know how.
I tried to achieve my goal with the following query:
GET /es_episode/_search
{
"_source": ["studentIds"],
"query": {
"terms": {
"studentIds": [300,400]
}
},
"aggs": {
"student_id": {
"terms": {
"field": "studentIds",
"size": 10
},
"aggs": {
"id": {
"terms": {
"field": "episodeId"
}
},
"id_select":{
"bucket_selector": {
"buckets_path": {
"key" : "_key"
},
"script": "params.key==300 || params.key==400"
}
}
}
}
}
}
The result for this is:
"aggregations" : {
"student_id" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : 300,
"doc_count" : 1,
"id" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : 124,
"doc_count" : 1
}
]
}
}
]
}
}
It seems that I successfully filtered out the terms I don't want, but this doesn't look pretty, and I need to repeat my parameters in the script.

How to select the last bucket in a date_histogram selector in Elasticsearch

I have a date_histogram and I can use max_bucket to get the bucket with the greatest value, but I want to select the last bucket (i.e. the bucket with the highest timestamp).
Using max_bucket to get the greatest value works OK, but I don't know what to put in the buckets_path to get the last bucket.
My mapping:
{
"ee-2020-02-28" : {
"mappings" : {
"dynamic" : "strict",
"properties" : {
"date" : {
"type" : "date"
},
"frequency" : {
"type" : "long"
},
"keyword" : {
"type" : "keyword"
},
"text" : {
"type" : "text"
}
}
}
}
}
My working query, which returns the bucket for the day with the highest frequency (it's named last_day because this is a WIP query towards my goal):
{
"query": {
"range": {
"date": { /* Start away from the begining of data, so the rolling avg is full */
"gte": "2019-02-18"/*,
"lte": "2020-12-14"*/
}
}
},
"aggs": {
"palabrejas": {
"terms": {
"field": "keyword",
"size": 100
},
"aggs": {
"nnndiario": {
"date_histogram": {
"field": "date",
"calendar_interval": "day"
},
"aggs": {
"dailyfreq": {
"sum": {
"field": "frequency"
}
}
}
},
"ventanuco": {
"avg_bucket": {
"buckets_path": "nnndiario>dailyfreq",
"gap_policy": "insert_zeros"
}
},
"last_day": {
"max_bucket": {
"buckets_path": "nnndiario>dailyfreq"
}
}
}
}
}
}
Its output (notice I replaced long parts with [...]):
{
"aggregations" : {
"palabrejas" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : "rama0",
"doc_count" : 20400,
"nnndiario" : {
"buckets" : [
{
"key_as_string" : "2020-01-01T00:00:00.000Z",
"key" : 1577836800000,
"doc_count" : 600,
"dailyfreq" : {
"value" : 3000.0
}
},
{
"key_as_string" : "2020-01-02T00:00:00.000Z",
"key" : 1577923200000,
"doc_count" : 600,
"dailyfreq" : {
"value" : 3000.0
}
},
{
"key_as_string" : "2020-01-03T00:00:00.000Z",
"key" : 1578009600000,
"doc_count" : 600,
"dailyfreq" : {
"value" : 3000.0
}
},
[...]
{
"key_as_string" : "2020-01-31T00:00:00.000Z",
"key" : 1580428800000,
"doc_count" : 600,
"dailyfreq" : {
"value" : 3000.0
}
}
]
},
"ventanuco" : {
"value" : 3290.3225806451615
},
"last_day" : {
"value" : 12000.0,
"keys" : [
"2020-01-13T00:00:00.000Z"
]
}
},
{
"key" : "rama1",
"doc_count" : 20400,
"nnndiario" : {
"buckets" : [
{
"key_as_string" : "2020-01-01T00:00:00.000Z",
"key" : 1577836800000,
"doc_count" : 600,
"dailyfreq" : {
"value" : 3000.0
}
},
[...]
]
},
"ventanuco" : {
"value" : 3290.3225806451615
},
"last_day" : {
"value" : 12000.0,
"keys" : [
"2020-01-13T00:00:00.000Z"
]
}
},
[...]
}
]
}
}
}
I don't know what to put in last_day's buckets_path to obtain the last bucket.
You might consider using a terms aggregation instead of a date_histogram aggregation:
"max_date_bucket_agg": {
"terms": {
"field": "date",
"size": 1,
"order": {"_key": "desc"}
}
}
An issue might be the granularity of your data; consider storing the date value at the expected granularity (e.g. day) in a separate field and using that field in the terms aggregation.
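For reference, a hedged Java DSL sketch of that terms aggregation (assuming the high-level REST client) could look like this:
import org.elasticsearch.search.aggregations.AggregationBuilders;
import org.elasticsearch.search.aggregations.BucketOrder;
import org.elasticsearch.search.aggregations.bucket.terms.TermsAggregationBuilder;

// Keep a single bucket: the one with the highest (most recent) date key.
TermsAggregationBuilder lastDateBucket = AggregationBuilders.terms("max_date_bucket_agg")
    .field("date")
    .size(1)
    .order(BucketOrder.key(false)); // false = sort keys descending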

multiple fields in aggs elastic query

I have an Elasticsearch index mapped as:
"mappings": {
"keywords": {
"properties": {
"Keyword": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"KeywordType": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
}
}
}
}
I'm trying to retrieve two fields: the keyword and its keyword type.
{
"query": {
"bool": {
"must": [{
"match": {
"Keyword": TEXT_REQ
}
}]
}
},
"aggs": {
"keywords": {
"terms": {
"field":"Keyword.keyword",
"size": 500
}
}
}
}
It returns all the keywords that are present in the text. I also want the corresponding keywordtype along with each keyword, so I tried with multiple terms aggregations:
{aggs:{
"keywords":{"terms":{"field":"Keyword.keyword"}},
"keywordtype":{"terms":{"field":"KeywordType.keyword"}}
}}
but I don't get the corresponding keywordtype for each keyword; I only get the overall keywordtypes present:
{... "aggregations":{"keywords":{... "buckets":[ {"key": "management"}]},
"keywordtype":{... "buckets":[{"key":"Tools"}, {"key":"technology"}]}
I need the output to be:
bucket:[{"keyword":"management", keywordtype:"Tools"}]
How do I modify the Elasticsearch query?
You can use either of the below queries:
Solution 1: Using Composite Aggregation:
You can make use of the below composite aggregation, since you mentioned that you want to group Keyword and KeywordType together.
Aggregation Query:
POST <your_index_name>/_search
{
"size": 0,
"query": {
"bool": {
"must": [
{
"match": {
"Keyword": "TEXT_REQ"
}
}
]
}
},
"aggs" : {
"my_buckets": {
"composite" : {
"sources" : [
{ "keyword": { "terms" : { "field": "Keyword.keyword" } } },
{ "keywordType": { "terms" : { "field": "KeywordType.keyword" } } }
]
}
}
}
}
Sample Response:
{
"took" : 40,
"timed_out" : false,
"_shards" : {
"total" : 5,
"successful" : 5,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : 4,
"max_score" : 0.0,
"hits" : [ ]
},
"aggregations" : {
"my_buckets" : {
"after_key" : {
"keyword" : "TEXT_REQ",
"keywordType" : "TEXT_REQ_Type3"
},
"buckets" : [ <----- Required Results Start
{
"key" : {
"keyword" : "TEXT_REQ",
"keywordType" : "TEXT_REQ_Type1"
},
"doc_count" : 1
},
{
"key" : {
"keyword" : "TEXT_REQ",
"keywordType" : "TEXT_REQ_Type2"
},
"doc_count" : 2
},
{
"key" : {
"keyword" : "TEXT_REQ",
"keywordType" : "TEXT_REQ_Type3"
},
"doc_count" : 1
}
] <----- Required Results End
}
}
}
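If you build Solution 1 from the Java high-level REST client (an assumption, since the question only shows the JSON DSL), a hedged sketch of the composite sources could be:
import java.util.Arrays;
import java.util.List;
import org.elasticsearch.search.aggregations.bucket.composite.CompositeAggregationBuilder;
import org.elasticsearch.search.aggregations.bucket.composite.CompositeValuesSourceBuilder;
import org.elasticsearch.search.aggregations.bucket.composite.TermsValuesSourceBuilder;

// Composite aggregation combining the two terms sources, mirroring the JSON above.
List<CompositeValuesSourceBuilder<?>> sources = Arrays.asList(
    new TermsValuesSourceBuilder("keyword").field("Keyword.keyword"),
    new TermsValuesSourceBuilder("keywordType").field("KeywordType.keyword"));
CompositeAggregationBuilder myBuckets = new CompositeAggregationBuilder("my_buckets", sources);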
Solution 2: Using Terms Aggregation
Using a terms aggregation, I've constructed a parent-child structure (the parent being Keyword and the child being KeywordType), which gives the below tree structure:
Bool Query
Terms Aggregation on Keyword.keyword
- Terms Aggregation on KeywordType.keyword
Aggregation Query:
POST <your_index_name>/_search
{
"size": 0,
"query": {
"bool": {
"must": [
{
"match": {
"Keyword": "TEXT_REQ"
}
}
]
}
},
"aggs": {
"mykeywords": {
"terms": {
"field": "Keyword.keyword",
"size": 10
},
"aggs": {
"mytypes": {
"terms": {
"field": "KeywordType.keyword",
"size": 10
}
}
}
}
}
}
Sample Response:
{
"took" : 97,
"timed_out" : false,
"_shards" : {
"total" : 5,
"successful" : 5,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : 4,
"max_score" : 0.0,
"hits" : [ ]
},
"aggregations" : {
"mykeywords" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : "TEXT_REQ", <----- Parent Value i.e Keyword
"doc_count" : 4,
"mytypes" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [ <----- Children i.e. KeywordType
{
"key" : "TEXT_REQ_Type2",
"doc_count" : 2
},
{
"key" : "TEXT_REQ_Type1",
"doc_count" : 1
},
{
"key" : "TEXT_REQ_Type3",
"doc_count" : 1
}
]
}
}
]
}
}
}
Let me know if this helps!
