Multiple aggregation in single query on Elasticsearch - elasticsearch

I have log data in the Elasticsearch index.
`"hits": [
{
"_index": "event_log",
"_type": "log_type",
"_id": "2-d-kmoBazYRVz7KCQIj",
"_score": 1,
"_source": {
"user_id": 123,
"event": "click",
"category": "abc",
"product_id": 1112,
"bkt": "A"
}
},
{
"_index": "event_log",
"_type": "log_type",
"_id": "3ed-kmoBazYRVz7KCQLX",
"_score": 1,
"_source": {
"user_id": 456,
"event": "click",
"category": "abc",
"product_id": 112,
"bkt": "A"
}
},
{
"_index": "event_log",
"_type": "log_type",
"_id": "3ud-kmoBazYRVz7KCgIy",
"_score": 1,
"_source": {
"user_id": 1234,
"event": "click",
"category": "abc",
"product_id": 1112,
"bkt": "B"
}
},
{
"_index": "event_log",
"_type": "log_type",
"_id": "4Od-kmoBazYRVz7KCgLr",
"_score": 1,
"_source": {
"user_id": 4567,
"event": "click",
"category": "xyz",
"product_id": 1118,
"bkt": "B"
}
},
{
"_index": "event_log",
"_type": "log_type",
"_id": "4ud-kmoBazYRVz7KkwL2",
"_score": 1,
"_source": {
"user_id": 123,
"event": "cart",
"category": "xyz",
"product_id": 1,
"bkt": "A"
}
},
{
"_index": "event_log",
"_type": "log_type",
"_id": "2ud-kmoBazYRVz7KCALB",
"_score": 1,
"_source": {
"user_id": 123,
"event": "cart",
"category": "xyz",
"product_id": 11,
"bkt": "A"
}
},
{
"_index": "event_log",
"_type": "log_type",
"_id": "3-d-kmoBazYRVz7KCgKP",
"_score": 1,
"_source": {
"user_id": 4567,
"event": "click",
"category": "abc",
"product_id": 111,
"bkt": "B"
}
},
{
"_index": "event_log",
"_type": "log_type",
"_id": "3Od-kmoBazYRVz7KCQJ8",
"_score": 1,
"_source": {
"user_id": 456,
"event": "click",
"category": "abc",
"product_id": 111,
"bkt": "A"
}
},
{
"_index": "event_log",
"_type": "log_type",
"_id": "4ed-kmoBazYRVz7KCwJH",
"_score": 1,
"_source": {
"user_id": 4567,
"event": "click",
"category": "xyz",
"product_id": 1128,
"bkt": "B"
}
}
]}
I want to get the count of records aggregated by category, bkt, and event. I also want to aggregate user_id by category and bkt. I currently have two separate queries for that.
Count of records aggregated by category, bkt, event:
GET event_log/_search
{"size" : 0,
"aggs": {
"category_id": {
"terms": { "field": "category.keyword" },
"aggs": {
"ab_bucket": {
"terms": { "field": "bkt.keyword" },
"aggs": {
"event_type": {
"terms": { "field": "event.keyword" }
}
}
}
}
}
}
}
The result is
"aggregations": {
"category_id": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "abc",
"doc_count": 5,
"ab_bucket": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "A",
"doc_count": 3,
"event_type": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "click",
"doc_count": 3
}
]
}
},
{
"key": "B",
"doc_count": 2,
"event_type": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "click",
"doc_count": 2
}
]
}
}
]
}
},
Users aggregated by category, bkt.
GET event_log/_search
{"size" : 0,
"aggs": {
"category_id": {
"terms": { "field": "category.keyword" },
"aggs": {
"ab_bucket": {
"terms": { "field": "bkt.keyword" },
"aggs": {
"total_uniq_users" : {
"cardinality": {
"field" : "user_id"
}
}
}
}
}
}
}
}
The result is
"aggregations": {
"category_id": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "abc",
"doc_count": 5,
"ab_bucket": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "A",
"doc_count": 3,
"total_uniq_users": {
"value": 2
}
},
{
"key": "B",
"doc_count": 2,
"total_uniq_users": {
"value": 2
}
}
]
}
},
Is there a way to combine both queries and obtain the expected output in a single result?

Yes, you can do it like this:
GET event_log/_search
{
"size": 0,
"aggs": {
"category_id": {
"terms": {
"field": "category.keyword"
},
"aggs": {
"ab_bucket": {
"terms": {
"field": "bkt.keyword"
},
"aggs": {
"total_uniq_users": {
"cardinality": {
"field": "user_id"
}
},
"event_type": {
"terms": {
"field": "event.keyword"
}
}
}
}
}
}
}
}

Related

Return Multi-Term Distinct Values

Within an Elastic Search index I am attempting to query by 2 distinct top-level field values from field companyName and field productName, ordered by a generatedDate field and include the domainModelId field.
The following SQL query shows the results of all existing values, and I've highlighted the two unique document rows (in this case) by generatedDate:
{
"query": "SELECT companyName, productName, generatedDate FROM nextware_domain_metaservices_domainmodel ORDER BY generatedDate DESC"
}
response as follows:
I tried the following
{
"size":0,
"aggs":
{
"companies":
{
"terms":
{
"field": "companyName.keyword"
},
"aggs":
{
"products":
{
"terms":
{
"field": "productName.keyword"
}
}
}
}
}
}
This returns the correct buckets as follows;
"aggregations": {
"companies": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "NextWare",
"doc_count": 18,
"products": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "ProductPortal",
"doc_count": 16
},
{
"key": "Domain",
"doc_count": 2
}
]
}
}
]
}
}
How can I include the value of domainModelId.Id field without a second query?
To include the value of domainModelId.Id, you need to use the top_hits aggregation.
Adding a working example with index data, search query, and search result:
Index Data:
{
"companyName":"NextWare",
"productName":"Domain",
"domainModelId.Id":"i"
}
{
"companyName":"NextWare",
"productName":"Domain",
"domainModelId.Id":"c"
}
{
"companyName":"NextWare",
"productName":"ProductPortal",
"domainModelId.Id":"a"
}
{
"companyName":"NextWare",
"productName":"ProductPortal",
"domainModelId.Id":"b"
}
{
"companyName":"NextWare",
"productName":"ProductPortal",
"domainModelId.Id":"d"
}
{
"companyName":"NextWare",
"productName":"ProductPortal",
"domainModelId.Id":"e"
}
{
"companyName":"NextWare",
"productName":"ProductPortal",
"domainModelId.Id":"f"
}
{
"companyName":"NextWare",
"productName":"ProductPortal",
"domainModelId.Id":"g"
}
{
"companyName":"NextWare",
"productName":"ProductPortal",
"domainModelId.Id":"h"
}
Search Query:
{
"size": 0,
"aggs": {
"companies": {
"terms": {
"field": "companyName.keyword"
},
"aggs": {
"products": {
"terms": {
"field": "productName.keyword"
},
"aggs": {
"top_ids": {
"top_hits": {
"_source": {
"includes": [
"domainModelId.Id"
]
},
"size": 10
}
}
}
}
}
}
}
}
Search Result:
"aggregations": {
"companies": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "NextWare",
"doc_count": 9,
"products": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "ProductPortal",
"doc_count": 7,
"top_ids": {
"hits": {
"total": {
"value": 7,
"relation": "eq"
},
"max_score": 1.0,
"hits": [
{
"_index": "67049816",
"_type": "_doc",
"_id": "1",
"_score": 1.0,
"_source": {
"domainModelId.Id": "a"
}
},
{
"_index": "67049816",
"_type": "_doc",
"_id": "2",
"_score": 1.0,
"_source": {
"domainModelId.Id": "b"
}
},
{
"_index": "67049816",
"_type": "_doc",
"_id": "4",
"_score": 1.0,
"_source": {
"domainModelId.Id": "d"
}
},
{
"_index": "67049816",
"_type": "_doc",
"_id": "5",
"_score": 1.0,
"_source": {
"domainModelId.Id": "e"
}
},
{
"_index": "67049816",
"_type": "_doc",
"_id": "6",
"_score": 1.0,
"_source": {
"domainModelId.Id": "f"
}
},
{
"_index": "67049816",
"_type": "_doc",
"_id": "7",
"_score": 1.0,
"_source": {
"domainModelId.Id": "g"
}
},
{
"_index": "67049816",
"_type": "_doc",
"_id": "8",
"_score": 1.0,
"_source": {
"domainModelId.Id": "h"
}
}
]
}
}
},
{
"key": "Domain",
"doc_count": 2,
"top_ids": {
"hits": {
"total": {
"value": 2,
"relation": "eq"
},
"max_score": 1.0,
"hits": [
{
"_index": "67049816",
"_type": "_doc",
"_id": "3",
"_score": 1.0,
"_source": {
"domainModelId.Id": "c"
}
},
{
"_index": "67049816",
"_type": "_doc",
"_id": "9",
"_score": 1.0,
"_source": {
"domainModelId.Id": "i"
}
}
]
}
}
}
]
}
}
]
}
}

Elastic Search - select DISTINCT value from aggregation result?

In Elasticsearch I have an index named Menu. Each Menu document has an array of Shop objects, something like this:
{
"menu_id": 1,
"name": 1,
"shops": [
{
"name": "A",
"shop_id": "A"
},
{
"name": "B",
"shop_id": "B"
}
]
}
{
"menu_id": 2,
"name": 2,
"shops": [
{
"name": "C",
"shop_id": "C"
}
]
}
{
"menu_id": 3,
"name": 3,
"shops": [
{
"name": "A",
"shop_id": "A"
}
]
}
{
"menu_id": 4,
"name": 4,
"shops": [
{
"name": "A",
"shop_id": "A"
},
{
"name": "C",
"shop_id": "C"
}
]
}
With my query I want to search for shops that have id "A" or "C". I want my result to look like this:
{
"name": "A",
"shop_id": "A"
},
{
"name": "C",
"shop_id": "C"
}
I tried with this query.
{
"_source": "shops",
"query": {
"bool": {
"should": [
{
"match": {
"shops.id": "A"
}
},
{
"match": {
"shops.id": "C"
}
}
]
}
},
"aggs": {
"all_shops": {
"terms": {
"field": "shops.id.keyword",
"min_doc_count": 1
},
"aggs": {
"real_shop": {
"top_hits": {
"_source": [
"shops"
],
"size": 1
}
}
}
}
}
}
And this query.
{
"_source": "shops",
"query": {
"bool": {
"should": [
{
"match": {
"shops.id": "A"
}
},
{
"match": {
"shops.id": "C"
}
}
]
}
},
"aggs": {
"messages": {
"filters": {
"filters": [
{
"match": {
"shops.id": "A"
}
},
{
"match": {
"shops.id": "C"
}
}
]
},
"aggs": {
"real_shop": {
"top_hits": {
"_source": [
"shops"
],
"size": 1
}
}
}
}
}
}
I still get many "A", "B" and many "C" results.
How can I get "A" just once and "C" just once?
I cannot search the Shop index directly, because I want to use information from Menu in the search.
The final query will be "search shops by shop name or menu name, together with shop ids".
You need to map shops as a nested type so that each nested object can be queried independently. You can use inner_hits to return only the nested objects that matched the query. Modify your index mapping as shown below:
{
"mappings": {
"properties": {
"shops": {
"type": "nested"
}
}
}
}
Search Query:
{
"query": {
"nested": {
"path": "shops",
"query": {
"terms": {
"shops.shop_id.keyword": [
"A",
"C"
]
}
},
"inner_hits": {}
}
}
}
Search Result:
"hits": [
{
"_index": "66675093",
"_type": "_doc",
"_id": "1",
"_score": 1.0,
"_source": {
"menu_id": 1,
"name": 1,
"shops": [
{
"name": "A",
"shop_id": "A"
},
{
"name": "B",
"shop_id": "B"
}
]
},
"inner_hits": {
"shops": {
"hits": {
"total": {
"value": 1,
"relation": "eq"
},
"max_score": 1.0,
"hits": [
{
"_index": "66675093",
"_type": "_doc",
"_id": "1",
"_nested": {
"field": "shops",
"offset": 0
},
"_score": 1.0,
"_source": {
"name": "A", // note this
"shop_id": "A"
}
}
]
}
}
}
},
{
"_index": "66675093",
"_type": "_doc",
"_id": "2",
"_score": 1.0,
"_source": {
"menu_id": 1,
"name": 1,
"shops": [
{
"name": "C",
"shop_id": "C"
}
]
},
"inner_hits": {
"shops": {
"hits": {
"total": {
"value": 1,
"relation": "eq"
},
"max_score": 1.0,
"hits": [
{
"_index": "66675093",
"_type": "_doc",
"_id": "2",
"_nested": {
"field": "shops",
"offset": 0
},
"_score": 1.0,
"_source": {
"name": "C",
"shop_id": "C" // note this
}
}
]
}
}
}
}
]
UPDATE 1:
You can use a filter aggregation along with a nested aggregation to achieve your use case. Try the query below:
{
"size": 0,
"aggs": {
"NAME": {
"nested": {
"path": "shops"
},
"aggs": {
"NAME": {
"filter": {
"terms": {
"shops.shop_id.keyword": ["A","C"]
}
},
"aggs": {
"NAME": {
"terms": {
"field": "shops.shop_id.keyword"
},
"aggs": {
"top_sales_hits": {
"top_hits": {
"size": 1
}
}
}
}
}
}
}
}
}
}
Search Result will be
"aggregations": {
"NAME": {
"doc_count": 6,
"NAME": {
"doc_count": 5,
"NAME": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "A",
"doc_count": 3,
"top_sales_hits": {
"hits": {
"total": {
"value": 3,
"relation": "eq"
},
"max_score": 1.0,
"hits": [
{
"_index": "66675093",
"_type": "_doc",
"_id": "1",
"_nested": {
"field": "shops",
"offset": 0
},
"_score": 1.0,
"_source": {
"name": "A", // note this
"shop_id": "A"
}
}
]
}
}
},
{
"key": "C",
"doc_count": 2,
"top_sales_hits": {
"hits": {
"total": {
"value": 2,
"relation": "eq"
},
"max_score": 1.0,
"hits": [
{
"_index": "66675093",
"_type": "_doc",
"_id": "2",
"_nested": {
"field": "shops",
"offset": 0
},
"_score": 1.0,
"_source": {
"name": "C", // note this
"shop_id": "C"
}
}
]
}
}
}
]
}
}
}
}

How to perform union operation on an array field after group by aggregation on another field in elasticsearch

I have the following documents in index products
{ "product_name": "prod-1", "meta": [ { "tag": "tag1", "score": "12" }, { "tag": "tag2", "score": "24" } ] }
{ "product_name": "prod-2", "meta": [ { "tag": "tag1", "score": "36" } ] }
{ "product_name": "prod-2", "meta": [ { "tag": "tag2", "score": "44" } ] }
{ "product_name": "prod-3", "meta": [ { "tag": "tag3", "score": "54" } ] }
I know how to group by product_name in es
POST /products/_search
{
"size": 0,
"aggs": {
"by_product": {
"terms": {
"field": "product_name"
}
}
}
}
After grouping by product_name, I want a field called meta in each bucket which has a union of meta from all documents in that bucket like this
[
{
"key": "prod-1",
"meta": [{ "tag": "tag1", "score": "12" }, { "tag": "tag2", "score": "24" }]
},
{
"key": "prod-2",
"meta": [{ "tag": "tag1", "score": "36" }, { "tag": "tag2", "score": "44" }]
},
{
"key": "prod-3",
"meta": [ { "tag": "tag3", "score": "54" } ]
}
]
How can I achieve this in Elasticsearch?
The best way to get your expected search result is to use the top hits aggregation, which lets you add additional fields to each bucket of the terms aggregation.
Search Query:
{
"size": 0,
"aggs": {
"by_product": {
"terms": {
"field": "product_name.keyword"
},
"aggs": {
"top_sales_hits": {
"top_hits": {
"_source": {
"includes": [
"meta.tag",
"meta.score"
]
}
}
}
}
}
}
}
Search Result:
"aggregations": {
"by_product": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "prod-2",
"doc_count": 2,
"top_sales_hits": {
"hits": {
"total": {
"value": 2,
"relation": "eq"
},
"max_score": 1.0,
"hits": [
{
"_index": "64801386",
"_type": "_doc",
"_id": "2",
"_score": 1.0,
"_source": {
"meta": [
{
"score": "36",
"tag": "tag1"
}
]
}
},
{
"_index": "64801386",
"_type": "_doc",
"_id": "3",
"_score": 1.0,
"_source": {
"meta": [
{
"score": "44",
"tag": "tag2"
}
]
}
}
]
}
}
},
{
"key": "prod-1",
"doc_count": 1,
"top_sales_hits": {
"hits": {
"total": {
"value": 1,
"relation": "eq"
},
"max_score": 1.0,
"hits": [
{
"_index": "64801386",
"_type": "_doc",
"_id": "1",
"_score": 1.0,
"_source": {
"meta": [
{
"score": "12",
"tag": "tag1"
},
{
"score": "24",
"tag": "tag2"
}
]
}
}
]
}
}
},
{
"key": "prod-3",
"doc_count": 1,
"top_sales_hits": {
"hits": {
"total": {
"value": 1,
"relation": "eq"
},
"max_score": 1.0,
"hits": [
{
"_index": "64801386",
"_type": "_doc",
"_id": "4",
"_score": 1.0,
"_source": {
"meta": [
{
"score": "54",
"tag": "tag3"
}
]
}
}
]
}
}
}
]
}

How to get sum value for fields based on input field in elasticsearch (input field and sum output fields are different)

These are the documents present in Elasticsearch. I want output grouped by a given field, returning the sums of high and medium for each group; the values of high and medium must be greater than 0.
{
"host_id": 1,
"hostname": "Hostname1",
"businesshierarchy": {
"businessunit": "NON Unit",
"Location":"Un",
"Application":"App1"
},
"updatedts": 1601894092,
"critical": 0,
"high": 1,
"medium": 1,
"low": 0
},
{
"host_id": 2,
"hostname": "Hostname2",
"businesshierarchy": {
"businessunit": "One Unit",
"Location":"Un",
"Application":"App2"
},
"updatedts": 1601894092,
"critical": 0,
"high": 1,
"medium": 2,
"low": 0
},
{
"host_id": 3,
"hostname": "Hostname3",
"businesshierarchy": {
"businessunit": "NON Unit",
"Location":"Uk",
"Application":"App2"
},
"updatedts": 1601894092,
"critical": 0,
"high": 2,
"medium": 2,
"low": 0
}
Is there any query or method in Elasticsearch to get output like the following?
based on location
Location - Un
High - 2
medium - 3
Location - Uk
High - 2
medium - 2
Based on application
Application - App1
High - 1
medium - 1
Application - App2
High - 3
medium - 4
or based on hostname
hostname - Hostname1
High - 1
medium - 1
hostname - Hostname2
High - 1
medium - 2
hostname - Hostname3
High - 2
medium - 2
Similarly for businessunit. The field name is passed dynamically (businessunit, hostname, application, or location), and based on it I want to get the sums of the high and medium values, like the output above.
Adding a working example with index mapping, index data(same as that given in question), search query, and search result
Index Mapping:
{
"mappings": {
"properties": {
"hostname": {
"type": "keyword"
},
"businesshierarchy": {
"properties": {
"Location": {
"type": "keyword"
},
"Application": {
"type": "keyword"
}
}
}
}
}
}
Search Query:
{
"size": 0,
"aggs": {
"user": {
"terms": {
"field": "businesshierarchy.Location"
},
"aggs": {
"top_user_hits": {
"top_hits": {
"_source": {
"includes": [
"high",
"medium"
]
}
}
},
"high_sum": {
"sum": {
"field": "high"
}
},
"medium_sum": {
"sum": {
"field": "medium"
}
}
}
}
}
}
Search Result:
Based on the location
"aggregations": {
"user": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "Un",
"doc_count": 2,
"top_user_hits": {
"hits": {
"total": {
"value": 2,
"relation": "eq"
},
"max_score": 1.0,
"hits": [
{
"_index": "stof_64218649",
"_type": "_doc",
"_id": "1",
"_score": 1.0,
"_source": {
"high": 1,
"medium": 1
}
},
{
"_index": "stof_64218649",
"_type": "_doc",
"_id": "2",
"_score": 1.0,
"_source": {
"high": 1,
"medium": 2
}
}
]
}
},
"high_sum": {
"value": 2.0 <-- note this
},
"medium_sum": {
"value": 3.0
}
},
{
"key": "Uk",
"doc_count": 1,
"top_user_hits": {
"hits": {
"total": {
"value": 1,
"relation": "eq"
},
"max_score": 1.0,
"hits": [
{
"_index": "stof_64218649",
"_type": "_doc",
"_id": "3",
"_score": 1.0,
"_source": {
"high": 2,
"medium": 2
}
}
]
}
},
"high_sum": {
"value": 2.0 <-- note this
},
"medium_sum": {
"value": 2.0
}
}
]
}
For querying on the basis of application replace terms aggregation like this:
"aggs": {
"user": {
"terms": {
"field": "businesshierarchy.Application"
},
The following search result will be there:
"aggregations": {
"user": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "App2",
"doc_count": 2,
"top_user_hits": {
"hits": {
"total": {
"value": 2,
"relation": "eq"
},
"max_score": 1.0,
"hits": [
{
"_index": "stof_64218649",
"_type": "_doc",
"_id": "3",
"_score": 1.0,
"_source": {
"high": 2,
"medium": 2
}
},
{
"_index": "stof_64218649",
"_type": "_doc",
"_id": "2",
"_score": 1.0,
"_source": {
"high": 1,
"medium": 2
}
}
]
}
},
"high_sum": {
"value": 3.0
},
"medium_sum": {
"value": 4.0
}
},
{
"key": "App1",
"doc_count": 1,
"top_user_hits": {
"hits": {
"total": {
"value": 1,
"relation": "eq"
},
"max_score": 1.0,
"hits": [
{
"_index": "stof_64218649",
"_type": "_doc",
"_id": "1",
"_score": 1.0,
"_source": {
"high": 1,
"medium": 1
}
}
]
}
},
"high_sum": {
"value": 1.0
},
"medium_sum": {
"value": 1.0
}
}
]
}
For querying on the basis of hostname replace terms aggregation like this:
"aggs": {
"user": {
"terms": {
"field": "hostname"
},
Search Results will be :
"aggregations": {
"user": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "Hostname1",
"doc_count": 1,
"top_user_hits": {
"hits": {
"total": {
"value": 1,
"relation": "eq"
},
"max_score": 1.0,
"hits": [
{
"_index": "stof_64218649",
"_type": "_doc",
"_id": "1",
"_score": 1.0,
"_source": {
"high": 1,
"medium": 1
}
}
]
}
},
"high_sum": {
"value": 1.0
},
"medium_sum": {
"value": 1.0
}
},
{
"key": "Hostname2",
"doc_count": 1,
"top_user_hits": {
"hits": {
"total": {
"value": 1,
"relation": "eq"
},
"max_score": 1.0,
"hits": [
{
"_index": "stof_64218649",
"_type": "_doc",
"_id": "2",
"_score": 1.0,
"_source": {
"high": 1,
"medium": 2
}
}
]
}
},
"high_sum": {
"value": 1.0
},
"medium_sum": {
"value": 2.0
}
},
{
"key": "Hostname3",
"doc_count": 1,
"top_user_hits": {
"hits": {
"total": {
"value": 1,
"relation": "eq"
},
"max_score": 1.0,
"hits": [
{
"_index": "stof_64218649",
"_type": "_doc",
"_id": "3",
"_score": 1.0,
"_source": {
"high": 2,
"medium": 2
}
}
]
}
},
"high_sum": {
"value": 2.0
},
"medium_sum": {
"value": 2.0
}
}
]
}
We can use this query to get the expected result:
{
"query": {
"bool": {
"filter": [
{
"bool": {
"should": [
{
"range": {
"medium": {
"gt": 0
}
}
},
{
"range": {
"high": {
"gt": 0
}
}
}
]
}
}
]
}
},
"aggs": {
"fieldnames": {
"terms": {
"field": "hostname.keyword"
},
"aggs": {
"medium": {
"sum": {
"field": "medium"
}
},
"high": {
"sum": {
"field": "high"
}
}
}
}
},
"size": 0
}
The search result for this looks like this:
"aggregations": {
"fieldnames": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "ALL Unit",
"doc_count": 1,
"high": {
"value": 0.0
},
"medium": {
"value": 7.0
}
},
{
"key": "Latest Unit",
"doc_count": 1,
"high": {
"value": 0.0
},
"medium": {
"value": 5.0
}
},
{
"key": "NO Unit",
"doc_count": 1,
"high": {
"value": 1.0
},
"medium": {
"value": 1.0
}
}
]
}
}
If we need the result for location or application, we just need to change the terms field.
For Location:
"aggs": {
"fieldnames": {
"terms": {
"field": "businesshierarchy.Location.keyword"
}
For Application:
"aggs": {
"fieldnames": {
"terms": {
"field": "businesshierarchy.Application.keyword"
}
if the mapping is something like this,
{
"mappings": {
"properties": {
"hostname": {
"type": "keyword"
},
"businesshierarchy": {
"properties": {
"Location": {
"type": "keyword"
},
"Application": {
"type": "keyword"
}
}
}
}
}
}
then there is no need to add .keyword to the field name:
"terms": {
"field": "businesshierarchy.Location"
}

Elasticsearch query to process log data

I have an event log of an e-commerce website in Elasticsearch.
Each event is a record in ES
{
"_index": "event_log",
"_type": "log_type",
"_id": "3ud-kmoBazYRVz7KCgIy",
"_score": 1,
"_source": {
"user_id": 123,
"event": "click",
"category": "abc",
"product_id": 1112
}
},
{
"_index": "event_log",
"_type": "log_type",
"_id": "4Od-kmoBazYRVz7KCgLr",
"_score": 1,
"_source": {
"user_id": 123,
"event": "click",
"category": "abc",
"product_id": 1118
}
},
{
"_index": "event_log",
"_type": "log_type",
"_id": "4ud-kmoBazYRVz7KkwL2",
"_score": 1,
"_source": {
"user_id": 123,
"event": "cart",
"category": "xyz",
"product_id": 1
}
},
{
"_index": "event_log",
"_type": "log_type",
"_id": "2ud-kmoBazYRVz7KCALB",
"_score": 1,
"_source": {
"user_id": 123,
"event": "cart",
"category": "xyz",
"product_id": 11
}
},
I want a list of all the product_ids grouped by event, category, and user.
Expected output:
{"click": {
"abc": {
"123": {
"product_id": [1112, 1118]
}
}
},
"cart": {
"xyz": {
"123": {
"product_id": [1, 11]
}
}
}
}
I will be having millions of records in the index. Querying all the records and processing it is time-consuming. Is there a way to produce the output in a single query? I'm sure it is not possible to generate exactly in the given format. Something near to it is very useful.
Hi, here is my suggestion (first try):
GET event_log/_search
{
"size": 0,
"aggs": {
"event": {
"terms": {
"field": "event"
},
"aggs": {
"category": {
"terms": {
"field": "category"
},
"aggs": {
"product_id": {
"terms": {
"field": "product_id"
}
}
}
}
}
}
}
}

Resources