elasticsearch aggregate on list of objects with condition

elasticsearch aggregate on list of objects with condition - elasticsearch

Say we have following documents in elasticsearch:
[{
"person": {
'name': 'asqar'
},
"bill": [
{
code:2,
value: 210
},
{
code:3,
value: 330
},
{
code:8,
value: 220
},
]
},
{
"person": {
'name': 'asqar'
},
"bill": [
{
code:2,
value: 340
},
{
code:4,
value: 340
},
{
code:1,
value: 200
},
]
},
{
"person": {
'name': 'asqar'
},
"bill": [
{
code:2,
value: 810
},
{
code:4,
value: 630
},
{
code:8,
value: 220
},
]
}]
I want to apply aggregate function on specific object in the bill array iwth some condition, for example I want calculate avg of value which its code is 2.

Field bill needs to be created as nested object to filter on it.
You can then use filter aggregation
Mapping:
PUT testindex/_mapping
{
"properties": {
"person": {
"properties": {
"name": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
}
}
},
"bill": {
"type": "nested",
"properties": {
"code": {
"type": "integer"
},
"value":{
"type": "double"
}
}
}
}
}
Data:
"hits" : [
{
"_index" : "testindex",
"_type" : "_doc",
"_id" : "422tAWsBd-1D6Ztt1_Tb",
"_score" : 1.0,
"_source" : {
"person" : {
"name" : "asqar"
},
"bill" : [
{
"code" : 2,
"value" : 210
},
{
"code" : 3,
"value" : 330
},
{
"code" : 8,
"value" : 220
}
]
}
},
{
"_index" : "testindex",
"_type" : "_doc",
"_id" : "5G2uAWsBd-1D6ZttpfR9",
"_score" : 1.0,
"_source" : {
"person" : {
"name" : "asqar"
},
"bill" : [
{
"code" : 2,
"value" : 340
},
{
"code" : 4,
"value" : 340
},
{
"code" : 1,
"value" : 200
}
]
}
},
{
"_index" : "testindex",
"_type" : "_doc",
"_id" : "5W2vAWsBd-1D6ZttQfQ_",
"_score" : 1.0,
"_source" : {
"person" : {
"name" : "asqar"
},
"bill" : [
{
"code" : 2,
"value" : 810
},
{
"code" : 4,
"value" : 630
},
{
"code" : 8,
"value" : 220
}
]
}
}
]
Query:
GET testindex/_search
{
"size": 0,
"aggs": {
"terms_agg": {
"terms": {
"field": "person.name.keyword"
},
"aggs": {
"bill": {
"nested": {
"path": "bill"
},
"aggs": {
"bill_code": {
"filter": {
"term": {
"bill.code": 2
}
},
"aggs": {
"average": {
"avg": {
"field": "bill.value"
}
}
}
}
}
}
}
}
}
}
Output:
"hits" : {
"total" : {
"value" : 3,
"relation" : "eq"
},
"max_score" : null,
"hits" : [ ]
},
"aggregations" : {
"terms_agg" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : "asqar",
"doc_count" : 3,
"bill" : {
"doc_count" : 9,
"bill_code" : {
"doc_count" : 3,
"average" : {
"value" : 453.3333333333333
}
}
}
}
]
}
}

You need to first make sure that the bill field is of nested type. Then you can use nested aggregation to deal with nested documents. You can use terms aggregation on bill.code and a child avg aggregation on field bill.value to this terms aggregation. This will give you average value for each code. Now since you want only aggregation against the code 2, you can make use of bucket selector aggregation to filter and get only bucket with code 2.
So the final aggregation query will look as below:
{
"aggs": {
"VALUE_NESTED": {
"nested": {
"path": "bill"
},
"aggs": {
"VALUE_TERM": {
"terms": {
"field": "bill.code"
},
"aggs": {
"VALUE_AVG": {
"avg": {
"field": "bill.value"
}
},
"CODE": {
"max": {
"field": "bill.code"
}
},
"CODE_FILTER": {
"bucket_selector": {
"buckets_path": {
"code": "CODE"
},
"script": "params.code == 2"
}
}
}
}
}
}
}
}
Sample o/p for above:
"aggregations": {
"VALUE_NESTED": {
"doc_count": 9,
"VALUE_TERM": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": 2,
"doc_count": 3,
"CODE": {
"value": 2
},
"VALUE_AVG": {
"value": 453.3333333333333
}
}
]
}
}
}

Related

Elasticsearch query to get the list of documents with some minimum occurrence of a property

I have a index with documents like this
[
{
"customer_id" : "123",
"country": "USA",
"department": "IT",
"creation_date" : "2021-06-23"
...
},
{
"customer_id" : "123",
"country": "USA",
"department": "IT",
"creation_date" : "2021-06-24"
...
},
{
"customer_id" : "345",
"country": "USA",
"department": "IT",
"creation_date" : "2021-06-25"
...
}
]
I want to get the list of all documents from specific country e.g USA, between a give time range with at least 2 occurrences of same customer_id.
With the above data, it should return
[
{
"customer_id" : "123",
"country": "USA",
"department": "IT",
"creation_date" : "2021-06-24"
...
}
]
Now, I tried the below ES query
POST /index_name/_search
{
"query": {
"bool": {
"must": [
{
"range": {
"creation_date": {
"gte": "2021-06-23",
"lte": "2021-08-23"
}
}
},
{
"match": {
"country": "USA"
}
}
]
}
},
"aggs": {
"customer_agg": {
"terms": {
"field": "customer_id",
"min_doc_count": 2
}
}
}
}
The above query returns following result
"hits" : {
"total" : {
"value" : 10000,
"relation" : "gte"
},
"max_score" : 1.5587491,
"hits" : [...]
]
},
"aggregations" : {
"person_agg" : {
"doc_count_error_upper_bound" : 1,
"sum_other_doc_count" : 1,
"buckets" : [
{
"key" : "customer_id",
"doc_count" : 2
}
]
}
}
I don't need the list of buckets in response, but only the list of documents satisfying the condition. How can I achieve it?

On a first glance I noticed that in the search query you are searching by a field called creation_timestamp but in the mapping of the document you say that the date field you want to range check is called creation_date.
I decided to test this locally on Elasticsearch 7.10 and here are the settings I used
PUT /test-index-v1
PUT /test-index-v1/_mapping
{
"properties": {
"customer_id": {
"type": "keyword"
},
"country": {
"type": "keyword"
},
"department": {
"type": "keyword"
},
"creation-date": {
"type": "date"
}
}
}
As you can see I'm using keyword on the fields so that we can use - sorting, aggregation and etc.
After I created the index I imported the documents you gave as an example
POST /test-index-v1/_doc
{
"customer_id" : "345",
"country": "USA",
"department": "IT",
"creation_date" : "2021-06-25"
}
POST /test-index-v1/_doc
{
"customer_id" : "123",
"country": "USA",
"department": "IT",
"creation_date" : "2021-06-25"
}
POST /test-index-v1/_doc
{
"customer_id" : "123",
"country": "USA",
"department": "IT",
"creation_date" : "2021-06-24"
}
Then I executed this search query including a must match on the customer_id as well:
POST /test-index-v1/_search
{
"query": {
"bool": {
"must": [
{
"range": {
"creation_date": {
"gte": "2021-06-23",
"lte": "2021-08-23"
}
}
},
{
"match": {
"country": "USA"
}
},
{
"match": {
"customer_id": "123"
}
}
]
}
},
"aggs": {
"customer_agg": {
"terms": {
"field": "customer_id",
"min_doc_count": 2
}
}
}
}
This query will return you the search hits as well. Using only an aggregation the searchHits won't be returned.
Here is the response I received:
"took" : 2,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 2,
"relation" : "eq"
},
"max_score" : 1.6035349,
"hits" : [
{
"_index" : "test-index-v1",
"_type" : "_doc",
"_id" : "vbVD9HsBRVWFAvvZTW-l",
"_score" : 1.6035349,
"_source" : {
"customer_id" : "123",
"country" : "USA",
"department" : "IT",
"creation_date" : "2021-06-25"
}
},
{
"_index" : "test-index-v1",
"_type" : "_doc",
"_id" : "vrVD9HsBRVWFAvvZU29q",
"_score" : 1.6035349,
"_source" : {
"customer_id" : "123",
"country" : "USA",
"department" : "IT",
"creation_date" : "2021-06-24"
}
}
]
},
"aggregations" : {
"customer_agg" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : "123",
"doc_count" : 2
}
]
}
}
}
Hope this helps with your issue. Feel free to leave a comment if you have other questions regarding Elastic! :)
EDIT:
Regarding the grouping by customer_id in a certain date range I used this query:
POST /test-index-v1/_search
{
"aggs": {
"group_by_customer_id": {
"terms": {
"field": "customer_id"
},
"aggs": {
"dates_between": {
"filter": {
"range": {
"creation_date": {
"gte": "2020-06-23",
"lte": "2021-06-24"
}
}
}
}
}
}
}
}
And the response is:
"aggregations" : {
"group_by_customer_id" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : "123",
"doc_count" : 2,
"dates_between" : {
"doc_count" : 1
}
},
{
"key" : "345",
"doc_count" : 1,
"dates_between" : {
"doc_count" : 0
}
}
]
}
}

elasticsearch find documents where the given number items in array has the same property value

first of all I would like to show simplified structure of document.
{
"_id": "413123123",
"_source": {
"description": {
"firstLine": "this is my description",
"secondLine": "some value"
},
"InsertDetails": {
"Timestamp": "2020-06-12T11:14:36+0000"
},
"Links": [
{
"LinkDetails": {
"linkId": 2342,
"type": "Link",
"dateCreation": "2012-09-21T08:42:09+0000",
"typeId": 404019,
"typeOfLink": "http"
}
},
{
"LinkDetails": {
"linkId": 321313,
"type": "Link",
"dateCreation": "2012-08-21T08:42:09+0000",
"typeId": 404019,
"typeOfLink": "http"
}
},
{
"LinkDetails": {
"linkId": 1231,
"type": "Link",
"dateCreation": "2012-09-21T08:42:09+0000",
"typeId": 32323,
"typeOfLink": "https"
}
},
{
"LinkDetails": {
"linkId": 53434,
"type": "Link",
"dateCreation": "2012-11-21T08:42:09+0000",
"typeId": 123231,
"typeOfLink": "wss"
}
}
]
}
}
I have a problem with forming query, which would find documents, where the following requirements are met:
two items in Links arrays has typeOfLink equal to http
description string contains word "this"
found items will be sorted by date desc
The version of elasticsearch is 2.3.2
I've tried with query such like this:
{
"query": {
"bool": {
"must": [
{
"bool": {
"must": [
{
"match": {
"Links.LinkDetails.typeOfLink": "http"
}
}
],
"minimum_should_match": 2
}
},
{
"match": {
"description.firstLine": "this"
}
}
]
}
},
"sort": [
{
"InsertDetails.Timestamp": {
"order": "desc"
}
}
]
}
The problem is that this query returns me also the documents, which has only one item in the array with the given value. I've tried to modify this query in different ways, but without any luck.
Added mapping
{
"my_index": {
"mappings": {
"en": {
"properties": {
"InsertDetails": {
"properties": {
"Timestamp": {
"type": "date",
"format": "strict_date_optional_time||epoch_millis"
}
}
},
"description": {
"properties": {
"firstLine": {
"type": "string"
},
"secondLine": {
"type": "string"
}
}
},
"Links": {
"properties": {
"LinkDetails": {
"properties": {
"linkId": {
"type": "long"
},
"type": {
"type": "string"
},
"dateCreation": {
"type": "date",
"format": "strict_date_optional_time||epoch_millis"
},
"typeOfLink": {
"type": "string"
},
"typeId": {
"type": "long"
}
}
}
}
}
}
}
}
}
}

At first, you want to filter on a nested field. (array of object)
To have coherent result you must have to map this field as a nested one.
https://www.elastic.co/guide/en/elasticsearch/reference/current/nested.html
Then, you will have to use aggregations.
What you want is to aggregate only "http" values for type_of_link, and return results if the aggregation return more than 2 results.
You query will be a little more complicated:
{
"size": 0,
"query": {
"bool": {
"filter": [
{
"nested": {
"path": "Links",
"query": {
"match": {
"Links.LinkDetails.typeOfLink": "http"
}
}
}
},
{
"match": {
"description.firstLine": "this"
}
}
]
}
},
"aggs": {
"links": {
"nested": {
"path": "Links"
},
"aggs": {
"http_only": {
"filter": {
"term": {
"Links.LinkDetails.typeOfLink.keyword": "http"
}
},
"aggs": {
"several_http": {
"terms": {
"field": "Links.LinkDetails.typeOfLink.keyword",
"min_doc_count": 2
}
,
"aggs": {
"complete_match": {
"top_hits": {
"size": 100
}
}
}
}
}
}
}
}
},
"sort": [
{
"InsertDetails.Timestamp": {
"order": "desc"
}
}
]
}
And your response will looks like:
"aggregations" : {
"links" : {
"doc_count" : 4,
"http_only" : {
"doc_count" : 2,
"several_http" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : "http",
"doc_count" : 2,
"complete_match" : {
"hits" : {
"total" : {
"value" : 2,
"relation" : "eq"
},
"max_score" : 0.98082924,
"hits" : [
{
"_index" : "test3",
"_type" : "_doc",
"_id" : "ed1AkXQBD_dLYq-V78bD",
"_nested" : {
"field" : "Links",
"offset" : 0
},
"_score" : 0.98082924,
"_source" : {
"LinkDetails" : {
"linkId" : 2342,
"type" : "Link",
"dateCreation" : "2012-09-21T08:42:09+0000",
"typeId" : 404019,
"typeOfLink" : "http"
}
}
},
{
"_index" : "test3",
"_type" : "_doc",
"_id" : "ed1AkXQBD_dLYq-V78bD",
"_nested" : {
"field" : "Links",
"offset" : 1
},
"_score" : 0.98082924,
"_source" : {
"LinkDetails" : {
"linkId" : 321313,
"type" : "Link",
"dateCreation" : "2012-08-21T08:42:09+0000",
"typeId" : 404019,
"typeOfLink" : "http"
}
}
}
]
}
}
}
]
}
}
}
}
By playing with the given aggregation you should be able to do what you want.

How to filter nested aggregations in ElasticSearch?

For example, let's assume we have a product index with the following mapping:
{
"product": {
"mappings": {
"producttype": {
"properties": {
"id": {
"type": "keyword"
},
"productAttributes": {
"type": "nested",
"properties": {
"name": {
"type": "keyword"
},
"value": {
"type": "keyword"
}
}
},
"title": {
"type": "text",
"fields": {
"keyword": {
"type": "text",
"analyzer": "keyword"
}
},
"analyzer": "standard"
}
}
}
}
}
}
I am trying to find how many products which have specific product attributes using the following query(I am using a fuzzy query to allow some edit distance):
{
"size": 0,
"query": {
"nested": {
"query": {
"fuzzy": {
"productAttributes.name": {
"value": "SSD"
}
}
},
"path": "productAttributes"
}
},
"aggs": {
"product_attribute_nested_agg": {
"nested": {
"path": "productAttributes"
},
"aggs": {
"terms_nested_agg": {
"terms": {
"field": "productAttributes.name"
}
}
}
}
}
}
But it returns all product attributes for each matched document and here is the response I get.
"aggregations" : {
"product_attribute_nested_agg" : {
"doc_count" : 6,
"terms_nested_agg" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : "SSD",
"doc_count" : 3
},
{
"key" : "USB 2.0",
"doc_count" : 3
}
]
}
}
}
Could you please guide me to how to filter buckets to only return matched attributes?
Edit:
Here are some document samples:
"hits" : {
"total" : 12,
"max_score" : 1.0,
"hits" : [
{
"_index" : "product",
"_type" : "producttype",
"_id" : "677d1164-c401-4d36-8a08-6aa14f7f32bb",
"_score" : 1.0,
"_source" : {
"title" : "Dell laptop",
"productAttributes" : [
{
"name" : "USB 2.0",
"value" : "4"
},
{
"name" : "SSD",
"value" : "250 GB"
}
]
}
},
{
"_index" : "product",
"_type" : "producttype",
"_id" : "2954935a-7f60-437a-8a54-00da2d71da46",
"_score" : 1.0,
"_source" : {
"productAttributes" : [
{
"name" : "USB 2.0",
"value" : "3"
},
{
"name" : "SSD",
"value" : "500 GB"
}
],
"title" : "HP laptop"
}
},
]
}

To filter only specific, you can use filter queries.
Query:
{
"size": 0,
"aggs": {
"product_attribute_nested_agg": {
"nested": {
"path": "productAttributes"
},
"aggs": {
"inner": {
"filter": {
"terms": {
"productAttributes.name": [
"SSD"
]
}
},
"aggs": {
"terms_nested_agg": {
"terms": {
"field": "productAttributes.name"
}
}
}
}
}
}
}
}
This is what it does the trick:
"filter": {
"terms": {
"productAttributes.name": [
"SSD"
]
}
}
You need to do filter part of the aggregation.
Output:
"aggregations": {
"product_attribute_nested_agg": {
"doc_count": 4,
"inner": {
"doc_count": 2,
"terms_nested_agg": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "SSD",
"doc_count": 2
}
]
}
}
}
}
Filtering using Fuzziness :
GET /product/_search
{
"size": 0,
"aggs": {
"product_attribute_nested_agg": {
"nested": {
"path": "productAttributes"
},
"aggs": {
"inner": {
"filter": {
"fuzzy": {
"productAttributes.name": {
"value": "SSt",//here will match SSD
"fuzziness": 3//you can remove it to be as Auto
}
}
},
"aggs": {
"terms_nested_agg": {
"terms": {
"field": "productAttributes.name"
}
}
}
}
}
}
}
}

Elasticsearch query with grouping

I have database with products. Each Product is composed of fields: uuid, group_id, title, since, till.
since and till define interval of availability.
Intervals [since, till] are disjoint pairs for each group_id. So there are no 2 products within one group for which intervals intersect.
I need to fetch a list of products that meets the following conditions:
on the list should be at most 1 product from each group
each product matches the given title
each product is current (since <= NOW <= till) OR if current product does not exist within its group, it should be the nearest product from the future (min(since) such that since >= NOW)
ES mapping:
{
"products": {
"mappings": {
"dynamic": "false",
"properties": {
"group_id": {
"type": "long",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"title": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"since": {
"type": "date",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"till": {
"type": "date",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
}
}
}
}
}
Is it possible to create such query in Elasticsearch?

Looking at your mapping, I've created sample documents, the query and its response as below:
Sample Documents:
POST product_index/_doc/1
{
"group_id": 1,
"title": "nike",
"since": "2020-01-01",
"till": "2020-03-31"
}
POST product_index/_doc/2
{
"group_id": 2,
"title": "nike",
"since": "2020-01-01",
"till": "2020-03-31"
}
POST product_index/_doc/3
{
"group_id": 3,
"title": "nike",
"since": "2020-03-15",
"till": "2020-03-31"
}
POST product_index/_doc/4
{
"group_id": 3,
"title": "nike",
"since": "2020-03-19",
"till": "2020-03-31"
}
As mentioned above, there are like 4 documents in total, group 1 and 2 have one document each while group 3 has two documents with both since >= now
Query Request:
The summary of the query is below:
Bool
- Must
- Match title as nike
- Should
- clause 1 - since <= now <= till
- clause 2 - now <= since
Agg
- Terms on GroupId
- Top Hits (retrieve only 1st document as your clause is at most for each group, and sort them by asc order of since)
Below is the actual query:
POST product_index/_search
{
"size": 0,
"query": {
"bool": {
"must": [
{
"match": {
"title": "nike"
}
},
{
"bool": {
"should": [
{ <--- since <=now <= till
"bool": {
"must": [
{
"range": {
"till": {
"gte": "now"
}
}
},
{
"range": {
"since": {
"lte": "now"
}
}
}
]
}
},
{ <---- since >= now
"bool": {
"must": [
{
"range": {
"since": {
"gte": "now"
}
}
}
]
}
}
]
}
}
]
}
},
"aggs": {
"my_groups": {
"terms": {
"field": "group_id.keyword",
"size": 10
},
"aggs": {
"my_docs": {
"top_hits": {
"size": 1, <--- Note this to return at most one document
"sort": [
{ "since": { "order": "asc"} <--- Sort to return the lowest value of since
}
]
}
}
}
}
}
}
Notice that I've made use of Terms Aggregation and Top Hits as its sub-aggregation.
Response:
{
"took" : 7,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 4,
"relation" : "eq"
},
"max_score" : null,
"hits" : [ ]
},
"aggregations" : {
"my_groups" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : "3",
"doc_count" : 2,
"my_docs" : {
"hits" : {
"total" : {
"value" : 2,
"relation" : "eq"
},
"max_score" : null,
"hits" : [
{
"_index" : "product_index",
"_type" : "_doc",
"_id" : "3",
"_score" : null,
"_source" : {
"group_id" : 3,
"title" : "nike",
"since" : "2020-03-15",
"till" : "2020-03-31"
},
"sort" : [
1584230400000
]
}
]
}
}
},
{
"key" : "1",
"doc_count" : 1,
"my_docs" : {
"hits" : {
"total" : {
"value" : 1,
"relation" : "eq"
},
"max_score" : null,
"hits" : [
{
"_index" : "product_index",
"_type" : "_doc",
"_id" : "1",
"_score" : null,
"_source" : {
"group_id" : 1,
"title" : "nike",
"since" : "2020-01-01",
"till" : "2020-03-31"
},
"sort" : [
1577836800000
]
}
]
}
}
},
{
"key" : "2",
"doc_count" : 1,
"my_docs" : {
"hits" : {
"total" : {
"value" : 1,
"relation" : "eq"
},
"max_score" : null,
"hits" : [
{
"_index" : "product_index",
"_type" : "_doc",
"_id" : "2",
"_score" : null,
"_source" : {
"group_id" : 2,
"title" : "nike",
"since" : "2020-01-01",
"till" : "2020-03-31"
},
"sort" : [
1577836800000
]
}
]
}
}
}
]
}
}
}
Let me know if this helps!

Count aggregation on reverse nesting field for multiple values

Our Elastic Mapping
{"mappings": {
"products" : {
"properties":{
"name " : {
"type" : "keyword"
},
"resellers" : {
"type" : "nested",
"properties" : {
"name" : { "type" : "text" },
"price" : { "type" : "double" }
}
}
}
}
}}
In this mapping each product stores the list of resellers which are selling it at specific price.
We have requirement where we want to get count of products sell by specific resellers at specific price, I am able to get it for single reseller by using reverse nested agg and cardinality agg using following query DSL
. For ex:- Getting Total Product sell by Amazon at 2.
{
"query": {
"bool": {
"must": [
{
"match_all": {
"boost": 1.0
}
}
]
}
},
"aggs": {
"patchFilter": {
"nested": {
"path": "resellers"
},
"aggs": {
"nestedfilter": {
"filter": {
"bool": {
"must":[
{
"term" :{
"resellers.name.keyword": {
"value": "Amazon"
}
}
},{
"terms" :{
"resellers.price":[2]
}
}
]
}
},
"aggs": {
"resellerprice": {
"reverse_nested" :{},
"aggs": {
"resellers_price":{
"cardinality" : {
"field" : "name.keyword"
}
}
}
}
}
}
}
}
}
}
I want to fetch it for multiple resellers(Amazon,Flipkart, Walmart) which are selling at 2 in single query. Can somebody help me out in doing that?

Mapping:
PUT productreseller
{
"mappings": {
"properties": {
"name": {
"type": "keyword"
},
"resellers": {
"type": "nested",
"properties": {
"name": {
"type": "text",
"fields":{
"keyword":{
"type": "keyword"
}
}
},
"price": {
"type": "double"
}
}
}
}
}
}
Data:
[
{
"_index" : "productreseller",
"_type" : "_doc",
"_id" : "JNbCLm0B00idyGV0Pn1Z",
"_score" : 1.0,
"_source" : {
"name" : "P2",
"resellers" : [
{
"name" : "amazon",
"price" : 3
},
{
"name" : "abc",
"price" : 2
}
]
}
},
{
"_index" : "productreseller",
"_type" : "_doc",
"_id" : "JdbCLm0B00idyGV0Wn0y",
"_score" : 1.0,
"_source" : {
"name" : "P1",
"resellers" : [
{
"name" : "amazon",
"price" : 2
},
{
"name" : "abc",
"price" : 3
}
]
}
},
{
"_index" : "productreseller",
"_type" : "_doc",
"_id" : "JtbPLm0B00idyGV0D32Y",
"_score" : 1.0,
"_source" : {
"name" : "P4",
"resellers" : [
{
"name" : "xyz",
"price" : 2
},
{
"name" : "abc",
"price" : 3
}
]
}
}
]
Query:
GET productreseller/_search
{
"query": {
"bool": {
"must": [
{
"match_all": {
"boost": 1
}
}
]
}
},
"aggs": {
"patchFilter": {
"nested": {
"path": "resellers"
},
"aggs": {
"nestedfilter": {
"filter": {
"bool": {
"must": [
{
"terms": {
"resellers.price": [
2
]
}
}
]
}
},
"aggs": {
"NAME": {
"terms": {
--->terms aggregation to list resellers and reverse_nested as subaggregation
"field": "resellers.name.keyword",
"size": 10
},
"aggs": {
"resellerprice": {
"reverse_nested": {},
"aggs": {
"resellers_price": {
"cardinality": {
"field": "name"
}
}
}
}
}
}
}
}
}
}
}
}
Result:
"aggregations" : {
"patchFilter" : {
"doc_count" : 8,
"nestedfilter" : {
"doc_count" : 3,
"NAME" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : "abc",
"doc_count" : 1,
"resellerprice" : {
"doc_count" : 1,
"resellers_price" : {
"value" : 1
}
}
},
{
"key" : "amazon",
"doc_count" : 1,
"resellerprice" : {
"doc_count" : 1,
"resellers_price" : {
"value" : 1
}
}
},
{
"key" : "xyz",
"doc_count" : 1,
"resellerprice" : {
"doc_count" : 1,
"resellers_price" : {
"value" : 1
}
}
}
]
}
}
}
}
If you want to display only certain resellers, you can add terms query in nested filter

Develop Reference

ruby bash windows laravel spring algorithm oracle macos go visual-studio

elasticsearch aggregate on list of objects with condition - elasticsearch

Related

Elasticsearch query to get the list of documents with some minimum occurrence of a property

elasticsearch find documents where the given number items in array has the same property value

How to filter nested aggregations in ElasticSearch?

Elasticsearch query with grouping

Count aggregation on reverse nesting field for multiple values

Categories

Resources