Elastic Search - select DISTINCT value from aggregation result? - elasticsearch

In Elasticsearch I have an index named Menu. Each Menu document contains an array of Shop objects, something like this:
{
"menu_id": 1,
"name": 1,
"shops": [
{
"name": "A",
"shop_id: "A",
},
{
"name": "B",
"shop_id: "B",
}
]
}
{
"menu_id": 2,
"name": 2,
"shops": [
{
"name": "C",
"shop_id: "C",
}
]
}
{
"menu_id": 3,
"name": 3,
"shops": [
{
"name": "A",
"shop_id: "A",
}
]
}
{
"menu_id": 4,
"name": 4,
"shops": [
{
"name": "A",
"shop_id: "A",
},
{
"name": "C",
"shop_id: "C",
}
]
}
With my query I want to search for the Shops that have id "A" or "C". I want my result to look like this:
{
"name": "A",
"shop_id: "A",
},
{
"name": "C",
"shop_id: "C",
}
I tried with this query.
{
"_source": "shops",
"query": {
"bool": {
"should": [
{
"match": {
"shops.id": "A"
}
},
{
"match": {
"shops.id": "C"
}
}
]
}
},
"aggs": {
"all_shops": {
"terms": {
"field": "shops.id.keyword",
"min_doc_count": 1
},
"aggs": {
"real_shop": {
"top_hits": {
"_source": [
"shops"
],
"size": 1
}
}
}
}
}
}
And this query.
{
"_source": "shops",
"query": {
"bool": {
"should": [
{
"match": {
"shops.id": "A"
}
},
{
"match": {
"shops.id": "C"
}
}
]
}
},
"aggs": {
"messages": {
"filters": {
"filters": [
{
"match": {
"shops.id": "A"
}
},
{
"match": {
"shops.id": "C"
}
}
]
},
"aggs": {
"real_shop": {
"top_hits": {
"_source": [
"shops"
],
"size": 1
}
}
}
}
}
}
I still get many "A", "B" and "C" entries in the results.
How can I get "A" just once and "C" just once?
I cannot search a separate Shop index, because I want to use information from Menu in the search.
The final query is "Search shops by shop name or menu name, restricted to the given shop ids".

You need to map shops as the nested type so that each object in the array can be queried independently. You can then use inner_hits to return only the nested objects that actually matched the query. Modify your index mapping as shown below:
{
"mappings": {
"properties": {
"shops": {
"type": "nested"
}
}
}
}
Search Query:
{
"query": {
"nested": {
"path": "shops",
"query": {
"terms": {
"shops.shop_id.keyword": [
"A",
"C"
]
}
},
"inner_hits": {}
}
}
}
Search Result:
"hits": [
{
"_index": "66675093",
"_type": "_doc",
"_id": "1",
"_score": 1.0,
"_source": {
"menu_id": 1,
"name": 1,
"shops": [
{
"name": "A",
"shop_id": "A"
},
{
"name": "B",
"shop_id": "B"
}
]
},
"inner_hits": {
"shops": {
"hits": {
"total": {
"value": 1,
"relation": "eq"
},
"max_score": 1.0,
"hits": [
{
"_index": "66675093",
"_type": "_doc",
"_id": "1",
"_nested": {
"field": "shops",
"offset": 0
},
"_score": 1.0,
"_source": {
"name": "A", // note this
"shop_id": "A"
}
}
]
}
}
}
},
{
"_index": "66675093",
"_type": "_doc",
"_id": "2",
"_score": 1.0,
"_source": {
"menu_id": 1,
"name": 1,
"shops": [
{
"name": "C",
"shop_id": "C"
}
]
},
"inner_hits": {
"shops": {
"hits": {
"total": {
"value": 1,
"relation": "eq"
},
"max_score": 1.0,
"hits": [
{
"_index": "66675093",
"_type": "_doc",
"_id": "2",
"_nested": {
"field": "shops",
"offset": 0
},
"_score": 1.0,
"_source": {
"name": "C",
"shop_id": "C" // note this
}
}
]
}
}
}
}
]
UPDATE 1:
You can use a filter aggregation together with a nested aggregation to achieve your use case. Try the query below:
{
"size": 0,
"aggs": {
"NAME": {
"nested": {
"path": "shops"
},
"aggs": {
"NAME": {
"filter": {
"terms": {
"shops.shop_id.keyword": ["A","C"]
}
},
"aggs": {
"NAME": {
"terms": {
"field": "shops.shop_id.keyword"
},
"aggs": {
"top_sales_hits": {
"top_hits": {
"size": 1
}
}
}
}
}
}
}
}
}
}
Search Result will be
"aggregations": {
"NAME": {
"doc_count": 6,
"NAME": {
"doc_count": 5,
"NAME": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "A",
"doc_count": 3,
"top_sales_hits": {
"hits": {
"total": {
"value": 3,
"relation": "eq"
},
"max_score": 1.0,
"hits": [
{
"_index": "66675093",
"_type": "_doc",
"_id": "1",
"_nested": {
"field": "shops",
"offset": 0
},
"_score": 1.0,
"_source": {
"name": "A", // note this
"shop_id": "A"
}
}
]
}
}
},
{
"key": "C",
"doc_count": 2,
"top_sales_hits": {
"hits": {
"total": {
"value": 2,
"relation": "eq"
},
"max_score": 1.0,
"hits": [
{
"_index": "66675093",
"_type": "_doc",
"_id": "2",
"_nested": {
"field": "shops",
"offset": 0
},
"_score": 1.0,
"_source": {
"name": "C", // note this
"shop_id": "C"
}
}
]
}
}
}
]
}
}
}
}

Related

Elasticsearch - Is it possible to collapse first then aggregate data of a nested field?

I am using Elasticsearch and I want to group our results by a specific field, returning the top n documents per group. The documents have a nested field, and I want to aggregate the nested field of all the documents in each group.
Example
I have 5 documents, each with a groupId and a nested field people. I want to group these documents by groupId and then, for each group, get the top 2 people (some documents may contain the same people).
PUT test/_mapping
{
"properties": {
"groupId":{
"type":"keyword"
},
"id":{
"type":"keyword"
},
"name":{
"type":"text"
},
"people":{
"type":"nested",
"properties":{
"email":{
"type":"keyword"
}
}
}
}
}
PUT test/_doc/1
{
"name": "docs1",
"groupId": "1",
"people":[{
"email":"people1#test.com"
}]
}
PUT test/_doc/2
{
"name": "docs2",
"groupId": "1",
"people":[{
"email":"people2.1#test.com"
},
{
"email":"people2.2#test.com"
}]
}
PUT test/_doc/3
{
"name": "docs3",
"groupId": "2",
"people":[{
"email":"people3.1#test.com"
},
{
"email":"people2.2#test.com"
}]
}
PUT test/_doc/4
{
"name": "docs4",
"groupId": "1",
"people":[{
"email":"people4.1#test.com"
},
{
"email":"people4.2#test.com"
}]
}
PUT test/_doc/5
{
"name": "docs5",
"groupId": "3",
"people":[{
"email":"people5.1#test.com"
},
{
"email":"people5.2#test.com"
}]
}
Search query
GET test/_search
{
"collapse": {
"field": "groupId",
"inner_hits": {
"name":"inner",
"size": 2
}
},
"sort": [
{
"groupId": {
"order": "asc"
}
}
],
"size": 2,
"from": 0
}
Result
{
"took": 7,
"timed_out": false,
"_shards": {
"total": 1,
"successful": 1,
"skipped": 0,
"failed": 0
},
"hits": {
"total": {
"value": 5,
"relation": "eq"
},
"max_score": null,
"hits": [
{
"_index": "test",
"_id": "1",
"_score": null,
"_source": {
"name": "docs1",
"groupId": "1",
"people": [
{
"email": "people1#test.com"
}
]
},
"fields": {
"groupId": [
"1"
]
},
"sort": [
"1"
],
"inner_hits": {
"inner": {
"hits": {
"total": {
"value": 3,
"relation": "eq"
},
"max_score": 0,
"hits": [
{
"_index": "test",
"_id": "1",
"_score": 0,
"_source": {
"name": "docs1",
"groupId": "1",
"people": [
{
"email": "people1#test.com"
}
]
}
},
{
"_index": "test",
"_id": "2",
"_score": 0,
"_source": {
"name": "docs2",
"groupId": "1",
"people": [
{
"email": "people2.1#test.com"
},
{
"email": "people2.2#test.com"
}
]
}
}
]
}
}
}
},
{
"_index": "test",
"_id": "3",
"_score": null,
"_source": {
"name": "docs3",
"groupId": "2",
"people": [
{
"email": "people3.1#test.com"
},
{
"email": "people2.2#test.com"
}
]
},
"fields": {
"groupId": [
"2"
]
},
"sort": [
"2"
],
"inner_hits": {
"inner": {
"hits": {
"total": {
"value": 1,
"relation": "eq"
},
"max_score": 0,
"hits": [
{
"_index": "test",
"_id": "3",
"_score": 0,
"_source": {
"name": "docs3",
"groupId": "2",
"people": [
{
"email": "people3.1#test.com"
},
{
"email": "people2.2#test.com"
}
]
}
}
]
}
}
}
}
]
}
}
The expected result is an aggregated groupPeople field for each group that contains the top n people of that group (it should not be affected by the inner_hits size; e.g. groupId=1 contains 3 documents and 5 people).
The query that you're looking for is this one:
POST test/_search
{
"size": 0,
"aggs": {
"groups": {
"terms": {
"field": "groupId",
"size": 10
},
"aggs": {
"people": {
"nested": {
"path": "people"
},
"aggs": {
"emails": {
"terms": {
"field": "people.email",
"size": 2
}
}
}
}
}
}
}
}
If you need pagination, you can achieve the same using the composite aggregation:
POST test/_search
{
"size": 0,
"aggs": {
"pages": {
"composite": {
"sources": [
{
"groups": {
"terms": {
"field": "groupId"
}
}
}
]
},
"aggs": {
"people": {
"nested": {
"path": "people"
},
"aggs": {
"emails": {
"terms": {
"field": "people.email",
"size": 2
}
}
}
}
}
}
}
}

How to sort by a provide value and get only matched element in nested array in Elasticsearch7

Getting all the elements back works, but I feel the returned documents are too heavy. How can I get only the matching elements of a nested array?
Details are below; I will be grateful for any help you can provide.
My test index is below:
PUT test
PUT test/_mapping
{
"properties": {
"items": {
"type": "nested",
"properties": {
"item": {
"type": "keyword"
},
"price": {
"type": "integer"
}
}
}
}
}
PUT test/_doc/1
{
"items": [
{"item": "A", "price": 350},
{"item": "B", "price": 500}
]
}
PUT test/_doc/2
{
"items": [
{"item": "A", "price": 400},
{"item": "C", "price": 200}
]
}
PUT test/_doc/3
{
"items": [
{"item": "B", "price": 600},
{"item": "C", "price": 150}
]
}
I can get the docs that contain item: "B", sorted by the price of item: "B".
Here is the query
POST test/_search
{
"query" : {
"nested" : {
"query" : {
"term" : {
"items.item": {
"value" : "B",
"boost" : 1.0
}
}
},
"path" : "items",
"ignore_unmapped" : false,
"score_mode" : "none",
"boost" : 1.0
}
},
"sort":[
{
"items.price": {
"order": "desc",
"nested": {
"path": "items",
"filter": {
"term" : { "items.item": "B" }
}
}
}
}
]
}
And Results
[
{
"items" : [
{"item" : "B", "price" : 600},
{"item" : "C", "price" : 150}
]
},
{
"items" : [
{"item" : "A", "price" : 350},
{"item" : "B", "price" : 500}
]
}
]
How do I get a result that contains only item: "B", like below?
[
{
"items" : [
{"item" : "B", "price" : 600}
]
},
{
"items" : [
{"item" : "B", "price" : 500}
]
}
]
You can use inner_hits along with the nested query, to get only the matching object in the result
{
"query": {
"nested": {
"query": {
"term": {
"items.item": {
"value": "B",
"boost": 1.0
}
}
},
"inner_hits": {}, // note this
"path": "items",
"ignore_unmapped": false,
"score_mode": "none",
"boost": 1.0
}
},
"sort": [
{
"items.price": {
"order": "desc",
"nested": {
"path": "items",
"filter": {
"term": {
"items.item": "B"
}
}
}
}
}
]
}
Search Result:
"hits": [
{
"_index": "68902032",
"_type": "_doc",
"_id": "3",
"_score": null,
"_source": {
"items": [
{
"item": "B",
"price": 600
},
{
"item": "C",
"price": 150
}
]
},
"sort": [
600
],
"inner_hits": {
"items": {
"hits": {
"total": {
"value": 1,
"relation": "eq"
},
"max_score": 0.9444616,
"hits": [
{
"_index": "68902032",
"_type": "_doc",
"_id": "3",
"_nested": {
"field": "items",
"offset": 0
},
"_score": 0.9444616,
"_source": {
"item": "B",
"price": 600 // note this
}
}
]
}
}
}
},
{
"_index": "68902032",
"_type": "_doc",
"_id": "1",
"_score": null,
"_source": {
"items": [
{
"item": "A",
"price": 350
},
{
"item": "B",
"price": 500
}
]
},
"sort": [
500
],
"inner_hits": {
"items": {
"hits": {
"total": {
"value": 1,
"relation": "eq"
},
"max_score": 0.9444616,
"hits": [
{
"_index": "68902032",
"_type": "_doc",
"_id": "1",
"_nested": {
"field": "items",
"offset": 1
},
"_score": 0.9444616,
"_source": {
"item": "B",
"price": 500 // note this
}
}
]
}
}
}
}
]

Deduplicate results in elasticsearch based on a field

I have an elasticsearch index (v6.8) that contains documents that may share a similar value for a field.
[
{
"siren": 123,
"owner": "A",
"price": 10
},
{
"siren": 123,
"owner": "B",
"price": 20
},
{
"siren": 456,
"owner": "A",
"price": 10
},
{
"siren": 456,
"owner": "C",
"price": 30
}
]
I would like to get all documents from owners A and B, but deduplicated on the siren field. The result would be as shown below; I don't care which of the duplicate documents is returned (from owner A or B).
[
{
"siren": 123,
"owner": "A",
"price": 10
},
{
"siren": 456,
"owner": "A",
"price": 10
}
]
Also, I would like my aggregations to count documents deduplicated on the same field.
I have tried
{
"query": {
"bool": {
"must": [
[
{
"terms": {
"owner": [
"A",
"B"
]
}
}
]
]
}
},
"aggs": {
"by_price": {
"terms": {
"field": "price",
"size": 20
}
}
}
}
But this counts the "same" document multiple times.
You can use terms aggregation on the siren field along with top hits aggregation
{
"size":0,
"query": {
"bool": {
"must": [
{
"terms": {
"owner.keyword": [
"A",
"B"
]
}
}
]
}
},
"aggs": {
"by_price": {
"terms": {
"field": "siren",
"size": 20
},
"aggs": {
"top_sales_hits": {
"top_hits": {
"_source": {
"includes": [
"siren",
"owner",
"price"
]
},
"size": 1
}
}
}
}
}
}
Search Result will be
"aggregations": {
"by_price": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": 123,
"doc_count": 2,
"top_sales_hits": {
"hits": {
"total": {
"value": 2,
"relation": "eq"
},
"max_score": 1.0,
"hits": [
{
"_index": "66226467",
"_type": "_doc",
"_id": "1",
"_score": 1.0,
"_source": {
"owner": "A", // note this
"siren": 123,
"price": 10
}
}
]
}
}
},
{
"key": 456,
"doc_count": 1,
"top_sales_hits": {
"hits": {
"total": {
"value": 1,
"relation": "eq"
},
"max_score": 1.0,
"hits": [
{
"_index": "66226467",
"_type": "_doc",
"_id": "3",
"_score": 1.0,
"_source": {
"owner": "A", // note this
"siren": 456,
"price": 10
}
}
]
}
}
}
]
}
}

Create the Elastic search query to show Random 5 Questions by category

I have the fields Category and Questions in the table.
My requirement is to get the questions tagged with each of the 3 categories mentioned below (so I want both the Category and Questions fields in the query) by writing an Elasticsearch query.
Category :
OLA
BNA
DRG
GET logstash-sdc-feedback/_search? { "_source":["Category.keyword"], "size": 5, "query":{ "bool": { "must": [ {"match":{"Category.keyword"": "OLA","BNA","DRG"}}
],
}
}, "aggs": { "MyBuckets": { "terms": { "field": "questions.keyword","Category.keyword" "order":{ "_count": "asc" }, "size": "5"
} } } }
You can use terms query along with terms aggregation, to achieve your use case.
Adding a working example
Index Data:
{
"category": "XYZ",
"question": "d"
}
{
"category": "OLA",
"question": "a"
}
{
"category": "BNA",
"question": "b"
}
{
"category": "DRG",
"question": "c"
}
Search Query:
{
"query": {
"bool": {
"must": {
"terms": {
"category.keyword": [
"OLA",
"BNA",
"DRG"
]
}
}
}
},
"aggs": {
"top_tags": {
"terms": {
"field": "category.keyword"
},
"aggs": {
"top_faq_hits": {
"top_hits": {
"_source": {
"includes": [
"question"
]
},
"size": 1
}
}
}
}
}
}
Search Result:
"aggregations": {
"top_tags": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "BNA", // note this
"doc_count": 1,
"top_faq_hits": {
"hits": {
"total": {
"value": 1,
"relation": "eq"
},
"max_score": 1.0,
"hits": [
{
"_index": "65566020",
"_type": "_doc",
"_id": "2",
"_score": 1.0,
"_source": {
"question": "b" // note this
}
}
]
}
}
},
{
"key": "DRG",
"doc_count": 1,
"top_faq_hits": {
"hits": {
"total": {
"value": 1,
"relation": "eq"
},
"max_score": 1.0,
"hits": [
{
"_index": "65566020",
"_type": "_doc",
"_id": "3",
"_score": 1.0,
"_source": {
"question": "c"
}
}
]
}
}
},
{
"key": "OLA",
"doc_count": 1,
"top_faq_hits": {
"hits": {
"total": {
"value": 1,
"relation": "eq"
},
"max_score": 1.0,
"hits": [
{
"_index": "65566020",
"_type": "_doc",
"_id": "1",
"_score": 1.0,
"_source": {
"question": "a"
}
}
]
}
}
}
]
}
}

How to perform union operation on an array field after group by aggregation on another field in elasticsearch

I have the following documents in index products
{ "product_name": "prod-1", "meta": [ { "tag": "tag1", "score": "12" }, { "tag": "tag2", "score": "24" } ] }
{ "product_name": "prod-2", "meta": [ { "tag": "tag1", "score": "36" } ] }
{ "product_name": "prod-2", "meta": [ { "tag": "tag2", "score": "44" } ] }
{ "product_name": "prod-3", "meta": [ { "tag": "tag3", "score": "54" } ] }
I know how to group by product_name in es
POST /products/_search
{
"size": 0,
"aggs": {
"by_product": {
"terms": {
"field": "product_name"
}
}
}
}
After grouping by product_name, I want a field called meta in each bucket which has a union of meta from all documents in that bucket like this
[
{
"key": "prod-1",
"meta": [{ "tag": "tag1", "score": "12" }, { "tag": "tag2", "score": "24" }]
},
{
"key": "prod-2",
"meta": [{ "tag": "tag1", "score": "36" }, { "tag": "tag2", "score": "44" }]
},
{
"key": "prod-3",
"meta": [ { "tag": "tag3", "score": "54" } ]
}
]
How can I achieve this in Elasticsearch?
The best way to get your expected search result is to use a top hits
aggregation, which lets you return additional fields within each bucket
of the terms aggregation.
Search Query:
{
"size": 0,
"aggs": {
"by_product": {
"terms": {
"field": "product_name.keyword"
},
"aggs": {
"top_sales_hits": {
"top_hits": {
"_source": {
"includes": [
"meta.tag",
"meta.score"
]
}
}
}
}
}
}
}
Search Result:
"aggregations": {
"by_product": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "prod-2",
"doc_count": 2,
"top_sales_hits": {
"hits": {
"total": {
"value": 2,
"relation": "eq"
},
"max_score": 1.0,
"hits": [
{
"_index": "64801386",
"_type": "_doc",
"_id": "2",
"_score": 1.0,
"_source": {
"meta": [
{
"score": "36",
"tag": "tag1"
}
]
}
},
{
"_index": "64801386",
"_type": "_doc",
"_id": "3",
"_score": 1.0,
"_source": {
"meta": [
{
"score": "44",
"tag": "tag2"
}
]
}
}
]
}
}
},
{
"key": "prod-1",
"doc_count": 1,
"top_sales_hits": {
"hits": {
"total": {
"value": 1,
"relation": "eq"
},
"max_score": 1.0,
"hits": [
{
"_index": "64801386",
"_type": "_doc",
"_id": "1",
"_score": 1.0,
"_source": {
"meta": [
{
"score": "12",
"tag": "tag1"
},
{
"score": "24",
"tag": "tag2"
}
]
}
}
]
}
}
},
{
"key": "prod-3",
"doc_count": 1,
"top_sales_hits": {
"hits": {
"total": {
"value": 1,
"relation": "eq"
},
"max_score": 1.0,
"hits": [
{
"_index": "64801386",
"_type": "_doc",
"_id": "4",
"_score": 1.0,
"_source": {
"meta": [
{
"score": "54",
"tag": "tag3"
}
]
}
}
]
}
}
}
]
}

Resources