Our Elastic Mapping
{"mappings": {
"products" : {
"properties":{
"name " : {
"type" : "keyword"
},
"resellers" : {
"type" : "nested",
"properties" : {
"name" : { "type" : "text" },
"price" : { "type" : "double" }
}
}
}
}
}}
In this mapping each product stores the list of resellers which are selling it at specific price.
We have requirement where we want to get count of products sell by specific resellers at specific price, I am able to get it for single reseller by using reverse nested agg and cardinality agg using following query DSL
. For ex:- Getting Total Product sell by Amazon at 2.
{
"query": {
"bool": {
"must": [
{
"match_all": {
"boost": 1.0
}
}
]
}
},
"aggs": {
"patchFilter": {
"nested": {
"path": "resellers"
},
"aggs": {
"nestedfilter": {
"filter": {
"bool": {
"must":[
{
"term" :{
"resellers.name.keyword": {
"value": "Amazon"
}
}
},{
"terms" :{
"resellers.price":[2]
}
}
]
}
},
"aggs": {
"resellerprice": {
"reverse_nested" :{},
"aggs": {
"resellers_price":{
"cardinality" : {
"field" : "name.keyword"
}
}
}
}
}
}
}
}
}
}
I want to fetch it for multiple resellers(Amazon,Flipkart, Walmart) which are selling at 2 in single query. Can somebody help me out in doing that?
Mapping:
PUT productreseller
{
"mappings": {
"properties": {
"name": {
"type": "keyword"
},
"resellers": {
"type": "nested",
"properties": {
"name": {
"type": "text",
"fields":{
"keyword":{
"type": "keyword"
}
}
},
"price": {
"type": "double"
}
}
}
}
}
}
Data:
[
{
"_index" : "productreseller",
"_type" : "_doc",
"_id" : "JNbCLm0B00idyGV0Pn1Z",
"_score" : 1.0,
"_source" : {
"name" : "P2",
"resellers" : [
{
"name" : "amazon",
"price" : 3
},
{
"name" : "abc",
"price" : 2
}
]
}
},
{
"_index" : "productreseller",
"_type" : "_doc",
"_id" : "JdbCLm0B00idyGV0Wn0y",
"_score" : 1.0,
"_source" : {
"name" : "P1",
"resellers" : [
{
"name" : "amazon",
"price" : 2
},
{
"name" : "abc",
"price" : 3
}
]
}
},
{
"_index" : "productreseller",
"_type" : "_doc",
"_id" : "JtbPLm0B00idyGV0D32Y",
"_score" : 1.0,
"_source" : {
"name" : "P4",
"resellers" : [
{
"name" : "xyz",
"price" : 2
},
{
"name" : "abc",
"price" : 3
}
]
}
}
]
Query:
GET productreseller/_search
{
"query": {
"bool": {
"must": [
{
"match_all": {
"boost": 1
}
}
]
}
},
"aggs": {
"patchFilter": {
"nested": {
"path": "resellers"
},
"aggs": {
"nestedfilter": {
"filter": {
"bool": {
"must": [
{
"terms": {
"resellers.price": [
2
]
}
}
]
}
},
"aggs": {
"NAME": {
"terms": {
--->terms aggregation to list resellers and reverse_nested as subaggregation
"field": "resellers.name.keyword",
"size": 10
},
"aggs": {
"resellerprice": {
"reverse_nested": {},
"aggs": {
"resellers_price": {
"cardinality": {
"field": "name"
}
}
}
}
}
}
}
}
}
}
}
}
Result:
"aggregations" : {
"patchFilter" : {
"doc_count" : 8,
"nestedfilter" : {
"doc_count" : 3,
"NAME" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : "abc",
"doc_count" : 1,
"resellerprice" : {
"doc_count" : 1,
"resellers_price" : {
"value" : 1
}
}
},
{
"key" : "amazon",
"doc_count" : 1,
"resellerprice" : {
"doc_count" : 1,
"resellers_price" : {
"value" : 1
}
}
},
{
"key" : "xyz",
"doc_count" : 1,
"resellerprice" : {
"doc_count" : 1,
"resellers_price" : {
"value" : 1
}
}
}
]
}
}
}
}
If you want to display only certain resellers, you can add terms query in nested filter
Related
Currently im using ES 7.10
Creating a index and mapping
PUT /rathan_index
PUT /rathan_index/_mappings
{
"properties" : {
"Term__kt" : {
"type" : "nested",
"include_in_root" : true,
"properties" : {
"Fee_Amount" : {
"type" : "text",
"fields" : {
"raw" : {
"type" : "double"
}
}
}
}
},
"type" : {
"type" : "keyword"
}
}
}
Insert some data
POST /rathan_index/_doc/contracts-74900
{
"type": "contracts",
"Term__kt" : [
{
"Fee_Amount" : 105.75
}
]
}
POST /rathan_index/_doc/contracts-74901
{
"type": "contracts",
"Term__kt" : [
{
"Fee_Amount" : 105.75
}
]
}
Query for search
GET rathan_index/_search
{
"from": 0,
"size": 1000,
"_source": [
"*__kt.*"
],
"track_total_hits": true,
"query": {
"function_score": {
"query": {
"bool": {
"filter": {
"bool": {
"must": [
{
"term": {
"type": "contracts"
}
}
]
}
},
"should": [
{
"query_string": {
"query": "0105.75",
"type": "best_fields",
"fields": [
"*__kt.*"
],
"lenient": true,
"default_operator": "AND",
"boost": 3
}
},
{
"query_string": {
"query": "0105.75",
"type": "phrase_prefix",
"fields": [
"*__kt.*"
],
"lenient": true,
"boost": 2
}
},
{
"query_string": {
"query": "0105.75",
"fields": [
"*__kt.*"
],
"lenient": true,
"type": "best_fields"
}
}
],
"minimum_should_match": 1
}
}
}
},
"min_score": 0.000001,
"highlight": {
"fields": {
"*": {
"type": "plain"
}
}
}
}
The result comes with 105.75 but its not getting higlighted.
I'm assuming there is a implicit conversion that's happening which is converting 0105.75 to 105.75 .
But i want it to highlighted regardless. Can i please get a solution for this.
Thanks in advance
{
"took" : 565,
"timed_out" : false,
"_shards" : {
"total" : 5,
"successful" : 5,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 2,
"relation" : "eq"
},
"max_score" : 4.0,
"hits" : [
{
"_index" : "rathan_index",
"_type" : "_doc",
"_id" : "contracts-74900",
"_score" : 4.0,
"_source" : {
"Term__kt" : [
{
"Fee_Amount" : 105.75
}
]
},
"highlight" : {
"type" : [
"<em>contracts</em>"
]
}
},
{
"_index" : "rathan_index",
"_type" : "_doc",
"_id" : "contracts-74901",
"_score" : 4.0,
"_source" : {
"Term__kt" : [
{
"Fee_Amount" : 105.75
}
]
},
"highlight" : {
"type" : [
"<em>contracts</em>"
]
}
}
]
}
}
For example, let's assume we have a product index with the following mapping:
{
"product": {
"mappings": {
"producttype": {
"properties": {
"id": {
"type": "keyword"
},
"productAttributes": {
"type": "nested",
"properties": {
"name": {
"type": "keyword"
},
"value": {
"type": "keyword"
}
}
},
"title": {
"type": "text",
"fields": {
"keyword": {
"type": "text",
"analyzer": "keyword"
}
},
"analyzer": "standard"
}
}
}
}
}
}
I am trying to find how many products which have specific product attributes using the following query(I am using a fuzzy query to allow some edit distance):
{
"size": 0,
"query": {
"nested": {
"query": {
"fuzzy": {
"productAttributes.name": {
"value": "SSD"
}
}
},
"path": "productAttributes"
}
},
"aggs": {
"product_attribute_nested_agg": {
"nested": {
"path": "productAttributes"
},
"aggs": {
"terms_nested_agg": {
"terms": {
"field": "productAttributes.name"
}
}
}
}
}
}
But it returns all product attributes for each matched document and here is the response I get.
"aggregations" : {
"product_attribute_nested_agg" : {
"doc_count" : 6,
"terms_nested_agg" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : "SSD",
"doc_count" : 3
},
{
"key" : "USB 2.0",
"doc_count" : 3
}
]
}
}
}
Could you please guide me to how to filter buckets to only return matched attributes?
Edit:
Here are some document samples:
"hits" : {
"total" : 12,
"max_score" : 1.0,
"hits" : [
{
"_index" : "product",
"_type" : "producttype",
"_id" : "677d1164-c401-4d36-8a08-6aa14f7f32bb",
"_score" : 1.0,
"_source" : {
"title" : "Dell laptop",
"productAttributes" : [
{
"name" : "USB 2.0",
"value" : "4"
},
{
"name" : "SSD",
"value" : "250 GB"
}
]
}
},
{
"_index" : "product",
"_type" : "producttype",
"_id" : "2954935a-7f60-437a-8a54-00da2d71da46",
"_score" : 1.0,
"_source" : {
"productAttributes" : [
{
"name" : "USB 2.0",
"value" : "3"
},
{
"name" : "SSD",
"value" : "500 GB"
}
],
"title" : "HP laptop"
}
},
]
}
To filter only specific, you can use filter queries.
Query:
{
"size": 0,
"aggs": {
"product_attribute_nested_agg": {
"nested": {
"path": "productAttributes"
},
"aggs": {
"inner": {
"filter": {
"terms": {
"productAttributes.name": [
"SSD"
]
}
},
"aggs": {
"terms_nested_agg": {
"terms": {
"field": "productAttributes.name"
}
}
}
}
}
}
}
}
This is what it does the trick:
"filter": {
"terms": {
"productAttributes.name": [
"SSD"
]
}
}
You need to do filter part of the aggregation.
Output:
"aggregations": {
"product_attribute_nested_agg": {
"doc_count": 4,
"inner": {
"doc_count": 2,
"terms_nested_agg": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "SSD",
"doc_count": 2
}
]
}
}
}
}
Filtering using Fuzziness :
GET /product/_search
{
"size": 0,
"aggs": {
"product_attribute_nested_agg": {
"nested": {
"path": "productAttributes"
},
"aggs": {
"inner": {
"filter": {
"fuzzy": {
"productAttributes.name": {
"value": "SSt",//here will match SSD
"fuzziness": 3//you can remove it to be as Auto
}
}
},
"aggs": {
"terms_nested_agg": {
"terms": {
"field": "productAttributes.name"
}
}
}
}
}
}
}
}
Assume I have the following two elements in my elasticsearch index:
{
"name": "bob",
"likes": ["computer", "cat", "water"]
},
{
"name": "alice",
"likes": ["gaming", "gambling"]
}
I would now like to query for elements, that like computer, laptop or cat. (which matches bob, note that it should be an exact string match)
As a result I need the matches, as well as the count of matches, so would like to get the following back (since it found computer and cat, but not laptop or water):
{
"name": "bob",
"likes": ["computer", "cat"],
"likes_count": 2
}
Is there a way to achieve this with a single elasticsearch query? (note that I'm still stuck with ES2.4, but will hopefully soon be able to upgrade).
Ideally I would also like to sort the output by likes_count.
Thank you!
Best way would be to create likes as nested data type
Mapping
PUT index71
{
"mappings": {
"properties": {
"name":{
"type": "text"
},
"likes":{
"type": "nested",
"properties": {
"name":{
"type":"keyword"
}
}
}
}
}
}
Query:
GET index71/_search
{
"query": {
"bool": {
"must": [
{
"nested": {
"path": "likes",
"query": {
"bool": {
"must": [
{
"terms": {
"likes.name": [
"computer",
"cat",
"laptop"
]
}
}
]
}
},
"inner_hits": {} ---> It will return matched elements in nested type
}
}
]
}
},
"aggs": {
"likes": {
"nested": {
"path": "likes"
},
"aggs": {
"matcheLikes": {
"filter": {
"bool": {
"must": [
{
"terms": {
"likes.name": [
"computer",
"cat",
"laptop"
]
}
}
]
}
},
"aggs": {
"likeCount": {
"value_count": {
"field": "likes.name"
}
}
}
}
}
}
}
}
Result:
"hits" : [
{
"_index" : "index71",
"_type" : "_doc",
"_id" : "u9qTo3ABH6obcmRRRhSA",
"_score" : 1.0,
"_source" : {
"name" : "bob",
"likes" : [
{
"name" : "computer"
},
{
"name" : "cat"
},
{
"name" : "water"
}
]
},
"inner_hits" : {
"likes" : {
"hits" : {
"total" : {
"value" : 2,
"relation" : "eq"
},
"max_score" : 1.0,
"hits" : [
{
"_index" : "index71",
"_type" : "_doc",
"_id" : "u9qTo3ABH6obcmRRRhSA",
"_nested" : {
"field" : "likes",
"offset" : 0
},
"_score" : 1.0,
"_source" : {
"name" : "computer"
}
},
{
"_index" : "index71",
"_type" : "_doc",
"_id" : "u9qTo3ABH6obcmRRRhSA",
"_nested" : {
"field" : "likes",
"offset" : 1
},
"_score" : 1.0,
"_source" : {
"name" : "cat"
}
}
]
}
}
}
}
]
},
"aggregations" : {
"likes" : {
"doc_count" : 3,
"matcheLikes" : {
"doc_count" : 2,
"likeCount" : {
"value" : 2
}
}
}
}
If likes cannot be changed to nested type then scripts need to be used which will impact performance
Mapping
{
"index72" : {
"mappings" : {
"properties" : {
"likes" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"name" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
}
}
}
}
}
Query:
{
"script_fields": { ---> It will iterate through likes and get matched values
"matchedElements": {
"script": "def matchedLikes=[];def list_to_check = ['computer', 'laptop', 'cat']; def do_not_return = true; for(int i=0;i<doc['likes.keyword'].length;i++){ if(list_to_check.contains(doc['likes.keyword'][i])) {matchedLikes.add(doc['likes.keyword'][i])}} return matchedLikes;"
}
},
"query": {
"bool": {
"filter": {
"bool": {
"must": [
{
"terms": {
"likes": [
"computer",
"laptop",
"cat"
]
}
}
]
}
}
}
},
"aggs": {
"Name": {
"terms": {
"field": "name.keyword",
"size": 10
},
"aggs": {
"Count": {
"scripted_metric": { --> get count of matched values
"init_script": "state.matchedLikes=[]",
"map_script": " def list_to_check = ['computer', 'laptop', 'cat']; def do_not_return = true; for(int i=0;i<doc['likes.keyword'].length;i++){ if(list_to_check.contains(doc['likes.keyword'][i])) {state.matchedLikes.add(doc['likes.keyword'][i]);}}",
"combine_script": "int count = 0; for (int i=0;i<state.matchedLikes.length;i++) { count += 1 } return count;",
"reduce_script": "int count = 0; for (a in states) { count += a } return count"
}
}
}
}
}
}
Result:
"hits" : [
{
"_index" : "index72",
"_type" : "_doc",
"_id" : "wtqso3ABH6obcmRR0hSV",
"_score" : 0.0,
"fields" : {
"matchedElements" : [
"cat",
"computer"
]
}
}
]
},
"aggregations" : {
"Name" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : "bob",
"doc_count" : 1,
"Count" : {
"value" : 2
}
}
]
}
}
EDIT 1
To give higher score to more matches change terms query to should clause. Each term in should clause will contribute towards score
GET index71/_search
{
"query": {
"bool": {
"must": [
{
"nested": {
"path": "likes",
"query": {
"bool": {
"should": [
{
"term": {
"likes.name": "computer"
}
},
{
"term": {
"likes.name": "cat"
}
},
{
"term": {
"likes.name": "laptop"
}
}
]
}
},
"inner_hits": {}
}
}
]
}
},
"aggs": {
"likes": {
"nested": {
"path": "likes"
},
"aggs": {
"matcheLikes": {
"filter": {
"bool": {
"must": [
{
"terms": {
"likes.name": [
"computer",
"cat",
"laptop"
]
}
}
]
}
},
"aggs": {
"likeCount": {
"value_count": {
"field": "likes.name"
}
}
}
}
}
}
}
}
Result
"hits" : {
"total" : {
"value" : 2,
"relation" : "eq"
},
"max_score" : 1.5363467,
"hits" : [
{
"_index" : "index71",
"_type" : "_doc",
"_id" : "u9qTo3ABH6obcmRRRhSA",
"_score" : 1.5363467,
"_source" : {
"name" : "bob",
"likes" : [
{
"name" : "computer"
},
{
"name" : "cat"
},
{
"name" : "water"
}
]
},
"inner_hits" : {
"likes" : {
"hits" : {
"total" : {
"value" : 2,
"relation" : "eq"
},
"max_score" : 1.7917595,
"hits" : [
{
"_index" : "index71",
"_type" : "_doc",
"_id" : "u9qTo3ABH6obcmRRRhSA",
"_nested" : {
"field" : "likes",
"offset" : 1
},
"_score" : 1.7917595,
"_source" : {
"name" : "cat"
}
},
{
"_index" : "index71",
"_type" : "_doc",
"_id" : "u9qTo3ABH6obcmRRRhSA",
"_nested" : {
"field" : "likes",
"offset" : 0
},
"_score" : 1.2809337,
"_source" : {
"name" : "computer"
}
}
]
}
}
}
},
{
"_index" : "index71",
"_type" : "_doc",
"_id" : "pr-lqHABcSMy6UhGAWtW",
"_score" : 1.2809337,
"_source" : {
"name" : "bob",
"likes" : [
{
"name" : "computer"
},
{
"name" : "gaming"
},
{
"name" : "gambling"
}
]
},
"inner_hits" : {
"likes" : {
"hits" : {
"total" : {
"value" : 1,
"relation" : "eq"
},
"max_score" : 1.2809337,
"hits" : [
{
"_index" : "index71",
"_type" : "_doc",
"_id" : "pr-lqHABcSMy6UhGAWtW",
"_nested" : {
"field" : "likes",
"offset" : 0
},
"_score" : 1.2809337,
"_source" : {
"name" : "computer"
}
}
]
}
}
}
}
]
},
"aggregations" : {
"likes" : {
"doc_count" : 6,
"matcheLikes" : {
"doc_count" : 3,
"likeCount" : {
"value" : 3
}
}
}
}
Say we have following documents in elasticsearch:
[{
"person": {
'name': 'asqar'
},
"bill": [
{
code:2,
value: 210
},
{
code:3,
value: 330
},
{
code:8,
value: 220
},
]
},
{
"person": {
'name': 'asqar'
},
"bill": [
{
code:2,
value: 340
},
{
code:4,
value: 340
},
{
code:1,
value: 200
},
]
},
{
"person": {
'name': 'asqar'
},
"bill": [
{
code:2,
value: 810
},
{
code:4,
value: 630
},
{
code:8,
value: 220
},
]
}]
I want to apply aggregate function on specific object in the bill array iwth some condition, for example I want calculate avg of value which its code is 2.
Field bill needs to be created as nested object to filter on it.
You can then use filter aggregation
Mapping:
PUT testindex/_mapping
{
"properties": {
"person": {
"properties": {
"name": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
}
}
},
"bill": {
"type": "nested",
"properties": {
"code": {
"type": "integer"
},
"value":{
"type": "double"
}
}
}
}
}
Data:
"hits" : [
{
"_index" : "testindex",
"_type" : "_doc",
"_id" : "422tAWsBd-1D6Ztt1_Tb",
"_score" : 1.0,
"_source" : {
"person" : {
"name" : "asqar"
},
"bill" : [
{
"code" : 2,
"value" : 210
},
{
"code" : 3,
"value" : 330
},
{
"code" : 8,
"value" : 220
}
]
}
},
{
"_index" : "testindex",
"_type" : "_doc",
"_id" : "5G2uAWsBd-1D6ZttpfR9",
"_score" : 1.0,
"_source" : {
"person" : {
"name" : "asqar"
},
"bill" : [
{
"code" : 2,
"value" : 340
},
{
"code" : 4,
"value" : 340
},
{
"code" : 1,
"value" : 200
}
]
}
},
{
"_index" : "testindex",
"_type" : "_doc",
"_id" : "5W2vAWsBd-1D6ZttQfQ_",
"_score" : 1.0,
"_source" : {
"person" : {
"name" : "asqar"
},
"bill" : [
{
"code" : 2,
"value" : 810
},
{
"code" : 4,
"value" : 630
},
{
"code" : 8,
"value" : 220
}
]
}
}
]
Query:
GET testindex/_search
{
"size": 0,
"aggs": {
"terms_agg": {
"terms": {
"field": "person.name.keyword"
},
"aggs": {
"bill": {
"nested": {
"path": "bill"
},
"aggs": {
"bill_code": {
"filter": {
"term": {
"bill.code": 2
}
},
"aggs": {
"average": {
"avg": {
"field": "bill.value"
}
}
}
}
}
}
}
}
}
}
Output:
"hits" : {
"total" : {
"value" : 3,
"relation" : "eq"
},
"max_score" : null,
"hits" : [ ]
},
"aggregations" : {
"terms_agg" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : "asqar",
"doc_count" : 3,
"bill" : {
"doc_count" : 9,
"bill_code" : {
"doc_count" : 3,
"average" : {
"value" : 453.3333333333333
}
}
}
}
]
}
}
You need to first make sure that the bill field is of nested type. Then you can use nested aggregation to deal with nested documents. You can use terms aggregation on bill.code and a child avg aggregation on field bill.value to this terms aggregation. This will give you average value for each code. Now since you want only aggregation against the code 2, you can make use of bucket selector aggregation to filter and get only bucket with code 2.
So the final aggregation query will look as below:
{
"aggs": {
"VALUE_NESTED": {
"nested": {
"path": "bill"
},
"aggs": {
"VALUE_TERM": {
"terms": {
"field": "bill.code"
},
"aggs": {
"VALUE_AVG": {
"avg": {
"field": "bill.value"
}
},
"CODE": {
"max": {
"field": "bill.code"
}
},
"CODE_FILTER": {
"bucket_selector": {
"buckets_path": {
"code": "CODE"
},
"script": "params.code == 2"
}
}
}
}
}
}
}
}
Sample o/p for above:
"aggregations": {
"VALUE_NESTED": {
"doc_count": 9,
"VALUE_TERM": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": 2,
"doc_count": 3,
"CODE": {
"value": 2
},
"VALUE_AVG": {
"value": 453.3333333333333
}
}
]
}
}
}
I have this query which gives me documents using the filter. How do I make sure the coordinates field in not empty/null?
{
"size":1,
"_source": ["coordinates"]
"query": {
"filtered": {
"query": {
"match_all": {}
},
"filter": {
"range": {
"timestamp_ms": {
"gte": "1468015200000",
"lte": "1468022400000"
}
}
}
}
}
}
output:
{
"took" : 41,
"timed_out" : false,
"_shards" : {
"total" : 5,
"successful" : 5,
"failed" : 0
},
"hits" : {
"total" : 79112,
"max_score" : 1.0,
"hits" : [ {
"_index" : "twitter",
"_type" : "tweet",
"_id" : "751536344866910208",
"_score" : 1.0,
"_source" : {
"coordinates" : null
}
} ]
}
}
this is the mapping:
"coordinates" : {
"properties" : {
"coordinates" : {
"type" : "geo_point"
},
"type" : {
"type" : "string",
"index" : "not_analyzed"
}
}
}
You can add in your filter/query a filter exists
An example:
{
"size": 1,
"_source": [
"coordinates"
],
"query": {
"filtered": {
"query": {
"exists": {
"field": "coordinates"
}
},
"filter": {
"range": {
"timestamp_ms": {
"gte": "1468015200000",
"lte": "1468022400000"
}
}
}
}
}
}