elasticsearch conditional Boost Index time and Query time - elasticsearch

I have an index named twitter_user_index.
I want to boost the score of tweets at index time, using the following rules:
If user.verified:
boost: 10
elif user.follower_count in range (1, 100):
boost: 1
elif user.follower_count in range (101, 200):
boost: 2
How do I achieve this kind of boosting at index time or at query time? (Answers for both are welcome.)

I created an index with the mapping below. I have mapped "user" as an "object" type; if your field is of type "nested", you need to wrap the clauses in a nested query, but the rest of the structure stays the same.
Mappings:
PUT twitter_user_index
{
"mappings": {
"properties": {
"user":{
"type": "object",
"properties": {
"name":{
"type":"text",
"fields":{
"keyword":{
"type":"keyword"
}
}
},
"verified":{
"type":"boolean"
},
"follower_count":{
"type": "integer"
}
}
}
}
}
}
Data: I have indexed three records: two users with verified accounts, having fewer than 100 and more than 100 followers respectively, and one user with a non-verified account having more than 100 followers.
"hits" : [
{
"_index" : "twitter_user_index",
"_type" : "_doc",
"_id" : "gwiu-HEBZgLhu13ZIerO",
"_score" : 1.0,
"_source" : {
"user" : {
"name" : "abc",
"verified" : true,
"follower_count" : 90
}
}
},
{
"_index" : "twitter_user_index",
"_type" : "_doc",
"_id" : "hAiu-HEBZgLhu13Za-qL",
"_score" : 1.0,
"_source" : {
"user" : {
"name" : "efg",
"verified" : true,
"follower_count" : 120
}
}
},
{
"_index" : "twitter_user_index",
"_type" : "_doc",
"_id" : "hQiu-HEBZgLhu13ZhOrr",
"_score" : 1.0,
"_source" : {
"user" : {
"name" : "xyz",
"verified" : false,
"follower_count" : 120
}
}
}
]
Query: I have used a should clause, which does not filter out any documents; it only scores matching documents higher. Each clause has its own boost to increase relevance.
GET twitter_user_index/_search
{
"query": {
"bool": {
"should": [
{
"term": {
"user.verified": {
"value": true,
"boost": 10
}
}
},
{
"range": {
"user.follower_count": {
"gte": 1,
"lte": 100,
"boost": 1
}
}
},
{
"range": {
"user.follower_count": {
"gte": 101,
"lte": 200,
"boost": 2
}
}
}
]
}
}
}
Result: In output of query, verified accounts are scored higher than non verified account and accounts with followers in given range are scored higher.
"hits" : [
{
"_index" : "twitter_user_index",
"_type" : "_doc",
"_id" : "hAiu-HEBZgLhu13Za-qL",
"_score" : 6.700036,
"_source" : {
"user" : {
"name" : "efg",
"verified" : true,
"follower_count" : 120
}
}
},
{
"_index" : "twitter_user_index",
"_type" : "_doc",
"_id" : "gwiu-HEBZgLhu13ZIerO",
"_score" : 5.700036,
"_source" : {
"user" : {
"name" : "abc",
"verified" : true,
"follower_count" : 90
}
}
},
{
"_index" : "twitter_user_index",
"_type" : "_doc",
"_id" : "hQiu-HEBZgLhu13ZhOrr",
"_score" : 2.0,
"_source" : {
"user" : {
"name" : "xyz",
"verified" : false,
"follower_count" : 120
}
}
}
]
EDIT1: The Elasticsearch query DSL has no if/else blocks. Instead, we can add additional should clauses that combine verified:false with a follower_count range (for example, between 1 and 100), so that the range boost is applied to the selected documents.
{
"query": {
"bool": {
"should": [
{
"term": {
"user.verified": {
"value": true,
"boost": 10
}
}
},
{
"bool": {
"should": [
{
"term": {
"user.verified": {
"value": false
}
}
},
{
"range": {
"user.follower_count": {
"gte": 1,
"lte": 100,
"boost": 1
}
}
}
]
}
},
{
"bool": {
"should": [
{
"term": {
"user.verified": {
"value": false
}
}
},
{
"range": {
"user.follower_count": {
"gte": 101,
"lte": 200,
"boost": 2
}
}
}
]
}
}
]
}
}
}
Result
"hits" : [
{
"_index" : "twitter_user_index",
"_type" : "_doc",
"_id" : "hAiu-HEBZgLhu13Za-qL",
"_score" : 5.566749,
"_source" : {
"user" : {
"name" : "efg",
"verified" : true,
"follower_count" : 120
}
}
},
{
"_index" : "twitter_user_index",
"_type" : "_doc",
"_id" : "hgi4-HEBZgLhu13ZW-rY",
"_score" : 5.566749,
"_source" : {
"user" : {
"name" : "xyz1",
"verified" : true,
"follower_count" : 150
}
}
},
{
"_index" : "twitter_user_index",
"_type" : "_doc",
"_id" : "gwiu-HEBZgLhu13ZIerO",
"_score" : 4.566749,
"_source" : {
"user" : {
"name" : "abc",
"verified" : true,
"follower_count" : 90
}
}
},
{
"_index" : "twitter_user_index",
"_type" : "_doc",
"_id" : "hQiu-HEBZgLhu13ZhOrr",
"_score" : 4.4079456,
"_source" : {
"user" : {
"name" : "xyz",
"verified" : false,
"follower_count" : 120
}
}
}
]
EDIT3
Optimized version using a filter clause. A filter does not calculate a score, so it is faster.
{
"query": {
"bool": {
"should": [
{
"term": {
"user.verified": {
"value": true,
"boost": 10
}
}
},
{
"bool": {
"filter": [
{
"term": {
"user.verified": {
"value": false
}
}
}],
"must":[{
"range": {
"user.follower_count": {
"gte": 1,
"lte": 100,
"boost": 1
}
}
}
]
}
},
{
"bool": {
"filter": [
{
"term": {
"user.verified": {
"value": false
}
}
}],
"must":[
{
"range": {
"user.follower_count": {
"gte": 101,
"lte": 200,
"boost": 2
}
}
}
]
}
}
]
}
}
}

Related

Cannot query nested object in Elasticsearch

I have the following data sample:
{
"_index" : "index-stats-202110",
"_type" : "_doc",
"_id" : "yT7vlHwBLghXjfKlKm7e",
"_score" : 9.583174,
"_source" : {
"client_id" : "111f34db-c88c-4675-8a69-f3028b3dfa18",
"campaign_id" : "2c3c62f7-9e77-48df-a211-108a2220a063",
"created_at" : "1634586272",
"date_start" : "1634580000",
"date_end" : "1634583600",
"specs" : [
"events",
"general"
],
"data" : {
"count" : 13
}
}
},
{
"_index" : "index-stats-202110",
"_type" : "_doc",
"_id" : "yj7vlHwBLghXjfKlKm7e",
"_score" : 9.583174,
"_source" : {
"client_id" : "111f34db-c88c-4675-8a69-f3028b3dfa18",
"campaign_id" : "2c3c62f7-9e77-48df-a211-108a2220a063",
"created_at" : "1634586272",
"date_start" : "1634580000",
"date_end" : "1634583600",
"specs" : [
"events",
"visit"
],
"data" : {
"label" : "visit",
"count" : 13
}
}
},
{
"_index" : "index-stats-202110",
"_type" : "_doc",
"_id" : "yz7vlHwBLghXjfKlKm7e",
"_score" : 9.583174,
"_source" : {
"client_id" : "111f34db-c88c-4675-8a69-f3028b3dfa18",
"campaign_id" : "2c3c62f7-9e77-48df-a211-108a2220a063",
"created_at" : "1634586272",
"date_start" : "1634580000",
"date_end" : "1634583600",
"specs" : [
"events"
],
"data" : {
"count" : 1
},
"geo" : {
"country" : "q",
"province" : "q",
"city" : "t"
}
}
},
{
"_index" : "index-stats-202110",
"_type" : "_doc",
"_id" : "zD7vlHwBLghXjfKlKm7e",
"_score" : 9.583174,
"_source" : {
"client_id" : "111f34db-c88c-4675-8a69-f3028b3dfa18",
"campaign_id" : "2c3c62f7-9e77-48df-a211-108a2220a063",
"created_at" : "1634586272",
"date_start" : "1634580000",
"date_end" : "1634583600",
"specs" : [
"events"
],
"data" : {
"count" : 1
},
"geo" : {
"country" : "j",
"province" : "q",
"city" : "d"
}
}
}
which I get from the query:
{
"query": {
"bool": {
"must": [
{
"term": {
"client_id": "111f34db-c88c-4675-8a69-f3028b3dfa18"
}
},
{
"term": {
"campaign_id": "2c3c62f7-9e77-48df-a211-108a2220a063"
}
},
{
"term": {
"specs": "events"
}
}
]
}
}
}
and this is my mapping:
{
"properties":{
"specs":{
"type":"keyword"
},
"campaign_id":{
"type":"keyword"
},
"client_id":{
"type":"keyword"
},
"created_at":{
"type":"date"
},
"date_start":{
"type":"date"
},
"date_end":{
"type":"date"
},
"geo":{
"type":"nested",
"properties":{
"country":{
"type":"keyword"
},
"province":{
"type":"keyword"
},
"city":{
"type":"keyword"
}
}
},
"data":{
"enabled":false
}
}
}
I want to get the geo.country = 'q' which I have in my sample, but when I try to execute the following query I get an empty response:
{
"query": {
"bool": {
"must": [
{
"term": {
"client_id": "111f34db-c88c-4675-8a69-f3028b3dfa18"
}
},
{
"term": {
"campaign_id": "2c3c62f7-9e77-48df-a211-108a2220a063"
}
},
{
"term": {
"specs": "events"
}
},
{
"term": {
"geo.country": "q"
}
}
]
}
}
}
Question: How can I separate the geo.country = 'q' from the list?
You can use nested query along with bool/must clause to achieve your required result
{
"query": {
"bool": {
"must": [
{
"term": {
"client_id": "111f34db-c88c-4675-8a69-f3028b3dfa18"
}
},
{
"term": {
"campaign_id": "2c3c62f7-9e77-48df-a211-108a2220a063"
}
},
{
"term": {
"specs": "events"
}
},
{
"nested": {
"path": "geo",
"query": {
"term": {
"geo.country": "q"
}
}
}
}
]
}
}
}

Date histogram with different Timezones with Elasticsearch v.7.13.3

I have to acquire data from different time zones (for example New York -6.00 and Rome +2.00).
In the document I have a field 'timestamp' defined as "date", and I have created, for example, a "date_histogram" from 8.00 AM to 9.00 AM. How can I match the USA 8.00-9.00 data and the ITA 8.00-9.00 data in order to compare the two data sets from the same local period?
This is my data from two different time zones: 2 documents from the USA and 2 from ITA:
"hits" : [
{
"_index" : "test-data-2021-8-4",
"_type" : "_doc",
"_id" : "9tS4EHsB4Ke8qtFfYqbg",
"_score" : 1.0,
"_source" : {
"id" : "mtKDIsEfSr3I8AwCDE1Gjw_11",
"value" : 87.2,
"timestamp" : "2021-08-04T12:32:04+02:00"
}
},
{
"_index" : "test-data-2021-8-4",
"_type" : "_doc",
"_id" : "99S4EHsB4Ke8qtFfYqbg",
"_score" : 1.0,
"_source" : {
"id" : "mtKDIsEfSr3I8AwCDE1Gjw_5",
"value" : 31.0025,
"timestamp" : "2021-08-04T12:32:04+02:00"
}
},
{
"_index" : "test-data-2021-8-4",
"_type" : "_doc",
"_id" : "wdOREHsB4Ke8qtFfuZAf",
"_score" : 1.0,
"_source" : {
"id" : "mtKDIsEfSr3I8AwCDE1Gjw_11",
"value" : 15.1,
"timestamp" : "2021-08-04T05:49:50-04:00"
}
},
{
"_index" : "test-data-2021-8-4",
"_type" : "_doc",
"_id" : "wtOREHsB4Ke8qtFfuZAg",
"_score" : 1.0,
"_source" : {
"id" : "mtKDIsEfSr3I8AwCDE1Gjw_5",
"value" : 27.9457,
"timestamp" : "2021-08-04T05:49:50-04:00"
}
}
]
This is my date_histogram query:
GET /test-data-*/_search?size=10000
{
"aggs": {
"agg_sum": {
"date_histogram": {
"field": "timestamp",
"fixed_interval": "1h"
},
"aggs": {
"aggregazione": {
"sum": {
"field": "value"
}
}
}
}
},
"size": 0,
"fields": [
{
"field": "timestamp",
"format": "date_time"
}
],
"stored_fields": [
"*"
],
"query": {
"bool": {
"must": [],
"filter": [
{
"range": {
"timestamp": {
"gte": "2021-08-04T08:00:00.000",
"lte": "2021-08-04T09:00:00.000",
"format": "strict_date_optional_time"
}
}
}
],
"should": [],
"must_not": []
}
}
}
Thanks in advance for the response.

Return matched values and the count of values that matched in elasticsearch query

Assume I have the following two elements in my elasticsearch index:
{
"name": "bob",
"likes": ["computer", "cat", "water"]
},
{
"name": "alice",
"likes": ["gaming", "gambling"]
}
I would now like to query for elements, that like computer, laptop or cat. (which matches bob, note that it should be an exact string match)
As a result I need the matches, as well as the count of matches, so would like to get the following back (since it found computer and cat, but not laptop or water):
{
"name": "bob",
"likes": ["computer", "cat"],
"likes_count": 2
}
Is there a way to achieve this with a single elasticsearch query? (note that I'm still stuck with ES2.4, but will hopefully soon be able to upgrade).
Ideally I would also like to sort the output by likes_count.
Thank you!
The best way would be to map likes as a nested data type.
Mapping
PUT index71
{
"mappings": {
"properties": {
"name":{
"type": "text"
},
"likes":{
"type": "nested",
"properties": {
"name":{
"type":"keyword"
}
}
}
}
}
}
Query:
GET index71/_search
{
"query": {
"bool": {
"must": [
{
"nested": {
"path": "likes",
"query": {
"bool": {
"must": [
{
"terms": {
"likes.name": [
"computer",
"cat",
"laptop"
]
}
}
]
}
},
"inner_hits": {} ---> It will return matched elements in nested type
}
}
]
}
},
"aggs": {
"likes": {
"nested": {
"path": "likes"
},
"aggs": {
"matcheLikes": {
"filter": {
"bool": {
"must": [
{
"terms": {
"likes.name": [
"computer",
"cat",
"laptop"
]
}
}
]
}
},
"aggs": {
"likeCount": {
"value_count": {
"field": "likes.name"
}
}
}
}
}
}
}
}
Result:
"hits" : [
{
"_index" : "index71",
"_type" : "_doc",
"_id" : "u9qTo3ABH6obcmRRRhSA",
"_score" : 1.0,
"_source" : {
"name" : "bob",
"likes" : [
{
"name" : "computer"
},
{
"name" : "cat"
},
{
"name" : "water"
}
]
},
"inner_hits" : {
"likes" : {
"hits" : {
"total" : {
"value" : 2,
"relation" : "eq"
},
"max_score" : 1.0,
"hits" : [
{
"_index" : "index71",
"_type" : "_doc",
"_id" : "u9qTo3ABH6obcmRRRhSA",
"_nested" : {
"field" : "likes",
"offset" : 0
},
"_score" : 1.0,
"_source" : {
"name" : "computer"
}
},
{
"_index" : "index71",
"_type" : "_doc",
"_id" : "u9qTo3ABH6obcmRRRhSA",
"_nested" : {
"field" : "likes",
"offset" : 1
},
"_score" : 1.0,
"_source" : {
"name" : "cat"
}
}
]
}
}
}
}
]
},
"aggregations" : {
"likes" : {
"doc_count" : 3,
"matcheLikes" : {
"doc_count" : 2,
"likeCount" : {
"value" : 2
}
}
}
}
If likes cannot be changed to a nested type, then scripts need to be used, which will impact performance.
Mapping
{
"index72" : {
"mappings" : {
"properties" : {
"likes" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"name" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
}
}
}
}
}
Query:
{
"script_fields": { ---> It will iterate through likes and get matched values
"matchedElements": {
"script": "def matchedLikes=[];def list_to_check = ['computer', 'laptop', 'cat']; def do_not_return = true; for(int i=0;i<doc['likes.keyword'].length;i++){ if(list_to_check.contains(doc['likes.keyword'][i])) {matchedLikes.add(doc['likes.keyword'][i])}} return matchedLikes;"
}
},
"query": {
"bool": {
"filter": {
"bool": {
"must": [
{
"terms": {
"likes": [
"computer",
"laptop",
"cat"
]
}
}
]
}
}
}
},
"aggs": {
"Name": {
"terms": {
"field": "name.keyword",
"size": 10
},
"aggs": {
"Count": {
"scripted_metric": { --> get count of matched values
"init_script": "state.matchedLikes=[]",
"map_script": " def list_to_check = ['computer', 'laptop', 'cat']; def do_not_return = true; for(int i=0;i<doc['likes.keyword'].length;i++){ if(list_to_check.contains(doc['likes.keyword'][i])) {state.matchedLikes.add(doc['likes.keyword'][i]);}}",
"combine_script": "int count = 0; for (int i=0;i<state.matchedLikes.length;i++) { count += 1 } return count;",
"reduce_script": "int count = 0; for (a in states) { count += a } return count"
}
}
}
}
}
}
Result:
"hits" : [
{
"_index" : "index72",
"_type" : "_doc",
"_id" : "wtqso3ABH6obcmRR0hSV",
"_score" : 0.0,
"fields" : {
"matchedElements" : [
"cat",
"computer"
]
}
}
]
},
"aggregations" : {
"Name" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : "bob",
"doc_count" : 1,
"Count" : {
"value" : 2
}
}
]
}
}
EDIT 1
To give a higher score to documents with more matches, change the terms query to a should clause. Each term in the should clause contributes to the score.
GET index71/_search
{
"query": {
"bool": {
"must": [
{
"nested": {
"path": "likes",
"query": {
"bool": {
"should": [
{
"term": {
"likes.name": "computer"
}
},
{
"term": {
"likes.name": "cat"
}
},
{
"term": {
"likes.name": "laptop"
}
}
]
}
},
"inner_hits": {}
}
}
]
}
},
"aggs": {
"likes": {
"nested": {
"path": "likes"
},
"aggs": {
"matcheLikes": {
"filter": {
"bool": {
"must": [
{
"terms": {
"likes.name": [
"computer",
"cat",
"laptop"
]
}
}
]
}
},
"aggs": {
"likeCount": {
"value_count": {
"field": "likes.name"
}
}
}
}
}
}
}
}
Result
"hits" : {
"total" : {
"value" : 2,
"relation" : "eq"
},
"max_score" : 1.5363467,
"hits" : [
{
"_index" : "index71",
"_type" : "_doc",
"_id" : "u9qTo3ABH6obcmRRRhSA",
"_score" : 1.5363467,
"_source" : {
"name" : "bob",
"likes" : [
{
"name" : "computer"
},
{
"name" : "cat"
},
{
"name" : "water"
}
]
},
"inner_hits" : {
"likes" : {
"hits" : {
"total" : {
"value" : 2,
"relation" : "eq"
},
"max_score" : 1.7917595,
"hits" : [
{
"_index" : "index71",
"_type" : "_doc",
"_id" : "u9qTo3ABH6obcmRRRhSA",
"_nested" : {
"field" : "likes",
"offset" : 1
},
"_score" : 1.7917595,
"_source" : {
"name" : "cat"
}
},
{
"_index" : "index71",
"_type" : "_doc",
"_id" : "u9qTo3ABH6obcmRRRhSA",
"_nested" : {
"field" : "likes",
"offset" : 0
},
"_score" : 1.2809337,
"_source" : {
"name" : "computer"
}
}
]
}
}
}
},
{
"_index" : "index71",
"_type" : "_doc",
"_id" : "pr-lqHABcSMy6UhGAWtW",
"_score" : 1.2809337,
"_source" : {
"name" : "bob",
"likes" : [
{
"name" : "computer"
},
{
"name" : "gaming"
},
{
"name" : "gambling"
}
]
},
"inner_hits" : {
"likes" : {
"hits" : {
"total" : {
"value" : 1,
"relation" : "eq"
},
"max_score" : 1.2809337,
"hits" : [
{
"_index" : "index71",
"_type" : "_doc",
"_id" : "pr-lqHABcSMy6UhGAWtW",
"_nested" : {
"field" : "likes",
"offset" : 0
},
"_score" : 1.2809337,
"_source" : {
"name" : "computer"
}
}
]
}
}
}
}
]
},
"aggregations" : {
"likes" : {
"doc_count" : 6,
"matcheLikes" : {
"doc_count" : 3,
"likeCount" : {
"value" : 3
}
}
}
}

Find nearest timestamp

I'm using Elasticsearch 6.4.2, and I need to find the previous and next docs considering a specified timestamp.
Kind of like running SELECT TOP 1 * from table WHERE date < 2019-01-01 ORDER BY date DESC and SELECT TOP 1 * from table WHERE date > 2019-01-01 ORDER BY date ASC on a SQL table, to find the previous and next records relative to 2019-01-01.
Any ideas?
Data:
[
{
"_index" : "index25",
"_type" : "_doc",
"_id" : "mceIBm4B1qXGA4PnKzvZ",
"_score" : 1.0,
"_source" : {
"id" : 1,
"date" : "2019-10-01"
}
},
{
"_index" : "index25",
"_type" : "_doc",
"_id" : "mseIBm4B1qXGA4PnRDvs",
"_score" : 1.0,
"_source" : {
"id" : 2,
"date" : "2019-10-02"
}
},
{
"_index" : "index25",
"_type" : "_doc",
"_id" : "m8eIBm4B1qXGA4PncDv9",
"_score" : 1.0,
"_source" : {
"id" : 3,
"date" : "2019-10-03"
}
},
{
"_index" : "index25",
"_type" : "_doc",
"_id" : "nMeIBm4B1qXGA4Pnhjvs",
"_score" : 1.0,
"_source" : {
"id" : 4,
"date" : "2019-10-04"
}
},
{
"_index" : "index25",
"_type" : "_doc",
"_id" : "nceIBm4B1qXGA4Pnmjtm",
"_score" : 1.0,
"_source" : {
"id" : 5,
"date" : "2019-10-05"
}
}
]
Query: I am using two filter aggregations, each with a terms sub-aggregation, to get the first date greater than and the first date less than 2019-10-03.
{
"size": 0,
"aggs": {
"above": {
"filter": {
"range": {
"date": {
"gt": "2019-10-03"
}
}
},
"aggs": {
"TopDocument": {
"terms": {
"field": "date",
"size": 1,
"order": {
"_term": "asc"
}
},
"aggs": {
"documents": {
"top_hits": {
"size": 10
}
}
}
}
}
},
"below":{
"filter": {
"range": {
"date": {
"lt": "2019-10-03"
}
}
},
"aggs": {
"TopDocument": {
"terms": {
"field": "date",
"size": 1,
"order": {
"_term": "desc"
}
},
"aggs": {
"documents": {
"top_hits": {
"size": 10
}
}
}
}
}
}
}
}
Response:
{
"below" : {
"doc_count" : 2,
"TopDocument" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 1,
"buckets" : [
{
"key" : 1569974400000,
"key_as_string" : "2019-10-02T00:00:00.000Z",
"doc_count" : 1,
"documents" : {
"hits" : {
"total" : {
"value" : 1,
"relation" : "eq"
},
"max_score" : 1.0,
"hits" : [
{
"_index" : "index25",
"_type" : "_doc",
"_id" : "mseIBm4B1qXGA4PnRDvs",
"_score" : 1.0,
"_source" : {
"id" : 2,
"date" : "2019-10-02"
}
}
]
}
}
}
]
}
},
"above" : {
"doc_count" : 2,
"TopDocument" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 1,
"buckets" : [
{
"key" : 1570147200000,
"key_as_string" : "2019-10-04T00:00:00.000Z",
"doc_count" : 1,
"documents" : {
"hits" : {
"total" : {
"value" : 1,
"relation" : "eq"
},
"max_score" : 1.0,
"hits" : [
{
"_index" : "index25",
"_type" : "_doc",
"_id" : "nMeIBm4B1qXGA4Pnhjvs",
"_score" : 1.0,
"_source" : {
"id" : 4,
"date" : "2019-10-04"
}
}
]
}
}
}
]
}
}
}
You can try this :
SELECT TOP 1 * from table WHERE date < 2019-01-01 ORDER BY date DESC
{
"sort": [
{
"date": {
"order": "desc"
}
}
],
"query": {
"bool": {
"filter": [
{
"range": {
"date": {
"lt": "2019-01-01"
}
}
}
]
}
},
"size": 1
}
SELECT TOP 1 * from table WHERE date > 2019-01-01 ORDER BY date ASC
{
"sort": [
{
"date": {
"order": "asc"
}
}
],
"query": {
"bool": {
"filter": [
{
"range": {
"date": {
"gt": "2019-01-01"
}
}
}
]
}
},
"size": 1
}

Count aggregation on reverse nesting field for multiple values

Our Elastic Mapping
{"mappings": {
"products" : {
"properties":{
"name " : {
"type" : "keyword"
},
"resellers" : {
"type" : "nested",
"properties" : {
"name" : { "type" : "text" },
"price" : { "type" : "double" }
}
}
}
}
}}
In this mapping each product stores the list of resellers which are selling it at specific price.
We have a requirement to get the count of products sold by specific resellers at a specific price. I am able to get it for a single reseller by using a reverse nested aggregation and a cardinality aggregation with the following query DSL.
For example, getting the total number of products sold by Amazon at a price of 2:
{
"query": {
"bool": {
"must": [
{
"match_all": {
"boost": 1.0
}
}
]
}
},
"aggs": {
"patchFilter": {
"nested": {
"path": "resellers"
},
"aggs": {
"nestedfilter": {
"filter": {
"bool": {
"must":[
{
"term" :{
"resellers.name.keyword": {
"value": "Amazon"
}
}
},{
"terms" :{
"resellers.price":[2]
}
}
]
}
},
"aggs": {
"resellerprice": {
"reverse_nested" :{},
"aggs": {
"resellers_price":{
"cardinality" : {
"field" : "name.keyword"
}
}
}
}
}
}
}
}
}
}
I want to fetch it for multiple resellers (Amazon, Flipkart, Walmart) that are selling at a price of 2, in a single query. Can somebody help me do that?
Mapping:
PUT productreseller
{
"mappings": {
"properties": {
"name": {
"type": "keyword"
},
"resellers": {
"type": "nested",
"properties": {
"name": {
"type": "text",
"fields":{
"keyword":{
"type": "keyword"
}
}
},
"price": {
"type": "double"
}
}
}
}
}
}
Data:
[
{
"_index" : "productreseller",
"_type" : "_doc",
"_id" : "JNbCLm0B00idyGV0Pn1Z",
"_score" : 1.0,
"_source" : {
"name" : "P2",
"resellers" : [
{
"name" : "amazon",
"price" : 3
},
{
"name" : "abc",
"price" : 2
}
]
}
},
{
"_index" : "productreseller",
"_type" : "_doc",
"_id" : "JdbCLm0B00idyGV0Wn0y",
"_score" : 1.0,
"_source" : {
"name" : "P1",
"resellers" : [
{
"name" : "amazon",
"price" : 2
},
{
"name" : "abc",
"price" : 3
}
]
}
},
{
"_index" : "productreseller",
"_type" : "_doc",
"_id" : "JtbPLm0B00idyGV0D32Y",
"_score" : 1.0,
"_source" : {
"name" : "P4",
"resellers" : [
{
"name" : "xyz",
"price" : 2
},
{
"name" : "abc",
"price" : 3
}
]
}
}
]
Query:
GET productreseller/_search
{
"query": {
"bool": {
"must": [
{
"match_all": {
"boost": 1
}
}
]
}
},
"aggs": {
"patchFilter": {
"nested": {
"path": "resellers"
},
"aggs": {
"nestedfilter": {
"filter": {
"bool": {
"must": [
{
"terms": {
"resellers.price": [
2
]
}
}
]
}
},
"aggs": {
"NAME": {
"terms": {
--->terms aggregation to list resellers and reverse_nested as subaggregation
"field": "resellers.name.keyword",
"size": 10
},
"aggs": {
"resellerprice": {
"reverse_nested": {},
"aggs": {
"resellers_price": {
"cardinality": {
"field": "name"
}
}
}
}
}
}
}
}
}
}
}
}
Result:
"aggregations" : {
"patchFilter" : {
"doc_count" : 8,
"nestedfilter" : {
"doc_count" : 3,
"NAME" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : "abc",
"doc_count" : 1,
"resellerprice" : {
"doc_count" : 1,
"resellers_price" : {
"value" : 1
}
}
},
{
"key" : "amazon",
"doc_count" : 1,
"resellerprice" : {
"doc_count" : 1,
"resellers_price" : {
"value" : 1
}
}
},
{
"key" : "xyz",
"doc_count" : 1,
"resellerprice" : {
"doc_count" : 1,
"resellers_price" : {
"value" : 1
}
}
}
]
}
}
}
}
If you want to display only certain resellers, you can add terms query in nested filter

Resources