Elasticsearch DSL queries - optional should terms & scores - elasticsearch

I'm pretty new to the Elasticsearch world and I might be missing some concepts.
That's the scenario I'm not understanding:
I want to find a doc from the following criteria:
category.level = A
category.name = "John G." OR "Chris T."
approved = yes (optional)
Mappings:
PUT data
{
"mappings": {
"properties": {
"createdAt": {
"type": "date",
"format": "yyyy-MM-dd HH:mm:ss.SSSZ"
},
"category": {
"type": "nested",
"properties": {
"name": {
"type": "text",
"analyzer": "keyword"
}
}
},
"approved": {
"type": "text",
"analyzer": "keyword"
}
}
}
}
Data:
POST data/_create/1
{
"category": [
{
"name": "John G.",
"level": "A"
},
{
"name": "Mary F.",
"level": "A"
}
],
"createdBy": "John",
"createdAt": "2022-04-18 19:09:27.527+0200",
"approved": "yes"
}
POST data/_create/2
{
"category": [
{
"name": "John G.",
"level": "A"
},
{
"name": "Chris T.",
"level": "A"
}
],
"createdBy": "John",
"createdAt": "2022-04-18 19:09:27.527+0200",
"approved": "no"
}
POST data/_create/3
{
"category": [
{
"name": "John G.",
"level": "C"
},
{
"name": "Phil C.",
"level": "C"
}
],
"createdBy": "John",
"createdAt": "2022-04-18 19:09:27.527+0200",
"approved": "no"
}
POST data/_create/4
{
"category": [
{
"name": "John G.",
"level": "A"
},
{
"name": "Chris T.",
"level": "A"
}
],
"createdBy": "John",
"createdAt": "2020-04-18 19:09:27.527+0200",
"approved": "yes"
}
POST data/_create/5
{
"category": [
{
"name": "Unknown A.",
"level": "A"
},
{
"name": "Unknown B.",
"level": "A"
}
],
"createdBy": "Unknown",
"createdAt": "2020-08-18 19:09:27.527+0200",
"approved": "yes"
}
Query:
GET data/_search
{
"query": {
"nested": {
"path": "category",
"query": {
"bool": {
"must": [
{"match": {"category.level": "A"}}
],
"should": [
{"term": {"category.name": "John G."}},
{"term": {"category.name": "Chris T."}},
{"term": {"approved": "yes"}}
],
"minimum_should_match": 1
}
}
}
}
}
Response:
{
"took" : 1,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 3,
"relation" : "eq"
},
"max_score" : 1.4455402,
"hits" : [
{
"_index" : "data",
"_id" : "2",
"_score" : 1.4455402,
"_source" : {
"category" : [
{
"name" : "John G.",
"level" : "A"
},
{
"name" : "Chris T.",
"level" : "A"
}
],
"createdBy" : "John",
"createdAt" : "2022-04-18 19:09:27.527+0200",
"approved" : "no"
}
},
{
"_index" : "data",
"_id" : "4",
"_score" : 1.4455402,
"_source" : {
"category" : [
{
"name" : "John G.",
"level" : "A"
},
{
"name" : "Chris T.",
"level" : "A"
}
],
"createdBy" : "John",
"createdAt" : "2020-04-18 19:09:27.527+0200",
"approved" : "yes"
}
},
{
"_index" : "data",
"_id" : "1",
"_score" : 1.151647,
"_source" : {
"category" : [
{
"name" : "John G.",
"level" : "A"
},
{
"name" : "Mary F.",
"level" : "A"
}
],
"createdBy" : "John",
"createdAt" : "2022-04-18 19:09:27.527+0200",
"approved" : "yes"
}
}
]
}
}
Questions:
Why is the first document returned one with approved = no? I was expecting that docs with approved = yes would be scored higher.
Why is the doc with _id = 5 (it doesn't meet the category.name criterion, but it does match approved = yes) not being returned?
The optionality of approved = yes is not being expressed in the above query. How could I create a kind of extra separated should term with minimum_should_match: 0 ? Something that would increase the score but would not filter the results.

You need to use the query below, which has a main bool query. Its must clause contains a nested query, which in turn holds a bool query with a term clause on the category.level field and another bool query with should clauses on the category.name field.
The main bool query also has a should clause on approved, which is used to boost results that have the value yes (this clause is outside the nested query).
POST data/_search
{
"query": {
"bool": {
"must": [
{
"nested": {
"path": "category",
"query": {
"bool": {
"must": [
{
"term": {
"category.level": {
"value": "a"
}
}
},
{
"bool": {
"should": [
{
"term": {
"category.name": "John G."
}
},
{
"term": {
"category.name": "Chris T."
}
}
]
}
}
]
}
}
}
}
],
"should": [
{
"term": {
"approved": "yes"
}
}
]
}
}
}
Result:
{
"took" : 2,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 3,
"relation" : "eq"
},
"max_score" : 1.9845366,
"hits" : [
{
"_index" : "data",
"_type" : "_doc",
"_id" : "4",
"_score" : 1.9845366,
"_source" : {
"category" : [
{
"name" : "John G.",
"level" : "A"
},
{
"name" : "Chris T.",
"level" : "A"
}
],
"createdBy" : "John",
"createdAt" : "2020-04-18 19:09:27.527+0200",
"approved" : "yes"
}
},
{
"_index" : "data",
"_type" : "_doc",
"_id" : "1",
"_score" : 1.6906434,
"_source" : {
"category" : [
{
"name" : "John G.",
"level" : "A"
},
{
"name" : "Mary F.",
"level" : "A"
}
],
"createdBy" : "John",
"createdAt" : "2022-04-18 19:09:27.527+0200",
"approved" : "yes"
}
},
{
"_index" : "data",
"_type" : "_doc",
"_id" : "2",
"_score" : 1.4455402,
"_source" : {
"category" : [
{
"name" : "John G.",
"level" : "A"
},
{
"name" : "Chris T.",
"level" : "A"
}
],
"createdBy" : "John",
"createdAt" : "2022-04-18 19:09:27.527+0200",
"approved" : "no"
}
}
]
}
}
Why the first document returned is an approval = no? I was expecting
that docs with approval = yes would be better scored.
Because the should clause on approved is inside the nested query, it cannot match any document: approved is a top-level field, outside category, so it does not change the score.
Why doc with index = 5 (it doesn't attend the criteria category.name,
but it does for approved = yes) is not being returned?
It is removed by your must clause. If you need the document with _id = 5 as well, you can use two should clauses, one for the nested query and one for approved, and that will resolve your issue.
Your question 3 is also resolved by my answer.

I tried your scenario with your mapping and sample data and found the issue: you are using approved:yes inside the nested query context, which is causing the problem. If you change the query to the one below (basically using approved:yes in the should block but outside the nested query), it solves all your issues.
{
"query": {
"bool": {
"should": [
{
"nested": {
"path": "category",
"query": {
"bool": {
"must": [
{
"match": {
"category.level": "A"
}
}
],
"should": [
{
"term": {
"category.name": "John G."
}
},
{
"term": {
"category.name": "Chris T."
}
}
]
}
}
}
},
{
"term": {
"approved": "yes"
}
}
]
}
}
}
And search result
"hits": [
{
"_index": "71967271",
"_id": "4",
"_score": 1.9845366,
"_source": {
"category": [
{
"name": "John G.",
"level": "A"
},
{
"name": "Chris T.",
"level": "A"
}
],
"createdBy": "John",
"createdAt": "2020-04-18 19:09:27.527+0200",
"approved": "yes"
}
},
{
"_index": "71967271",
"_id": "2",
"_score": 1.4455402,
"_source": {
"category": [
{
"name": "John G.",
"level": "A"
},
{
"name": "Chris T.",
"level": "A"
}
],
"createdBy": "John",
"createdAt": "2022-04-18 19:09:27.527+0200",
"approved": "no"
}
},
{
"_index": "71967271",
"_id": "1",
"_score": 1.2437345,
"_source": {
"category": [
{
"name": "John G.",
"level": "A"
},
{
"name": "Mary F.",
"level": "A"
}
],
"createdBy": "John",
"createdAt": "2022-04-18 19:09:27.527+0200",
"approved": "yes"
}
},
{
"_index": "71967271",
"_id": "5",
"_score": 0.7968255,
"_source": {
"category": [
{
"name": "Unknown A.",
"level": "A"
},
{
"name": "Unknown B.",
"level": "A"
}
],
"createdBy": "Unknown",
"createdAt": "2020-08-18 19:09:27.527+0200",
"approved": "yes"
}
}
]

Related

Query with And & OR in Elastic Search

I'm new to Elastic Search, I have document like below :
Mapping of same JSON index is like below :
Mapping
{
"mappings": {
"properties": {
"age": {
"type": "long"
},
"hobbiles": {
"type": "keyword"
}
}
}
}
Some sample documents are like below :
[{
"_id": "test#domain.com",
"age": 12,
"hobbiles": [{
"name": "Singing",
"level": "begineer"
},
{
"name": "Dancing",
"level": "begineer"
}
]
},
{
"_id": "test1#domain.com",
"age": 7,
"hobbiles": [{
"name": "Coding",
"level": "begineer"
},
{
"name": "Chess",
"level": "begineer"
}
]
},
{
"_id": "test2#domain.com",
"age": 20,
"hobbiles": [{
"name": "Singing",
"level": "begineer"
},
{
"name": "Dancing",
"level": "begineer"
}
]
},
{
"_id": "test3#domain.com",
"age": 21,
"hobbiles": [{
"name": "Coding",
"level": "begineer"
},
{
"name": "Dancing",
"level": "Football"
}
]
}
]
Now I want to fetch documents where id IN (test#domain.com, test1#domain.com) and age is greater than 5, and [optionally] where hobbiles matches Football.
My expectation for the output is that I should get three documents. If hobbies does not match, that is fine, but if hobbies matches, then that document should be on top. Basically, I want to match on hobbies, but it is optional: if it doesn't match, I should still get data based on the prior clauses.
[test3#domain.com, test#domain.com, test1#domain.com]
test3 on top because Football matches there, and test and test1 because age and id matches there.
Tldr;
It can be achieved via bool queries.
Solution
PUT /_bulk
{"index":{"_index":"73935795", "_id":"test#domain.com"}}
{"age":12,"hobbiles":[{"name":"Singing","level":"begineer"},{"name":"Dancing","level":"begineer"}]}
{"index":{"_index":"73935795", "_id":"test1#domain.com"}}
{"age":7,"hobbiles":[{"name":"Coding","level":"begineer"},{"name":"Chess","level":"begineer"}]}
{"index":{"_index":"73935795", "_id":"test2#domain.com"}}
{"age":20,"hobbiles":[{"name":"Singing","level":"begineer"},{"name":"Dancing","level":"begineer"}]}
{"index":{"_index":"73935795", "_id":"test3#domain.com"}}
{"age":21,"hobbiles":[{"name":"Coding","level":"begineer"},{"name":"Dancing","level":"Football"}]}
GET 73935795/_search
{
"query": {
"bool": {
"filter": [
{
"range": {
"age": {
"gt": 5
}
}
},
{
"terms": {
"_id": [
"test#domain.com",
"test1#domain.com",
"test3#domain.com"
]
}
}
],
"should": [
{
"query_string": {
"query": "(football) OR (begineer)",
"default_field": "hobbiles.level"
}
}
]
}
}
}
This requires using Should clause. Should is equivalent to "OR". So a document will be returned if it satisfies any one condition in should query.
For conditions on id and age I have used filter clause. It is equivalent to "AND" . Filter clause does not calculate score for matched documents so any document which matches "hobbiles.level" will be ranked higher.
Query
{
"query": {
"bool": {
"minimum_should_match": 1,
"should": [
{
"term": {
"hobbiles.level.keyword": {
"value": "Football"
}
}
},
{
"bool": {
"filter": [
{
"terms": {
"id.keyword": [
"test#domain.com",
"test1#domain.com"
]
}
},
{
"range": {
"age": {
"gt": 5
}
}
}
]
}
}
]
}
}
}
Result
"hits" : [
{
"_index" : "index8",
"_type" : "_doc",
"_id" : "qE06noMBfFiM6spcUTo4",
"_score" : 1.3112575,
"_source" : {
"id" : "test3#domain.com",
"age" : 21,
"hobbiles" : [
{
"name" : "Coding",
"level" : "begineer"
},
{
"name" : "Dancing",
"level" : "Football"
}
]
}
},
{
"_index" : "index8",
"_type" : "_doc",
"_id" : "pE03noMBfFiM6spc4jr2",
"_score" : 0.0,
"_source" : {
"id" : "test#domain.com",
"age" : 12,
"hobbiles" : [
{
"name" : "Singing",
"level" : "begineer"
},
{
"name" : "Dancing",
"level" : "begineer"
}
]
}
},
{
"_index" : "index8",
"_type" : "_doc",
"_id" : "pU03noMBfFiM6spc6DqZ",
"_score" : 0.0,
"_source" : {
"id" : "test1#domain.com",
"age" : 7,
"hobbiles" : [
{
"name" : "Coding",
"level" : "begineer"
},
{
"name" : "Chess",
"level" : "begineer"
}
]
}
}
]

Elasticsearch: collapsed results participating in sorting

I am using Elasticsearch and I want to group our results by a specific field, returning only the most recent document per group. When scoring and sorting, I want the documents I am not returning (the ones that are older) to be ignored.
I have tried approaching this with collapse, however the "hidden" documents are also taken into account, which I would like to avoid.
Example
In the following example I have 2 groups of documents, which I would like to group by their email, taking for each group the most recent by created_at, and sort them by their rating descending.
With the data of the example, the most recent ones are Aaa 1 (with email aaa#aaa.com) and Bbb 4 (with email bbb#bbb.com). I want to sort by their rating descending, I am expecting Bbb 4 and then Aaa 1. However, they are returned the other way around, because the Aaa 2 and Aaa 3 are also scored, which I want to avoid.
How can I write my query in a way that would return Bbb 4 and then Aaa 1? Should I be using the top_hits aggregation instead?
PUT test
{
"mappings": {
"properties": {
"name": {
"type": "keyword"
},
"email": {
"type": "keyword"
},
"description": {
"type": "text"
},
"rating": {
"type": "integer"
},
"created_at": {
"type": "date"
}
}
}
}
POST test/_doc
{
"name": "Aaa 1",
"rating": 1,
"created_at": "2021-01-01",
"description": "A quick fox",
"email": "aaa#aaa.com"
}
POST test/_doc
{
"name": "Aaa 2",
"rating": 20,
"created_at": "2020-01-01",
"description": "jumps over",
"email": "aaa#aaa.com"
}
POST test/_doc
{
"name": "Aaa 3",
"rating": 30,
"created_at": "2019-01-01",
"description": "the fence",
"email": "aaa#aaa.com"
}
POST test/_doc
{
"name": "Bbb 4",
"rating": 4,
"created_at": "2021-01-02",
"description": "behind the house",
"email": "bbb#bbb.com"
}
POST test/_doc
{
"name": "Bbb 5",
"rating": 5,
"created_at": "2020-01-02",
"description": "we live in",
"email": "bbb#bbb.com"
}
GET test/_search
{
"_source": false,
"track_total_hits": false,
"query": {
"bool": {
"should": {
"match_all": {}
}
}
},
"collapse": {
"field": "email",
"inner_hits": [
{
"name": "last_document",
"size": 1,
"_source": ["name","email","rating"],
"sort": [
{
"created_at": {
"order": "desc"
}
}
]
}
]
},
"sort": [
{
"rating": {
"order": "desc"
}
}
]
}
This returns
{
"took" : 1,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"max_score" : null,
"hits" : [
{
"_index" : "test",
"_type" : "_doc",
"_id" : "bccEn3oBRQ1dOOnBe3nD",
"_score" : null,
"fields" : {
"email" : [
"aaa#aaa.com"
]
},
"sort" : [
30
],
"inner_hits" : {
"last_document" : {
"hits" : {
"total" : {
"value" : 3,
"relation" : "eq"
},
"max_score" : null,
"hits" : [
{
"_index" : "test",
"_type" : "_doc",
"_id" : "a8cEn3oBRQ1dOOnBdXli",
"_score" : null,
"_source" : {
"name" : "Aaa 1",
"rating" : 1,
"email" : "aaa#aaa.com"
},
"sort" : [
1609459200000
]
}
]
}
}
}
},
{
"_index" : "test",
"_type" : "_doc",
"_id" : "b8cEn3oBRQ1dOOnBiHkx",
"_score" : null,
"fields" : {
"email" : [
"bbb#bbb.com"
]
},
"sort" : [
5
],
"inner_hits" : {
"last_document" : {
"hits" : {
"total" : {
"value" : 2,
"relation" : "eq"
},
"max_score" : null,
"hits" : [
{
"_index" : "test",
"_type" : "_doc",
"_id" : "bscEn3oBRQ1dOOnBgHlt",
"_score" : null,
"_source" : {
"name" : "Bbb 4",
"rating" : 4,
"email" : "bbb#bbb.com"
},
"sort" : [
1609545600000
]
}
]
}
}
}
}
]
}
}
I have run into the same problem. As far as I know this is not possible.
As a workaround you can do this:
GET test/_search
{
"_source": false,
"track_total_hits": false,
"query": {
"match_all": {}
},
"collapse": {
"field": "email"
},
"sort": [
{
"created_at": {
"order": "desc"
}
}
]
}
This would return the latest comment per email in your 'normal' hits array. You would then need to sort those by rating after the search.
The problem I have is that my result set is too large to fetch at once and re-sort them after the search. If you found a different solution to this, I would be happy to hear it :)

How to fetch only specific object inside nested field along with search query in Elasticsearch

I have an index which has nested fields. I want to include only particular nested objects in the response, based on a condition, along with the other fields. For example, consider the mappings
PUT /users
{
"mappings": {
"properties": {
"name": {
"type": "text"
},
"address": {
"type": "nested",
"properties": {
"state": {
"type": "keyword"
},
"city": {
"type": "keyword"
},
"country": {
"type": "keyword"
}
}
}
}
}
}
I want to search users by name, and I expect the response to include only the nested objects where country = "United States". Consider the following documents in the users index
{
"users": [
{
"name": "John",
"address": [
{
"state": "Alabama",
"city": "Alabaster",
"Country": "United States"
},
{
"state": "New Delhi",
"city": "Agra",
"Country": "India"
}
]
},
{
"name": "Edward John",
"address": [
{
"state": "Illinois",
"city": "Chicago",
"Country": "United States"
},
{
"state": "Afula",
"city": "Afula",
"Country": "Israel"
}
]
},
{
"name": "Edward John",
"address": [
{
"state": "Afula",
"city": "Afula",
"Country": "Israel"
}
]
}
]
}
I am expecting the search result as follows
{
"users": [
{
"name": "John",
"address": [
{
"state": "Alabama",
"city": "Alabaster",
"Country": "United States"
}
]
},
{
"name": "Edward John",
"address": [
{
"state": "Illinois",
"city": "Chicago",
"Country": "United States"
}
]
},
{
"name": "Edward John",
"address": [
]
}
]
}
Kindly provide me with a suitable Elasticsearch query to fetch these documents
The correct query would be this one:
POST users/_search
{
"_source": [
"name"
],
"query": {
"bool": {
"should": [
{
"nested": {
"path": "address",
"query": {
"bool": {
"must": [
{
"match": {
"address.Country": "United States"
}
}
]
}
},
"inner_hits": {}
}
},
{
"bool": {
"must_not": [
{
"nested": {
"path": "address",
"query": {
"bool": {
"must": [
{
"match": {
"address.Country": "United States"
}
}
]
}
}
}
}
]
}
}
]
}
}
}
Which returns this:
"hits" : {
"total" : {
"value" : 3,
"relation" : "eq"
},
"max_score" : 1.489748,
"hits" : [
{
"_index" : "users",
"_type" : "_doc",
"_id" : "X8pINHgB2VNT6r1rJj04",
"_score" : 1.489748,
"_source" : {
"name" : "John"
},
"inner_hits" : {
"address" : {
"hits" : {
"total" : {
"value" : 1,
"relation" : "eq"
},
"max_score" : 1.489748,
"hits" : [
{
"_index" : "users",
"_type" : "_doc",
"_id" : "X8pINHgB2VNT6r1rJj04",
"_nested" : {
"field" : "address",
"offset" : 0
},
"_score" : 1.489748,
"_source" : {
"city" : "Alabaster",
"Country" : "United States",
"state" : "Alabama"
}
}
]
}
}
}
},
{
"_index" : "users",
"_type" : "_doc",
"_id" : "XftINHgBAEsNDPLQQxL8",
"_score" : 1.489748,
"_source" : {
"name" : "Edward John"
},
"inner_hits" : {
"address" : {
"hits" : {
"total" : {
"value" : 1,
"relation" : "eq"
},
"max_score" : 1.489748,
"hits" : [
{
"_index" : "users",
"_type" : "_doc",
"_id" : "XftINHgBAEsNDPLQQxL8",
"_nested" : {
"field" : "address",
"offset" : 0
},
"_score" : 1.489748,
"_source" : {
"city" : "Chicago",
"Country" : "United States",
"state" : "Illinois"
}
}
]
}
}
}
},
{
"_index" : "users",
"_type" : "_doc",
"_id" : "UoZINHgBNlJvCnAGVzE9",
"_score" : 0.0,
"_source" : {
"name" : "Edward John"
},
"inner_hits" : {
"address" : {
"hits" : {
"total" : {
"value" : 0,
"relation" : "eq"
},
"max_score" : null,
"hits" : [ ]
}
}
}
}
]
}
Try out using below query
{
"query": {
"nested": {
"path": "address",
"query": {
"bool": {
"must": [
{
"match": {
"address.Country": "United States"
}
}
]
}
},
"inner_hits": {}
}
}
}
Search Result will be
"hits": [
{
"_index": "66579117",
"_type": "_doc",
"_id": "1",
"_score": 0.6931471,
"_source": {
"name": "John",
"address": [
{
"sate": "Alabama",
"city": "Alabaster",
"Country": "United States"
},
{
"sate": "New Delhi",
"city": "Agra",
"Country": "India"
}
]
},
"inner_hits": {
"address": {
"hits": {
"total": {
"value": 1,
"relation": "eq"
},
"max_score": 0.6931471,
"hits": [
{
"_index": "66579117",
"_type": "_doc",
"_id": "1",
"_nested": {
"field": "address",
"offset": 0
},
"_score": 0.6931471,
"_source": {
"sate": "Alabama",
"city": "Alabaster",
"Country": "United States"
}
}
]
}
}
}
},
{
"_index": "66579117",
"_type": "_doc",
"_id": "2",
"_score": 0.6931471,
"_source": {
"name": "Edward",
"address": [
{
"sate": "Illinois",
"city": "Chicago",
"Country": "United States"
},
{
"sate": "Afula",
"city": "Afula",
"Country": "Israel"
}
]
},
"inner_hits": {
"address": {
"hits": {
"total": {
"value": 1,
"relation": "eq"
},
"max_score": 0.6931471,
"hits": [
{
"_index": "66579117",
"_type": "_doc",
"_id": "2",
"_nested": {
"field": "address",
"offset": 0
},
"_score": 0.6931471,
"_source": {
"sate": "Illinois",
"city": "Chicago",
"Country": "United States"
}
}
]
}
}
}
}
]

How to group distinct records in Elasticsearch

I have the following data in my Elasticsearch index:
{
"title": "Hello from elastic",
"name": "ABC",
"j_id": "1",
"date": '2021-03-02T12:29:31.356514'
},
{
"title": "Hello from elastic",
"name": "PQR",
"j_id": "1",
"date": '2021-03-02T12:29:31.356514'
},
{
"title": "Hello from elastic",
"name": "XYZ",
"j_id": "2",
"date": '2021-03-02T12:29:31.356514'
},
{
"title": "Hello from elastic",
"name": "MNO",
"j_id": "3",
"date": '2021-03-02T12:29:31.356514'
}
Now I want to get unique records on the basis of the id.
The expected output is:
{
"1": [{
"title": "Hello from elastic",
"name": "ABC",
"j_id": "1",
"date": '2021-03-02T12:29:31.356514'
},
{
"title": "Hello from elastic",
"name": "PQR",
"j_id": "1",
"date": '2021-03-02T12:29:31.356514'
}],
"2": [{
"title": "Hello from elastic",
"name": "XYZ",
"j_id": "2",
"date": '2021-03-02T12:29:31.356514'
}],
"3": [{
"title": "Hello from elastic",
"name": "MNO",
"j_id": "3",
"date": '2021-03-02T12:29:31.356514'
}]
}
I tried an aggregate query but it's giving me only the counts.
Also, I want to include latest record in my response.
How can I get sorted, unique records from Elasticsearch grouped by the id?
I want latest inserted data first
Assuming a minimal mapping covering the date and j_id fields:
PUT myindex
{
"mappings": {
"properties": {
"j_id": {
"type": "keyword"
},
"date": {
"type": "date"
}
}
}
}
you can leverage a terms aggregation whose sub-aggregation is an ordered top_hits aggregation:
POST myindex/_search?filter_path=aggregations.*.buckets.key,aggregations.*.buckets.sorted_hits.hits.hits._source
{
"size": 0,
"aggs": {
"by_j_id": {
"terms": {
"field": "j_id",
"size": 10,
"order": {
"max_date": "desc"
}
},
"aggs": {
"max_date": {
"max": {
"field": "date"
}
},
"sorted_hits": {
"top_hits": {
"size": 10,
"sort": [
{
"date": {
"order": "desc"
}
}
]
}
}
}
}
}
}
The URL parameter filter_path reduces the response body to closely mimic your required format:
{
"aggregations" : {
"by_j_id" : {
"buckets" : [
{
"key" : "1",
"sorted_hits" : {
"hits" : {
"hits" : [
{
"_source" : {
"title" : "Hello from elastic",
"name" : "ABC",
"j_id" : "1",
"date" : "2021-03-02T12:29:31.356514"
}
},
{
"_source" : {
"title" : "Hello from elastic",
"name" : "PQR",
"j_id" : "1",
"date" : "2021-03-02T12:29:31.356514"
}
}
]
}
}
},
{
"key" : "2",
"sorted_hits" : {
"hits" : {
"hits" : [
{
"_source" : {
"title" : "Hello from elastic",
"name" : "XYZ",
"j_id" : "2",
"date" : "2021-03-02T12:29:31.356514"
}
}
]
}
}
},
{
"key" : "3",
"sorted_hits" : {
"hits" : {
"hits" : [
{
"_source" : {
"title" : "Hello from elastic",
"name" : "MNO",
"j_id" : "3",
"date" : "2021-03-02T12:29:31.356514"
}
}
]
}
}
}
]
}
}
}

Elasticsearch how to sort with nested condition

I have the following ES content, which is basically a list of videos with nested video ids for different users.
I need to sort the list based on the 'updated_at' date for a specific user (for example, 'B001').
{ "_index": "videos",
"_source": {
"id": 123456,
"short_title": "First good title",
"created_at": "2019-05-29T03:19:14",
"user_list": [
{
"user": "A001",
"video_id": "604214",
"updated_at": "2019-05-29T03:26:48"
},
{
"user": "B001",
"video_id": "762001",
"updated_at": "2019-05-29T06:27:56"
},
{
"user": "C001",
"video_id": "604218",
"updated_at": "2019-05-29T06:27:57"
}
]
}
},
{ "_index": "videos",
"_source": {
"id": 127456,
"short_title": "Second good title",
"created_at": "2019-05-29T04:19:14",
"user_list": [
{
"user": "B001",
"video_id": "216001",
"updated_at": "2019-05-29T06:26:58"
},
{
"user": "A001",
"video_id": "604218",
"updated_at": "2019-05-29T06:25:38"
},
{
"user": "C001",
"video_id": "626001",
"updated_at": "2019-05-29T06:25:42"
}
]
}
}
I have tried the following query:
GET videos/_search
{
"query": {
"bool": {
"must": [
{
"nested": {
"path": "user_list",
"filter": {
"match": {
"user_list.user": "B001"
}
}
}
}
]
}
},
"from": 0,
"size": 10,
"sort": [
{
"user_list.updated_at": {
"order": "asc",
"nested_path": "user_list",
"nested_filter": {
"match": {
"user_list.user": "B001"
}
}
}
}
]
}
But I cannot get the right order.
Expected order:
{ "_index": "videos",
"_source": {
"id": 127456,
"short_title": "Second good title",
"created_at": "2019-05-29T04:19:14",
"user_list": [
{
"user": "B001",
"video_id": "216001",
"updated_at": "2019-05-29T06:26:58"
},
{
"user": "A001",
"video_id": "604218",
"updated_at": "2019-05-29T06:25:38"
},
{
"user": "C001",
"video_id": "626001",
"updated_at": "2019-05-29T06:25:42"
}
]
}
},
{ "_index": "videos",
"_source": {
"id": 123456,
"short_title": "First good title",
"created_at": "2019-05-29T03:19:14",
"user_list": [
{
"user": "A001",
"video_id": "604214",
"updated_at": "2019-05-29T03:26:48"
},
{
"user": "B001",
"video_id": "762001",
"updated_at": "2019-05-29T06:27:56"
},
{
"user": "C001",
"video_id": "604218",
"updated_at": "2019-05-29T06:27:57"
}
]
}
}
Here, video with id "127456" comes first in the order because B001 user's "updated_at" is needed to be in ascending order.
What am I missing in the query?
Mapping
PUT videos/_mapping
{
"properties" : {
"user_list" : {
"type" : "nested",
"properties" : {
"user" : {
"type" : "text"
},
"video_id" : {
"type" : "text"
},
"updated_at" : {
"type" : "date"
}
}
},
"id" : {
"type" : "integer"
},
"short_title" : {
"type" : "text"
},
"created_at" : {
"type" : "date"
}
}
}
Query
GET videos/_search
{
"query": {
"bool": {
"must": [
{
"nested": {
"path": "user_list",
"query": {
"bool": {
"filter": {
"match": {
"user_list.user": "B001"
}
}
}
}
}
}
]
}
},
"from": 0,
"size": 10,
"sort": [
{
"user_list.updated_at": {
"order": "desc",
"nested_path": "user_list",
"nested_filter": {
"match": {
"user_list.user": "B001"
}
}
}
}
]
}
Result
{
"_index" : "videos",
"_type" : "_doc",
"_id" : "E3cuVGsBJ4bANhADI-an",
"_score" : null,
"_source" : {
"id" : 123456,
"short_title" : "First good title",
"created_at" : "2019-05-29T03:19:14",
"user_list" : [
{
"user" : "A001",
"video_id" : "604214",
"updated_at" : "2019-05-29T03:26:48"
},
{
"user" : "B001",
"video_id" : "762001",
"updated_at" : "2019-05-29T06:27:56"
},
{
"user" : "C001",
"video_id" : "604218",
"updated_at" : "2019-05-29T06:27:57"
}
]
},
"sort" : [
1559111276000
]
},
{
"_index" : "videos",
"_type" : "_doc",
"_id" : "FHcuVGsBJ4bANhADjua8",
"_score" : null,
"_source" : {
"id" : 127456,
"short_title" : "Second good title",
"created_at" : "2019-05-29T04:19:14",
"user_list" : [
{
"user" : "B001",
"video_id" : "216001",
"updated_at" : "2019-05-29T06:26:58"
},
{
"user" : "A001",
"video_id" : "604218",
"updated_at" : "2019-05-29T06:25:38"
},
{
"user" : "C001",
"video_id" : "626001",
"updated_at" : "2019-05-29T06:25:42"
}
]
},
"sort" : [
1559111218000
]
}
I am using elastic search 7.0.
Query is same as yours and I am sorting in descending order
123456 is coming first as "user" : "B001","updated_at" : "2019-05-29T06:27:56"
is larger than "user" : "B001", "updated_at" : "2019-05-29T06:26:58"

Resources