I'm trying to sort the result returned by ElasticSearch by the nested field sections.name as follows:
Mapping:
PUT /staff
{
"mappings": {
"list": {
"properties": {
"id": {"type": "text" },
"name": {
"type":"text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"sections" : {
"type":"nested",
"properties": {
"id": {"type":"text", "fielddata" : true},
"name": {
"fielddata" : true,
"type": "text",
"fields": {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
}
}
}
}
}
}
}
documents:
POST /staff/list
{
"id": 10,
"name": "abc def",
"sections":
[
{
"id":"1",
"name" : "zamphire"
},{
"id":"2",
"name" : "warden"
}
]
}
POST /staff/list
{
"id": 9,
"name": "abc def",
"sections":
[
{
"id":"1",
"name" : "shaggi"
},{
"id":"2",
"name" : "robert"
}
]
}
POST /staff/list
{
"id": 8,
"name": "abc def",
"sections":
[
{
"id":"3",
"name" : "zamphire"
},{
"id":"2",
"name" : "abi"
}
]
}
I'm performing the following query:
GET /staff/_search
{
"from": 0,
"query": {
"nested": {
"path": "sections",
"query": {
"match": {
"sections.id": {
"query": "1"
}
}
}
}
},
"size": 25,
"sort": [
{
"sections.name": {
"nested": {
"filter": {
"nested": {
"path": "sections",
"query": {
"term" : { "sections.id" : "1" }
}
}
}
},
"order": "asc"
}
}
],
"_source": {
"includes": [
"id",
"name",
"sections"
]
}
}
I get these results:
{
"took" : 2,
"timed_out" : false,
"_shards" : {
"total" : 5,
"successful" : 5,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : 2,
"max_score" : null,
"hits" : [
{
"_index" : "staff",
"_type" : "list",
"_id" : "rJtyyGwBNB-cdBRb5XGR",
"_score" : null,
"_source" : {
"name" : "abc def",
"id" : 10,
"sections" : [
{
"name" : "zamphire",
"id" : "1"
},
{
"name" : "warden",
"id" : "2"
}
]
},
"sort" : [
null
]
},
{
"_index" : "staff",
"_type" : "list",
"_id" : "rZtyyGwBNB-cdBRb6nHU",
"_score" : null,
"_source" : {
"name" : "abc def",
"id" : 9,
"sections" : [
{
"name" : "shaggi",
"id" : "1"
},
{
"name" : "robert",
"id" : "2"
}
]
},
"sort" : [
null
]
}
]
}
}
I'm expecting the section shaggi to come before zamphire and thus the order of the two documents should be reversed.
I noticed this in the results:
"sort" : [
null
]
Is that related? What am I missing here?
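Yes, the null sort values are related: they mean no nested value was selected for sorting, so the documents cannot be ordered by it. The nested sort option needs a path, and its filter is applied directly to the nested objects under that path, so it should be a plain query on sections.id rather than another nested query. According to the docs, changing the sort part to this should do the job: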
"sort": [
{
"sections.name": {
"order": "asc",
"nested": {
"path": "sections",
"filter": {
"term" : { "sections.id" : "1" }
}
}
}
}
]
Returns
{
"took" : 7,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 2,
"relation" : "eq"
},
"max_score" : null,
"hits" : [
{
"_index" : "staff",
"_type" : "_doc",
"_id" : "8hSJyWwBHfpsFyAs9f_8",
"_score" : null,
"_source" : {
"name" : "abc def",
"id" : 9,
"sections" : [
{
"name" : "shaggi",
"id" : "1"
},
{
"name" : "robert",
"id" : "2"
}
]
},
"sort" : [
"shaggi"
]
},
{
"_index" : "staff",
"_type" : "_doc",
"_id" : "8RSJyWwBHfpsFyAs5v98",
"_score" : null,
"_source" : {
"name" : "abc def",
"id" : 10,
"sections" : [
{
"name" : "zamphire",
"id" : "1"
},
{
"name" : "warden",
"id" : "2"
}
]
},
"sort" : [
"zamphire"
]
}
]
}
}
Tested with elasticsearch 7.2.0.
Hope that helps.
Related
I'm trying to get the documents that match all the items in a list. The field that I'm searching on is inside a list of nested objects:
map of my index:
PUT testindex1
{
"mappings": {
"properties": {
"patients": {
"type": "nested",
"properties": {
"name": {
"type": "keyword"
},
"age": {
"type": "keyword"
}
}
}
}
}
}
Documents
PUT testindex1/_doc/1
{
"patients": [
{"name" : "1", "age" : "1"},
{"name" : "1", "age" : "2"},
{"name" : "1", "age" : "3"}
]
}
PUT testindex1/_doc/2
{
"patients": [
{"name" : "1", "age" : "1"},
{"name" : "1", "age" : "2"},
{"name" : "1", "age" : "3"}
]
}
PUT testindex1/_doc/3
{
"patients":[
{"name" : "1", "age" : "2"},
{"name" : "1", "age" : "5"},
{"name" : "1", "age" : "4"}
]
}
What I'm trying to get is all the documents whose patients' ages include every value in the list ["2", "1"]; in this case, only documents 1 and 2. I know that I can update the mapping by using
this approach.
But this would mean that I would have to reprocess the entire dataset.
Goal: get the patients that have both ages "1" and "2" (only the patients of documents 1 and 2).
I've found the answer here: Search a nested field for multiple values on the same field with elasticsearch
Basically, you need one nested clause per value, combined in a bool query so that all of them are required:
GET testindex1/_search
{
"query": {
"bool": {
"filter": [
{
"nested": {
"path": "patients",
"query": {
"bool": {
"filter": [
{
"match": {
"patients.age": "2"
}
}
]
}
}
}
},
{
"nested": {
"path": "patients",
"query": {
"bool": {
"filter": [
{
"match": {
"patients.age": "1"
}
}
]
}
}
}
}
]
}
}
}
This returns only the documents whose patients include both age 1 and age 2, producing the following output:
{
"took" : 1,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 2,
"relation" : "eq"
},
"max_score" : 0.0,
"hits" : [
{
"_index" : "testindex1",
"_id" : "1",
"_score" : 0.0,
"_source" : {
"patients" : [
{
"name" : "1",
"age" : "1"
},
{
"name" : "1",
"age" : "2"
},
{
"name" : "1",
"age" : "3"
}
]
}
},
{
"_index" : "testindex1",
"_id" : "2",
"_score" : 0.0,
"_source" : {
"patients" : [
{
"name" : "1",
"age" : "1"
},
{
"name" : "1",
"age" : "2"
},
{
"name" : "1",
"age" : "3"
}
]
}
}
]
}
}
Getting incorrect inner hits from parent child relationship when combined with boolean query
Hi Everyone
I am getting incorrect inner hits results when combining parent-child query with boolean query. To reproduce the issue, I create this Index
PUT /my-index-000001
{
"mappings": {
"_routing": {
"required": true
},
"properties": {
"parentProperty": {
"type": "text"
},
"childProperty": {
"type": "text"
},
"id": {
"type": "integer"
},
"myJoinField": {
"type": "join",
"relations": {
"parent": "mychild"
}
}
}
}
}
Then I add these three documents (the document with id "1" is the parent of the other two documents):
POST /my-index-000001/_doc/1?routing=1
{
"id": 1,
"parentProperty": "a parent document",
"myJoinField": "parent"
}
POST /my-index-000001/_doc/2?routing=1
{
"id": 2,
"childProperty": "queensland civil administration",
"myJoinField": {
"name":"mychild",
"parent":"1"
}
}
POST /my-index-000001/_doc/3?routing=1
{
"id": 3,
"childProperty": "beautiful weather",
"myJoinField": {
"name":"mychild",
"parent":"1"
}
}
Now the index is set up with 3 documents. I am looking for all child documents that meet this boolean query: [childProperty contains either "queensland civil" or both "beautiful" and "nothing"].
I expect Elasticsearch to return only the child document with id "2", since the child document with id "3" does not contain the term "nothing".
The translated version of this query is as follows:
GET /my-index-000001/_search
{
"query": {
"bool": {
"minimum_should_match": 1,
"should": [
{
"has_child": {
"inner_hits": {
"name": "opr1"
},
"query": {
"query_string": {
"analyzer": "stop",
"query": "childProperty:(\"queensland civil\")"
}
},
"type": "mychild"
}
},
{
"bool": {
"must": [
{
"has_child": {
"inner_hits": {
"name": "opr2"
},
"query": {
"query_string": {
"query": "childProperty:(beautiful)"
}
},
"type": "mychild"
}
},
{
"has_child": {
"inner_hits": {
"name": "opr3"
},
"query": {
"query_string": {
"query": "childProperty:(nothing)"
}
},
"type": "mychild"
}
}
]
}
}
]
}
}
}
The result returned by Elasticsearch is as follows:
{
"took" : 24,
"timed_out" : false,
"_shards" : {
"total" : 5,
"successful" : 5,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 1,
"relation" : "eq"
},
"max_score" : 1.0,
"hits" : [
{
"_index" : "my-index-000001",
"_type" : "_doc",
"_id" : "1",
"_score" : 1.0,
"_routing" : "1",
"_source" : {
"id" : 1,
"parentProperty" : "a parent document",
"myJoinField" : "parent"
},
"inner_hits" : {
"opr1" : {
"hits" : {
"total" : {
"value" : 1,
"relation" : "eq"
},
"max_score" : 1.2814486,
"hits" : [
{
"_index" : "my-index-000001",
"_type" : "_doc",
"_id" : "2",
"_score" : 1.2814486,
"_routing" : "1",
"_source" : {
"id" : 2,
"childProperty" : "queensland civil administration",
"myJoinField" : {
"name" : "mychild",
"parent" : "1"
}
}
}
]
}
},
"opr2" : {
"hits" : {
"total" : {
"value" : 1,
"relation" : "eq"
},
"max_score" : 0.7549127,
"hits" : [
{
"_index" : "my-index-000001",
"_type" : "_doc",
"_id" : "3",
"_score" : 0.7549127,
"_routing" : "1",
"_source" : {
"id" : 3,
"childProperty" : "beautiful weather",
"myJoinField" : {
"name" : "mychild",
"parent" : "1"
}
}
}
]
}
},
"opr3" : {
"hits" : {
"total" : {
"value" : 0,
"relation" : "eq"
},
"max_score" : null,
"hits" : [ ]
}
}
}
}
]
}
}
As you can see in the result, Elasticsearch returns both child documents, which clearly goes against what I have written in the "must" section of the query.
But if I rewrite the query as follows, it returns ONLY the expected document (the document with id "2"):
GET /my-index-000001/_search
{
"query": {
"bool": {
"must": [
{
"has_child": {
"inner_hits": {
"name": "opr1"
},
"query": {
"bool": {
"minimum_should_match": 1,
"should": [
{
"query_string": {
"query": "childProperty:(\"queensland civil\")"
}
},
{
"bool": {
"must": [
{
"query_string": {
"query": "childProperty:(beautiful)"
}
},
{
"query_string": {
"query": "childProperty:(weather1)"
}
}
]
}
}
]
}
},
"type": "mychild"
}
}
]
}
}
}
here is the correct result:
{
"took" : 3,
"timed_out" : false,
"_shards" : {
"total" : 5,
"successful" : 5,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 1,
"relation" : "eq"
},
"max_score" : 1.0,
"hits" : [
{
"_index" : "my-index-000001",
"_type" : "_doc",
"_id" : "1",
"_score" : 1.0,
"_routing" : "1",
"_source" : {
"id" : 1,
"parentProperty" : "a parent document",
"myJoinField" : "parent"
},
"inner_hits" : {
"opr1" : {
"hits" : {
"total" : {
"value" : 1,
"relation" : "eq"
},
"max_score" : 1.2814486,
"hits" : [
{
"_index" : "my-index-000001",
"_type" : "_doc",
"_id" : "2",
"_score" : 1.2814486,
"_routing" : "1",
"_source" : {
"id" : 2,
"childProperty" : "queensland civil administration",
"myJoinField" : {
"name" : "mychild",
"parent" : "1"
}
}
}
]
}
}
}
}
]
}
}
I would appreciate it if someone could tell me what I did wrong in the first query, or whether this is the default behavior in Elasticsearch when it comes to parent/child relationships.
My mapping looks like so:
"condition": {
"properties": {
"name": {
"type": "keyword"
},
"value": {
"type": "keyword"
}
}
},
and some data I have looks like:
"condition": [
{
"name": "condition",
"value": "new",
},
{
"name": "condition",
"value": "gently-used",
}
]
How can I write a query that finds all objects within the array that have a new condition?
I have the following but I am getting 0 results back:
{
"query": {
"bool": {
"must": [
{
"match_phrase": {
"attribute_condition": "new"
}
}
]
}
}
}
First, you need to map your condition field as a nested type.
"condition": {
"type": "nested",
"properties": {
"name": { "type": "keyword" },
"value": { "type": "keyword" }
}
},
Now you're able to query each element of the condition array independently from each other. Next, you need to use the nested query and request to retrieve the inner hits and output them in the inner_hits object of the query response
{
"query": {
"bool": {
"must": {
"nested": {
"path": "condition",
"query": {
"match": {
"condition.value": "new"
}
},
"inner_hits": {}
}
}
}
}
}
An example response will look like below:
{
"took" : 2,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 1,
"relation" : "eq"
},
"max_score" : 0.6931471,
"hits" : [
{
"_index" : "nested",
"_type" : "_doc",
"_id" : "Xx_LN3gBp5RUqdfAef3B",
"_score" : 0.6931471,
"_source" : {
"condition" : [
{
"name" : "condition",
"value" : "new"
},
{
"name" : "condition",
"value" : "gently-used"
}
]
},
"inner_hits" : { <--- here begins the list of inner hits
"condition" : {
"hits" : {
"total" : {
"value" : 1,
"relation" : "eq"
},
"max_score" : 0.6931471,
"hits" : [
{
"_index" : "nested",
"_type" : "_doc",
"_id" : "Xx_LN3gBp5RUqdfAef3B",
"_nested" : {
"field" : "condition",
"offset" : 0
},
"_score" : 0.6931471,
"_source" : {
"name" : "condition",
"value" : "new"
}
}
]
}
}
}
}
]
}
}
Assume I have the following two elements in my elasticsearch index:
{
"name": "bob",
"likes": ["computer", "cat", "water"]
},
{
"name": "alice",
"likes": ["gaming", "gambling"]
}
I would now like to query for elements, that like computer, laptop or cat. (which matches bob, note that it should be an exact string match)
As a result I need the matches, as well as the count of matches, so would like to get the following back (since it found computer and cat, but not laptop or water):
{
"name": "bob",
"likes": ["computer", "cat"],
"likes_count": 2
}
Is there a way to achieve this with a single elasticsearch query? (note that I'm still stuck with ES2.4, but will hopefully soon be able to upgrade).
Ideally I would also like to sort the output by likes_count.
Thank you!
The best way would be to map likes as a nested data type.
Mapping
PUT index71
{
"mappings": {
"properties": {
"name":{
"type": "text"
},
"likes":{
"type": "nested",
"properties": {
"name":{
"type":"keyword"
}
}
}
}
}
}
Query:
GET index71/_search
{
"query": {
"bool": {
"must": [
{
"nested": {
"path": "likes",
"query": {
"bool": {
"must": [
{
"terms": {
"likes.name": [
"computer",
"cat",
"laptop"
]
}
}
]
}
},
"inner_hits": {} ---> It will return matched elements in nested type
}
}
]
}
},
"aggs": {
"likes": {
"nested": {
"path": "likes"
},
"aggs": {
"matcheLikes": {
"filter": {
"bool": {
"must": [
{
"terms": {
"likes.name": [
"computer",
"cat",
"laptop"
]
}
}
]
}
},
"aggs": {
"likeCount": {
"value_count": {
"field": "likes.name"
}
}
}
}
}
}
}
}
Result:
"hits" : [
{
"_index" : "index71",
"_type" : "_doc",
"_id" : "u9qTo3ABH6obcmRRRhSA",
"_score" : 1.0,
"_source" : {
"name" : "bob",
"likes" : [
{
"name" : "computer"
},
{
"name" : "cat"
},
{
"name" : "water"
}
]
},
"inner_hits" : {
"likes" : {
"hits" : {
"total" : {
"value" : 2,
"relation" : "eq"
},
"max_score" : 1.0,
"hits" : [
{
"_index" : "index71",
"_type" : "_doc",
"_id" : "u9qTo3ABH6obcmRRRhSA",
"_nested" : {
"field" : "likes",
"offset" : 0
},
"_score" : 1.0,
"_source" : {
"name" : "computer"
}
},
{
"_index" : "index71",
"_type" : "_doc",
"_id" : "u9qTo3ABH6obcmRRRhSA",
"_nested" : {
"field" : "likes",
"offset" : 1
},
"_score" : 1.0,
"_source" : {
"name" : "cat"
}
}
]
}
}
}
}
]
},
"aggregations" : {
"likes" : {
"doc_count" : 3,
"matcheLikes" : {
"doc_count" : 2,
"likeCount" : {
"value" : 2
}
}
}
}
If likes cannot be changed to a nested type, then scripts need to be used, which will impact performance.
Mapping
{
"index72" : {
"mappings" : {
"properties" : {
"likes" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"name" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
}
}
}
}
}
Query:
{
"script_fields": { ---> It will iterate through likes and get matched values
"matchedElements": {
"script": "def matchedLikes=[];def list_to_check = ['computer', 'laptop', 'cat']; def do_not_return = true; for(int i=0;i<doc['likes.keyword'].length;i++){ if(list_to_check.contains(doc['likes.keyword'][i])) {matchedLikes.add(doc['likes.keyword'][i])}} return matchedLikes;"
}
},
"query": {
"bool": {
"filter": {
"bool": {
"must": [
{
"terms": {
"likes": [
"computer",
"laptop",
"cat"
]
}
}
]
}
}
}
},
"aggs": {
"Name": {
"terms": {
"field": "name.keyword",
"size": 10
},
"aggs": {
"Count": {
"scripted_metric": { --> get count of matched values
"init_script": "state.matchedLikes=[]",
"map_script": " def list_to_check = ['computer', 'laptop', 'cat']; def do_not_return = true; for(int i=0;i<doc['likes.keyword'].length;i++){ if(list_to_check.contains(doc['likes.keyword'][i])) {state.matchedLikes.add(doc['likes.keyword'][i]);}}",
"combine_script": "int count = 0; for (int i=0;i<state.matchedLikes.length;i++) { count += 1 } return count;",
"reduce_script": "int count = 0; for (a in states) { count += a } return count"
}
}
}
}
}
}
Result:
"hits" : [
{
"_index" : "index72",
"_type" : "_doc",
"_id" : "wtqso3ABH6obcmRR0hSV",
"_score" : 0.0,
"fields" : {
"matchedElements" : [
"cat",
"computer"
]
}
}
]
},
"aggregations" : {
"Name" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : "bob",
"doc_count" : 1,
"Count" : {
"value" : 2
}
}
]
}
}
EDIT 1
To give a higher score to documents with more matches, change the terms query to a should clause. Each term in the should clause that matches will contribute towards the score.
GET index71/_search
{
"query": {
"bool": {
"must": [
{
"nested": {
"path": "likes",
"query": {
"bool": {
"should": [
{
"term": {
"likes.name": "computer"
}
},
{
"term": {
"likes.name": "cat"
}
},
{
"term": {
"likes.name": "laptop"
}
}
]
}
},
"inner_hits": {}
}
}
]
}
},
"aggs": {
"likes": {
"nested": {
"path": "likes"
},
"aggs": {
"matcheLikes": {
"filter": {
"bool": {
"must": [
{
"terms": {
"likes.name": [
"computer",
"cat",
"laptop"
]
}
}
]
}
},
"aggs": {
"likeCount": {
"value_count": {
"field": "likes.name"
}
}
}
}
}
}
}
}
Result
"hits" : {
"total" : {
"value" : 2,
"relation" : "eq"
},
"max_score" : 1.5363467,
"hits" : [
{
"_index" : "index71",
"_type" : "_doc",
"_id" : "u9qTo3ABH6obcmRRRhSA",
"_score" : 1.5363467,
"_source" : {
"name" : "bob",
"likes" : [
{
"name" : "computer"
},
{
"name" : "cat"
},
{
"name" : "water"
}
]
},
"inner_hits" : {
"likes" : {
"hits" : {
"total" : {
"value" : 2,
"relation" : "eq"
},
"max_score" : 1.7917595,
"hits" : [
{
"_index" : "index71",
"_type" : "_doc",
"_id" : "u9qTo3ABH6obcmRRRhSA",
"_nested" : {
"field" : "likes",
"offset" : 1
},
"_score" : 1.7917595,
"_source" : {
"name" : "cat"
}
},
{
"_index" : "index71",
"_type" : "_doc",
"_id" : "u9qTo3ABH6obcmRRRhSA",
"_nested" : {
"field" : "likes",
"offset" : 0
},
"_score" : 1.2809337,
"_source" : {
"name" : "computer"
}
}
]
}
}
}
},
{
"_index" : "index71",
"_type" : "_doc",
"_id" : "pr-lqHABcSMy6UhGAWtW",
"_score" : 1.2809337,
"_source" : {
"name" : "bob",
"likes" : [
{
"name" : "computer"
},
{
"name" : "gaming"
},
{
"name" : "gambling"
}
]
},
"inner_hits" : {
"likes" : {
"hits" : {
"total" : {
"value" : 1,
"relation" : "eq"
},
"max_score" : 1.2809337,
"hits" : [
{
"_index" : "index71",
"_type" : "_doc",
"_id" : "pr-lqHABcSMy6UhGAWtW",
"_nested" : {
"field" : "likes",
"offset" : 0
},
"_score" : 1.2809337,
"_source" : {
"name" : "computer"
}
}
]
}
}
}
}
]
},
"aggregations" : {
"likes" : {
"doc_count" : 6,
"matcheLikes" : {
"doc_count" : 3,
"likeCount" : {
"value" : 3
}
}
}
}
Given the documents below, how would I search and return only the matched nested object? I would like the query to return the journal information with only the second nested article, since that's the one being matched in the query.
{
"mappings": {
"properties": {
"isn" : { "type":"text" },
"title" : { "type":"text" },
"article": {
"type": "nested"
}
}
}
}
PUT journal/_doc/1
{
"isn" : "11223344",
"article" : [
{
"id" : 1,
"title" : "first article title",
"author" : "John"
},
{
"id" : 2,
"title" : "second article title",
"author" : "Carl"
}
]
}
GET journal/_search
{
"query": {
"nested": {
"path": "article",
"query": {
"bool": {
"must": [
{ "match": { "article.title": "second" }}
]
}
}
}
}
}
All you need to do is ask for inner_hits in your query, as shown below:
GET journal/_search
{
"_source": false,
"query": {
"nested": {
"path": "article",
"query": {
"bool": {
"must": [
{ "match": { "article.title": "second" }}
]
}
}
, "inner_hits": {}
}
}
}
Note that the response has a specific structure:
{
"took" : 1,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 1,
"relation" : "eq"
},
"max_score" : 0.6931472,
"hits" : [
{
"_index" : "journal",
"_type" : "_doc",
"_id" : "1",
"_score" : 0.6931472,
"inner_hits" : {
"article" : {
"hits" : {
"total" : {
"value" : 1,
"relation" : "eq"
},
"max_score" : 0.6931472,
"hits" : [
{
"_index" : "journal",
"_type" : "_doc",
"_id" : "1",
"_nested" : {
"field" : "article",
"offset" : 1
},
"_score" : 0.6931472,
"_source" : {
"id" : 2,
"title" : "second article title",
"author" : "Carl"
}
}
]
}
}
}
}
]
}
}