I'm very new to Elasticsearch. I'm trying to retrieve only particular elements from a nested array.
I created my index as shown below:
PUT store
{
"mappings": {
"properties": {
"storeList": {"type": "nested"},
"storeLocation": {"type": "text"},
"storePinCode" : {"type": "long"}
}
}
}
and I have data like this:
[
  {
    "storeLocation": "tirupati",
    "storePinCode": 517501,
    "storeList": [
      {
        "storeName": "apollo",
        "storeType": "med"
      },
      {
        "storeName": "carrots",
        "storeType": "restaurants"
      },
      {
        "storeName": "more",
        "storeType": "supermarket"
      }
    ]
  },
  {
    "storeLocation": "hyderabad",
    "storePinCode": 500038,
    "storeList": [
      {
        "storeName": "apollo",
        "storeType": "med"
      },
      {
        "storeName": "bahar cafe",
        "storeType": "restaurants"
      },
      {
        "storeName": "dmart",
        "storeType": "supermarket"
      }
    ]
  }
]
My expected output should look like this:
{
"took" : 2,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 2,
"relation" : "eq"
},
"max_score" : 1.0,
"hits" : [
{
"_index" : "store",
"_type" : "_doc",
"_id" : "Yk8SFWwB2zt5weEsMHn7",
"_score" : 1.0,
"_source" : {
"storeLocation" : "tirupati",
"storePinCode" : 517501,
"storeList" : [
{
"storeName" : "apollo",
"storeType" : "med"
}
]
}
},
{
"_index" : "store",
"_type" : "_doc",
"_id" : "ZE8SFWwB2zt5weEsqnkd",
"_score" : 1.0,
"_source" : {
"storeLocation" : "hyderabad",
"storePinCode" : 500038,
"storeList" : [
{
"storeName" : "apollo",
"storeType" : "med"
}
]
}
}
]
}
}
To achieve that, I tried the query below:
POST store/_search
{
"query": {
"nested": {
"path": "storeList",
"query": {
"bool" : {
"must" : [
{"match":{"storeList.storeName": "apollo"}}
]
}
},
"inner_hits": {}
}
}
}
I'm getting output, but it is not exactly what I expect. Is it possible to get the output in the shape I expect?
Actual output:
{
"took" : 4,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 2,
"relation" : "eq"
},
"max_score" : 1.093527,
"hits" : [
{
"_index" : "store",
"_type" : "_doc",
"_id" : "Yk8SFWwB2zt5weEsMHn7",
"_score" : 1.093527,
"_source" : {
"storeLocation" : "tirupati",
"storePinCode" : 517501,
"storeList" : [
{
"storeName" : "apollo",
"storeType" : "med"
},
{
"storeName" : "carrots",
"storeType" : "restaurants"
},
{
"storeName" : "more",
"storeType" : "supermarket"
}
]
},
"inner_hits" : {
"storeList" : {
"hits" : {
"total" : {
"value" : 1,
"relation" : "eq"
},
"max_score" : 1.093527,
"hits" : [
{
"_index" : "store",
"_type" : "_doc",
"_id" : "Yk8SFWwB2zt5weEsMHn7",
"_nested" : {
"field" : "storeList",
"offset" : 0
},
"_score" : 1.093527,
"_source" : {
"storeName" : "apollo",
"storeType" : "med"
}
}
]
}
}
}
},
{
"_index" : "store",
"_type" : "_doc",
"_id" : "ZE8SFWwB2zt5weEsqnkd",
"_score" : 1.093527,
"_source" : {
"storeLocation" : "hyderabad",
"storePinCode" : 500038,
"storeList" : [
{
"storeName" : "apollo",
"storeType" : "med"
},
{
"storeName" : "bahar cafe",
"storeType" : "restaurants"
},
{
"storeName" : "dmart",
"storeType" : "supermarket"
}
]
},
"inner_hits" : {
"storeList" : {
"hits" : {
"total" : {
"value" : 1,
"relation" : "eq"
},
"max_score" : 1.093527,
"hits" : [
{
"_index" : "store",
"_type" : "_doc",
"_id" : "ZE8SFWwB2zt5weEsqnkd",
"_nested" : {
"field" : "storeList",
"offset" : 0
},
"_score" : 1.093527,
"_source" : {
"storeName" : "apollo",
"storeType" : "med"
}
}
]
}
}
}
}
]
}
}
Could you please help me out with this?
@ajay sharma, as you suggested, I changed my query to this:
GET store/_search
{
"_source": {
"includes": [ "*" ],
"excludes": [ "storeList" ]
},
"query": {
"nested": {
"path": "storeList",
"inner_hits": {
"_source": [
"storeName", "storeType"
]
},
"query": {
"bool": {
"must": [
{"match":{"storeList.storeName": "more"}}
]
}
}
}
}
}
but I'm getting a response like the one below, where the inner hit's `_source` is empty:
{
"_index" : "store",
"_type" : "_doc",
"_id" : "Yk8SFWwB2zt5weEsMHn7",
"_score" : 1.0946013,
"_source" : {
"storeLocation" : "tirupati",
"storePinCode" : 517501
},
"inner_hits" : {
"storeList" : {
"hits" : {
"total" : {
"value" : 1,
"relation" : "eq"
},
"max_score" : 1.0946013,
"hits" : [
{
"_index" : "store",
"_type" : "_doc",
"_id" : "Yk8SFWwB2zt5weEsMHn7",
"_nested" : {
"field" : "storeList",
"offset" : 2
},
"_score" : 1.0946013,
"_source" : { }
}
]
}
}
}
}
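I cannot reply to your comment with a comment, so I am sharing this as an answer.
I have updated the query below; please check it. The only change is in `inner_hits._source`, which now uses the full field paths (`storeList.storeName`, `storeList.storeType`). I replicated your index on my local machine and got the desired result.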
Query
{
"_source": {
"includes": [ "*" ],
"excludes": [ "storeList" ]
},
"query": {
"nested": {
"path": "storeList",
"inner_hits": {
"_source": [
"storeList.storeName", "storeList.storeType" <-- changes are here -->
]
},
"query": {
"bool": {
"must": [
{"match":{"storeList.storeName": "more"}}
]
}
}
}
}
}
Output
"hits": {
"total": 1,
"max_score": 0.9808292,
"hits": [
{
"_index": "store",
"_type": "store",
"_id": "2",
"_score": 0.9808292,
"_source": {
"storeLocation": "tirupati",
"storePinCode": 517501
},
"inner_hits": {
"storeList": {
"hits": {
"total": 1,
"max_score": 0.9808292,
"hits": [
{
"_nested": {
"field": "storeList",
"offset": 2
},
"_score": 0.9808292,
"_source": {
"storeList": {
"storeType": "supermarket",
"storeName": "more"
}
}
}
]
}
}
}
}
]
}
Related
Getting incorrect inner hits from a parent-child relationship when combined with a boolean query
Hi everyone,
I am getting incorrect inner hits results when combining a parent-child query with a boolean query. To reproduce the issue, I created this index:
PUT /my-index-000001
{
"mappings": {
"_routing": {
"required": true
},
"properties": {
"parentProperty": {
"type": "text"
},
"childProperty": {
"type": "text"
},
"id": {
"type": "integer"
},
"myJoinField": {
"type": "join",
"relations": {
"parent": "mychild"
}
}
}
}
}
Then I add these three documents (the document with id "1" is the parent of the other two documents):
POST /my-index-000001/_doc/1?routing=1
{
"id": 1,
"parentProperty": "a parent document",
"myJoinField": "parent"
}
POST /my-index-000001/_doc/2?routing=1
{
"id": 2,
"childProperty": "queensland civil administration",
"myJoinField": {
"name":"mychild",
"parent":"1"
}
}
POST /my-index-000001/_doc/3?routing=1
{
"id": 3,
"childProperty": "beautiful weather",
"myJoinField": {
"name":"mychild",
"parent":"1"
}
}
Now the index is set up with 3 documents. I am looking for all child documents that satisfy this boolean query: [childProperty contains either "queensland civil" or both "beautiful" and "nothing"].
I expect Elasticsearch to return only the child document with id "2", since the child document with id "3" does not contain the term "nothing".
The translated version of this query is as follows:
GET /my-index-000001/_search
{
"query": {
"bool": {
"minimum_should_match": 1,
"should": [
{
"has_child": {
"inner_hits": {
"name": "opr1"
},
"query": {
"query_string": {
"analyzer": "stop",
"query": "childProperty:(\"queensland civil\")"
}
},
"type": "mychild"
}
},
{
"bool": {
"must": [
{
"has_child": {
"inner_hits": {
"name": "opr2"
},
"query": {
"query_string": {
"query": "childProperty:(beautiful)"
}
},
"type": "mychild"
}
},
{
"has_child": {
"inner_hits": {
"name": "opr3"
},
"query": {
"query_string": {
"query": "childProperty:(nothing)"
}
},
"type": "mychild"
}
}
]
}
}
]
}
}
}
and the result returned by Elasticsearch is as follows:
{
"took" : 24,
"timed_out" : false,
"_shards" : {
"total" : 5,
"successful" : 5,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 1,
"relation" : "eq"
},
"max_score" : 1.0,
"hits" : [
{
"_index" : "my-index-000001",
"_type" : "_doc",
"_id" : "1",
"_score" : 1.0,
"_routing" : "1",
"_source" : {
"id" : 1,
"parentProperty" : "a parent document",
"myJoinField" : "parent"
},
"inner_hits" : {
"opr1" : {
"hits" : {
"total" : {
"value" : 1,
"relation" : "eq"
},
"max_score" : 1.2814486,
"hits" : [
{
"_index" : "my-index-000001",
"_type" : "_doc",
"_id" : "2",
"_score" : 1.2814486,
"_routing" : "1",
"_source" : {
"id" : 2,
"childProperty" : "queensland civil administration",
"myJoinField" : {
"name" : "mychild",
"parent" : "1"
}
}
}
]
}
},
"opr2" : {
"hits" : {
"total" : {
"value" : 1,
"relation" : "eq"
},
"max_score" : 0.7549127,
"hits" : [
{
"_index" : "my-index-000001",
"_type" : "_doc",
"_id" : "3",
"_score" : 0.7549127,
"_routing" : "1",
"_source" : {
"id" : 3,
"childProperty" : "beautiful weather",
"myJoinField" : {
"name" : "mychild",
"parent" : "1"
}
}
}
]
}
},
"opr3" : {
"hits" : {
"total" : {
"value" : 0,
"relation" : "eq"
},
"max_score" : null,
"hits" : [ ]
}
}
}
}
]
}
}
As you can see in the result, Elasticsearch returns both child documents in the inner hits, which clearly goes against what I have written in the "must" section of the query.
But if I rewrite the query as follows, it returns ONLY the expected document (the document with id "2"):
GET /my-index-000001/_search
{
  "query": {
    "bool": {
      "must": [
        {
          "has_child": {
            "inner_hits": {
              "name": "opr1"
            },
            "query": {
              "bool": {
                "minimum_should_match": 1,
                "should": [
                  {
                    "query_string": {
                      "query": "childProperty:(\"queensland civil\")"
                    }
                  },
                  {
                    "bool": {
                      "must": [
                        {
                          "query_string": {
                            "query": "childProperty:(beautiful)"
                          }
                        },
                        {
                          "query_string": {
                            "query": "childProperty:(nothing)"
                          }
                        }
                      ]
                    }
                  }
                ]
              }
            },
            "type": "mychild"
          }
        }
      ]
    }
  }
}
Here is the correct result:
{
"took" : 3,
"timed_out" : false,
"_shards" : {
"total" : 5,
"successful" : 5,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 1,
"relation" : "eq"
},
"max_score" : 1.0,
"hits" : [
{
"_index" : "my-index-000001",
"_type" : "_doc",
"_id" : "1",
"_score" : 1.0,
"_routing" : "1",
"_source" : {
"id" : 1,
"parentProperty" : "a parent document",
"myJoinField" : "parent"
},
"inner_hits" : {
"opr1" : {
"hits" : {
"total" : {
"value" : 1,
"relation" : "eq"
},
"max_score" : 1.2814486,
"hits" : [
{
"_index" : "my-index-000001",
"_type" : "_doc",
"_id" : "2",
"_score" : 1.2814486,
"_routing" : "1",
"_source" : {
"id" : 2,
"childProperty" : "queensland civil administration",
"myJoinField" : {
"name" : "mychild",
"parent" : "1"
}
}
}
]
}
}
}
}
]
}
}
I would appreciate it if someone could tell me what I did wrong in the first query, or whether this is the default behavior in Elasticsearch when it comes to parent/child relationships.
I want to get the counts of SUSPECT and CLEAR for each state in my API using an Elasticsearch query.
The data inside Elasticsearch looks like this.
Sample data:
[
  {
    "_index" : "index_name",
    "_type" : "_doc",
    "_id" : "id1",
    "_score" : 1.0,
    "_source" : {
      "slflag" : "SUSPECT",
      "state_name" : "UTTAR PRADESH"
    }
  },
  {
    "_index" : "index_name",
    "_type" : "_doc",
    "_id" : "id2",
    "_score" : 1.0,
    "_source" : {
      "slflag" : "CLEAR",
      "state_name" : "UTTAR PRADESH"
    }
  },
  {
    "_index" : "index_name",
    "_type" : "_doc",
    "_id" : "id3",
    "_score" : 1.0,
    "_source" : {
      "slflag" : "SUSPECT",
      "state_name" : "Delhi"
    }
  },
  {
    "_index" : "index_name",
    "_type" : "_doc",
    "_id" : "id4",
    "_score" : 1.0,
    "_source" : {
      "slflag" : "CLEAR",
      "state_name" : "Madhya Pradesh"
    }
  },
  {
    "_index" : "index_name",
    "_type" : "_doc",
    "_id" : "id5",
    "_score" : 1.0,
    "_source" : {
      "slflag" : "SUSPECT",
      "state_name" : "Rajasthan"
    }
  },
  {
    "_index" : "index_name",
    "_type" : "_doc",
    "_id" : "id6",
    "_score" : 1.0,
    "_source" : {
      "slflag" : "CLEAR",
      "state_name" : "Bihar"
    }
  }
]
The fields are state_name and slflag.
The slflag field has two categories: "SUSPECT" and "CLEAR".
I want to write a query that returns results like this:
{
"stateName": "UTTAR PRADESH",
"clear": 688,
"suspect": 182
},
{
"stateName": "Bihar",
"clear": 398456,
"suspect": 117110
},
{
"stateName": "Rajasthan",
"clear": 688,
"suspect": 182
},
{
"stateName": "Delhi",
"clear": 12096,
"suspect": 984
}
I don't know how to count the slflag values for each state.
Thanks in advance.
Output of GET /index:
{
  "index" : {
    "aliases" : { },
    "mappings" : {
      "properties" : {
        "@timestamp" : {
          "type" : "date"
        },
        "@version" : {
          "type" : "text",
          "fields" : {
            "keyword" : {
              "type" : "keyword",
              "ignore_above" : 256
            }
          }
        },
        "slflag" : {
          "type" : "text",
          "fields" : {
            "keyword" : {
              "type" : "keyword",
              "ignore_above" : 256
            }
          }
        },
        "state_name" : {
          "type" : "text",
          "fields" : {
            "keyword" : {
              "type" : "keyword",
              "ignore_above" : 256
            }
          }
        },
        "wl_d_ind" : {
          "type" : "text",
          "fields" : {
            "keyword" : {
              "type" : "keyword",
              "ignore_above" : 256
            }
          }
        }
      }
    },
    "settings" : {
      "index" : {
        "creation_date" : "1587554261571",
        "number_of_shards" : "1",
        "number_of_replicas" : "1",
        "uuid" : "zFKQmxyTSsyoVLRoCC_3IA",
        "version" : {
          "created" : "7060199"
        },
        "provided_name" : "index"
      }
    }
  }
}
I tried the query below:
GET /index/_search
{
"size": 0,
"aggs": {
"states": {
"terms": {
"field": "state_name.keyword",
"size": 100
},
"aggs": {
"flag": {
"terms": {
"field": "slflag.keyword"
}
}
}
}
}
}
The above results in:
{
"took" : 1,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 10000,
"relation" : "gte"
},
"max_score" : null,
"hits" : [ ]
},
"aggregations" : {
"states" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : "UTTAR PRADESH",
"doc_count" : 5403369,
"flag" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : "CLEAR",
"doc_count" : 4540278
},
{
"key" : "SUSPECT",
"doc_count" : 863091
}
]
}
},
{
"key" : "RAJASTHAN",
"doc_count" : 2239768,
"flag" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : "CLEAR",
"doc_count" : 1866196
},
{
"key" : "SUSPECT",
"doc_count" : 373572
}
]
}
},
{
"key" : "GOA",
"doc_count" : 12,
"flag" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : "CLEAR",
"doc_count" : 12
}
]
}
}
]
}
}
}
You need to aggregate first on `state_name.keyword` and then on `slflag.keyword`, like this:
GET index_name/_search?filter_path=**.key,**.doc_count
{
"size": 0,
"aggs": {
"states": {
"terms": {
"field": "state_name.keyword",
"size": 100
},
"aggs": {
"flag": {
"terms": {
"field": "slflag.keyword"
}
}
}
}
}
}
Given the documents below, how would I search and return only the matched nested object? I would like the query to return the journal information with only the second nested article, since that's the one being matched in the query.
PUT journal
{
  "mappings": {
    "properties": {
      "isn": { "type": "text" },
      "title": { "type": "text" },
      "article": {
        "type": "nested"
      }
    }
  }
}
PUT journal/_doc/1
{
"isn" : "11223344",
"article" : [
{
"id" : 1,
"title" : "first article title",
"author" : "John"
},
{
"id" : 2,
"title" : "second article title",
"author" : "Carl"
}
]
}
GET journal/_search
{
"query": {
"nested": {
"path": "article",
"query": {
"bool": {
"must": [
{ "match": { "article.title": "second" }}
]
}
}
}
}
}
All you need is to ask for inner_hits in your query, like below:
GET journal/_search
{
  "_source": false,
  "query": {
    "nested": {
      "path": "article",
      "inner_hits": {},
      "query": {
        "bool": {
          "must": [
            {
              "match": {
                "article.title": "second"
              }
            }
          ]
        }
      }
    }
  }
}
Note that the response has a specific structure:
{
"took" : 1,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 1,
"relation" : "eq"
},
"max_score" : 0.6931472,
"hits" : [
{
"_index" : "journal",
"_type" : "_doc",
"_id" : "1",
"_score" : 0.6931472,
"inner_hits" : {
"article" : {
"hits" : {
"total" : {
"value" : 1,
"relation" : "eq"
},
"max_score" : 0.6931472,
"hits" : [
{
"_index" : "journal",
"_type" : "_doc",
"_id" : "1",
"_nested" : {
"field" : "article",
"offset" : 1
},
"_score" : 0.6931472,
"_source" : {
"id" : 2,
"title" : "second article title",
"author" : "Carl"
}
}
]
}
}
}
}
]
}
}
I'm new to Elasticsearch and struggling with this situation (for example):
I want to index offices and the clerks of each office, so I've created this mapping:
PUT office
{
  "mappings": {
    "properties": {
      "name": {
        "type": "text"
      },
      "clerks": {
        "type": "nested"
      }
    }
  }
}
and indexed some data:
PUT /office/_doc/1
{
"name": "office a",
"clerks": [
{
"name": "a a a a"
},
{
"name": "a a"
},
{
"name": "b"
}
]
}
PUT /office/_doc/2
{
"name": "office b",
"clerks": [
{
"name": "a a a"
}
]
}
Now, I want to search for 'a':
GET /office/_search
{
"query": {
"nested": {
"path": "clerks",
"query": {
"match": {
"clerks.name": "a"
}
},
"inner_hits": {}
}
}
}
the result is:
{
"took" : 624,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 2,
"relation" : "eq"
},
"max_score" : 1.1272885,
"hits" : [
{
"_index" : "office",
"_type" : "_doc",
"_id" : "2",
"_score" : 1.1272885,
"_source" : {
"name" : "office b",
"clerks" : [
{
"name" : "a a a"
}
]
},
"inner_hits" : {
"clerks" : {
"hits" : {
"total" : {
"value" : 1,
"relation" : "eq"
},
"max_score" : 1.1272885,
"hits" : [
{
"_index" : "office",
"_type" : "_doc",
"_id" : "2",
"_nested" : {
"field" : "clerks",
"offset" : 0
},
"_score" : 1.1272885,
"_source" : {
"name" : "a a a"
}
}
]
}
}
}
},
{
"_index" : "office",
"_type" : "_doc",
"_id" : "1",
"_score" : 1.1184678,
"_source" : {
"name" : "office a",
"clerks" : [
{
"name" : "a a a a"
},
{
"name" : "a a"
},
{
"name" : "b"
}
]
},
"inner_hits" : {
"clerks" : {
"hits" : {
"total" : {
"value" : 2,
"relation" : "eq"
},
"max_score" : 1.1468691,
"hits" : [
{
"_index" : "office",
"_type" : "_doc",
"_id" : "1",
"_nested" : {
"field" : "clerks",
"offset" : 0
},
"_score" : 1.1468691,
"_source" : {
"name" : "a a a a"
}
},
{
"_index" : "office",
"_type" : "_doc",
"_id" : "1",
"_nested" : {
"field" : "clerks",
"offset" : 1
},
"_score" : 1.0900666,
"_source" : {
"name" : "a a"
}
}
]
}
}
}
}
]
}
}
I expect 'a a a' to appear before 'a a', and the result to look like this:
[
{
"name": "a a a a"
},
{
"name": "a a a"
},
{
"name": "a a"
}
]
How can I achieve this result?
I have the following data in an Elasticsearch index called products:
{
"took" : 0,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 4,
"relation" : "eq"
},
"max_score" : 1.0,
"hits" : [
{
"_index" : "products",
"_type" : "_doc",
"_id" : "1",
"_score" : 1.0,
"_source" : {
"prod_id" : 1,
"currency" : "USD",
"price" : 1
}
},
{
"_index" : "products",
"_type" : "_doc",
"_id" : "2",
"_score" : 1.0,
"_source" : {
"prod_id" : 2,
"currency" : "INR",
"price" : 60
}
},
{
"_index" : "products",
"_type" : "_doc",
"_id" : "3",
"_score" : 1.0,
"_source" : {
"prod_id" : 3,
"currency" : "EUR",
"price" : 2
}
},
{
"_index" : "products",
"_type" : "_doc",
"_id" : "5",
"_score" : 1.0,
"_source" : {
"prod_id" : 5,
"currency" : "MYR",
"price" : 1
}
}
]
}
}
I am sorting the data based on the price field.
I use the following query to do so:
GET products/_search
{
"query": {
"function_score": {
"query": {
"match_all": {}
},
"functions": [{
"script_score": {
"script": {
"params": {
"USD": 1,
"SGD": 0.72,
"MYR": 0.24,
"INR": 0.014,
"EUR": 1.12
},
"source": "doc['price'].value * (doc.currency.value == 'eur'? params.EUR : doc.currency.value == 'myr' ? params.MYR : doc.currency.value == 'inr' ? params.INR : 1)"
}
}
}]
}
},
"sort": [
{
"_score": {
"order": "desc"
}
}
]
}
Because the currency field in the products index is of type text,
it is indexed with the standard analyzer, which converts it to lower case.
I wish to optimise this part of the script, as I may end up with 20-30 currencies:
"source": "doc['price'].value * (doc.currency.value == 'eur'? params.EUR : doc.currency.value == 'myr' ? params.MYR : doc.currency.value == 'inr' ? params.INR : 1)"
I was able to optimize the source script with the following working solution -
GET products/_search
{
  "query": {
    "function_score": {
      "query": {
        "match_all": {}
      },
      "functions": [
        {
          "script_score": {
            "script": {
              "params": {
                "USD": 1,
                "SGD": 0.72,
                "MYR": 0.24,
                "INR": 0.014,
                "EUR": 1.12
              },
              "source": "doc['price'].value * params.getOrDefault(doc['currency.keyword'].value, 1)"
            }
          }
        }
      ]
    }
  },
  "sort": [
    {
      "_score": {
        "order": "desc"
      }
    }
  ]
}