Query string with AND operator in nested query not working. Any idea? - elasticsearch

I want to get the documents whose nested children contain both words, Mifune AND Miller-Meteor.
For more detail on nested queries, I have gone through https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-nested-query.html
Here are the mappings:
{
"mappings" : {
"properties" : {
"driver" : {
"type" : "nested",
"properties" : {
"last_name" : {
"type" : "text"
},
"vehicle" : {
"type" : "nested",
"properties" : {
"make" : {
"type" : "text"
},
"model" : {
"type" : "text"
}
}
}
}
}
}
}
}
I have two documents in the index:
{
"driver" : {
"last_name" : "McQueen",
"vehicle" : [
{
"make" : "Powell Motors",
"model" : "Canyonero"
},
{
"make" : "Miller-Meteor",
"model" : "Ecto-1"
}
]
}
},{
"driver" : {
"last_name" : "Hudson",
"vehicle" : [
{
"make" : "Mifune",
"model" : "Mach Five"
},
{
"make" : "Miller-Meteor",
"model" : "Ecto-1"
}
]
}
}
query as below
{
"query" : {
"nested" : {
"path" : "driver",
"query" : {
"nested" : {
"path" : "driver.vehicle",
"query" : {
"bool" : {
"must" : [
{ "match" : { "driver.vehicle.make" : "Mifune" } },
{ "match" : { "driver.vehicle.make" : "Miller-Meteor" } }
]
}
}
}
}
}
}
}
I tried the above query, but it did not work.
I also tried query_string with the AND operator, but that did not work either:
{
"query": {
"nested": {
"path": "driver",
"query": {
"nested": {
"path": "driver.vehicle",
"query": {
"bool": {
"must": [
{
"query_string": {
"query": "Mifune AND Miller-Meteor",
"fields": ["driver.vehicle.make"]
}
}
]
}
}
}
}
}
}
}

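This is how you should query multiple nested fields.
A nested query on driver.vehicle is evaluated against one vehicle object at a time, so a bool->must placed inside it requires a single vehicle to match both "Mifune" and "Miller-Meteor", which never happens.
The bool->must operator should instead sit outside the inner nested queries, with two separate nested queries (one per make) inside the must clause.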
GET my_index/_search
{
"query": {
"nested": {
"path": "driver",
"query": {
"bool": {
"must": [
{
"nested": {
"path": "driver.vehicle",
"query": {
"match": {
"driver.vehicle.make": "Mifune"
}
}
}
},
{
"nested": {
"path": "driver.vehicle",
"query": {
"match": {
"driver.vehicle.make": "Miller-Meteor"
}
}
}
}
]
}
}
}
}
}
Results:
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 1,
"relation" : "eq"
},
"max_score" : 2.769686,
"hits" : [
{
"_index" : "my_index",
"_type" : "_doc",
"_id" : "2",
"_score" : 2.769686,
"_source" : {
"driver" : {
"last_name" : "Hudson",
"vehicle" : [
{
"make" : "Mifune",
"model" : "Mach Five"
},
{
"make" : "Miller-Meteor",
"model" : "Ecto-1"
}
]
}
}
}
]
}
}
Nested DataType
Hope this helps

Related

Elasticsearch Nested query not working as expected

I am a bit new to Elasticsearch. I am trying to write a nested query in Query DSL that gives a result something like the SQL below, i.e. I want to restrict the search by the driver's last name as well as the vehicle make, as in the use case below.
select driver.last_name,driver.vehicle.make,driver.vehicle.model from drivers
where driver.last_name='Hudson' and driver.vehicle.make='Miller-Meteor';
But this doesn't work in Elasticsearch SQL or in Query DSL.
--> Can we write a query like this in ES? Let me clarify:
if a department has a List[employees] in denormalized Elasticsearch data,
and I want to restrict the query by department_name and emp_position,
--> is this use case even possible in Elasticsearch?
select department_name,emp_name,emp_salary,emp_position
where emp_position="Intern" and department.name="devlopment"
--> Below are mappings and search Query DSL...
PUT /drivers
{
"mappings": {
"properties": {
"driver": {
"type": "nested",
"properties": {
"last_name": {
"type": "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"vehicle": {
"type": "nested",
"properties": {
"make": {
"type": "text"
},
"model": {
"type": "text"
}
}
}
}
}
}
}
}
GET /drivers/_mapping
O/P:
{
"drivers" : {
"mappings" : {
"properties" : {
"driver" : {
"type" : "nested",
"properties" : {
"last_name" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"vehicle" : {
"type" : "nested",
"properties" : {
"make" : {
"type" : "text"
},
"model" : {
"type" : "text"
}
}
}
}
}
}
}
}
}
--> inserting documents..
PUT /drivers/_doc/1
{
"driver" : {
"last_name" : "McQueen",
"vehicle" : [
{
"make" : "Powell Motors",
"model" : "Canyonero"
},
{
"make" : "Miller-Meteor",
"model" : "Ecto-1"
}
]
}
PUT /drivers/_doc/2
{
"driver" : {
"last_name" : "Hudson",
"vehicle" : [
{
"make" : "Mifune",
"model" : "Mach Five"
},
{
"make" : "Miller-Meteor",
"model" : "Ecto-1"
}
]
}
}
--> Below is the search query dsl..this gives 0 results. Even i replace
"term": {
"driver.last_name.keyword": "McQueen"
}
with "match" or "filter" still gives 0 results...
GET /drivers/_search
{
"query": {
"nested": {
"path": "driver",
"query": {
"nested": {
"path": "driver.vehicle",
"query": {
"bool": {
"must": [
{ "match": { "driver.vehicle.make": "Powell Motors" } },
{ "match": { "driver.vehicle.model": "Canyonero" } },
{
"term": {
"driver.last_name.keyword": "McQueen"
}
}
]
}
}
}
}
}
}
}
==> below Query DSL gives 2 results...
GET /drivers/_search
{
"query": {
"nested": {
"path": "driver",
"query": {
"nested": {
"path": "driver.vehicle",
"query": {
"bool": {
"must": [
{ "match": { "driver.vehicle.make": "Miller-Meteor" } }
]
}
}
}
}
}
}
}
O/P:
{
"took" : 1,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 2,
"relation" : "eq"
},
"max_score" : 1.3097506,
"hits" : [
{
"_index" : "drivers",
"_type" : "_doc",
"_id" : "1",
"_score" : 1.3097506,
"_source" : {
"driver" : {
"last_name" : "McQueen",
"vehicle" : [
{
"make" : "Powell Motors",
"model" : "Canyonero"
},
{
"make" : "Miller-Meteor",
"model" : "Ecto-1"
}
]
}
}
},
{
"_index" : "drivers",
"_type" : "_doc",
"_id" : "2",
"_score" : 1.3097506,
"_source" : {
"driver" : {
"last_name" : "Hudson",
"vehicle" : [
{
"make" : "Mifune",
"model" : "Mach Five"
},
{
"make" : "Miller-Meteor",
"model" : "Ecto-1"
}
]
}
}
}
]
}
}
==> this gives "parsing_exception",
"reason" : "[bool] malformed query, expected [END_OBJECT] but found [FIELD_NAME]",
==> even replacing 1st query bool to "match" also gives this error...as below
GET /drivers/_search
{
"query": {
"nested": {
"path": "driver",
"query": {
"bool": {
"must": [
{"match": {
"driver.last_name.keyword": "Hudson"
}}
]
},
"nested": {
"path": "driver.vehicle",
"query": {
"bool": {
"must": [
{
"match": {
"driver.vehicle.make": "Miller-Meteor"
}
}
]
}
}
}
}
}
}

Using named queries (matched_queries) for nested types in Elasticsearch?

Using named queries, I can get a list of the matched_queries for boolean expressions such as:
(query1) AND (query2 OR query3 OR true)
Here is an example of using named queries to match on top-level document fields:
DELETE test
PUT /test
PUT /test/_mapping/_doc
{
"properties": {
"name": {
"type": "text"
},
"type": {
"type": "text"
},
"TAGS": {
"type": "nested"
}
}
}
POST /test/_doc
{
"name" : "doc1",
"type": "msword",
"TAGS" : [
{
"ID" : "tag1",
"TYPE" : "BASIC"
},
{
"ID" : "tag2",
"TYPE" : "BASIC"
},
{
"ID" : "tag3",
"TYPE" : "BASIC"
}
]
}
# (query1) AND (query2 or query3 or true)
GET /test/_search
{
"query": {
"bool": {
"must": [
{
"match": {
"name": {
"query": "doc1",
"_name": "query1"
}
}
}
],
"should": [
{
"match": {
"type": {
"query": "msword",
"_name": "query2"
}
}
},
{
"exists": {
"field": "type",
"_name": "query3"
}
}
]
}
}
}
The above query correctly returns all three matched_queries in the response:
{
"took" : 1,
"timed_out" : false,
"_shards" : {
"total" : 5,
"successful" : 5,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : 1,
"max_score" : 1.5753641,
"hits" : [
{
"_index" : "test",
"_type" : "_doc",
"_id" : "TKNJ9G4BbvPS27u-ZYux",
"_score" : 1.5753641,
"_source" : {
"name" : "doc1",
"type" : "msword",
"TAGS" : [
{
"ID" : "ds1",
"TYPE" : "BASIC"
},
{
"ID" : "wb1",
"TYPE" : "BASIC"
}
]
},
"matched_queries" : [
"query1",
"query2",
"query3"
]
}
]
}
}
However, I'm trying to run a similar search:
(query1) AND (query2 OR query3 OR true)
only this time on the nested TAGS object rather than top-level document fields.
I've tried the following query, but the problem is I need to supply the inner_hits object for nested objects in order to get the matched_queries in the response, and I can only add it to one of the three queries.
GET /test/_search
{
"query": {
"bool": {
"must": {
"nested": {
"path": "TAGS",
"query": {
"match": {
"TAGS.ID": {
"query": "tag1",
"_name": "tag1-query"
}
}
},
// "inner_hits" : {}
}
},
"should": [
{
"nested": {
"path": "TAGS",
"query": {
"match": {
"TAGS.ID": {
"query": "tag2",
"_name": "tag2-query"
}
}
},
// "inner_hits" : {}
}
},
{
"nested": {
"path": "TAGS",
"query": {
"match": {
"TAGS.ID": {
"query": "tag3",
"_name": "tag3-query"
}
}
},
// "inner_hits" : {}
}
}
]
}
}
}
Elasticsearch will complain if I add more than one 'inner_hits'. I've commented out the places above where I can add it, but each of these will only return the single matched query.
I want my response to this query to return:
"matched_queries" : [
"tag1-query",
"tag2-query",
"tag3-query"
]
Any help is much appreciated, thanks!
A colleague helpfully provided a solution to this: move the _name parameter to directly under each nested section:
GET /test/_search
{
"query": {
"bool": {
"must": {
"nested": {
"_name": "tag1-query",
"path": "TAGS",
"query": {
"match": {
"TAGS.ID": {
"query": "tag1"
}
}
}
}
},
"should": [
{
"nested": {
"_name": "tag2-query",
"path": "TAGS",
"query": {
"match": {
"TAGS.ID": {
"query": "tag2"
}
}
}
}
},
{
"nested": {
"_name": "tag3-query",
"path": "TAGS",
"query": {
"match": {
"TAGS.ID": {
"query": "tag3"
}
}
}
}
}
]
}
}
}
This correctly returns all three tags now in the matched_queries response:
{
"took" : 1,
"timed_out" : false,
"_shards" : {
"total" : 5,
"successful" : 5,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : 1,
"max_score" : 2.9424875,
"hits" : [
{
"_index" : "test",
"_type" : "_doc",
"_id" : "TaNy9G4BbvPS27u--oto",
"_score" : 2.9424875,
"_source" : {
"name" : "doc1",
"type" : "msword",
"TAGS" : [
{
"ID" : "ds1",
"TYPE" : "DATASOURCE"
},
{
"ID" : "wb1",
"TYPE" : "WORKBOOK"
},
{
"ID" : "wb2",
"TYPE" : "WORKBOOK"
}
]
},
"matched_queries" : [
"tag1-query",
"tag2-query",
"tag3-query"
]
}
]
}
}

elastic: query the sum of a filtered subset of nested documents

Consider a document (post) like this in elasticsearch index:
{
title: "I love ice cream!"
comments: [
{
body: "me too!",
reaction: 'positive',
likes: 20
},
{
body: "huh!",
reaction: 'sarcastic',
likes: 5
}
]
}
The comments field is of nested type.
How can elastic answer this:
Give me all posts, where the total sum of likes on "sarcastic" comments is greater than 100.
I'm open to any other way of modelling data which helps answer such queries.
This can be solved using bucket selector aggregation.
Mapping:
{
"index1" : {
"mappings" : {
"properties" : {
"comments" : {
"type" : "nested",
"properties" : {
"body" : {
"type" : "text"
},
"likes" : {
"type" : "integer"
},
"reaction" : {
"type" : "text"
}
}
},
"title" : {
"type" : "keyword"
}
}
}
}
}
Data:
"hits" : [
{
"_index" : "index1",
"_type" : "_doc",
"_id" : "p0y9DGsBfPdKzuAGdQrm",
"_score" : 1.0,
"_source" : {
"title" : "I love ice cream!",
"comments" : [
{
"body" : "me too!",
"reaction" : "positive",
"likes" : 20
},
{
"body" : "huh!",
"reaction" : "sarcastic",
"likes" : 5
}
]
}
},
{
"_index" : "index1",
"_type" : "_doc",
"_id" : "qEy9DGsBfPdKzuAGnwox",
"_score" : 1.0,
"_source" : {
"title" : "I hate ice cream!",
"comments" : [
{
"body" : "me too!",
"reaction" : "positive",
"likes" : 10
},
{
"body" : "huh!",
"reaction" : "sarcastic",
"likes" : 5
}
]
}
}
]
}
Query:
GET index1/_search
{
"size": 0,
"aggs": {
"title": {
"terms": {
"field": "title"
},
"aggs": {
"comments": {
"nested": {
"path": "comments"
},
"aggs": {
"reaction": {
"filter": {
"term": {
"comments.reaction": "positive"
}
},
"aggs": {
"total_likes": {
"sum": {
"field": "comments.likes"
}
}
}
}
}
},
"total_likes_filter": {
"bucket_selector": {
"buckets_path": {
"likes": "comments>reaction>total_likes"
},
"script": "params.likes > 15"
}
}
}
}
}
}
Result:
"aggregations" : {
"title" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : "I love ice cream!",
"doc_count" : 1,
"comments" : {
"doc_count" : 2,
"reaction" : {
"doc_count" : 1,
"total_likes" : {
"value" : 20.0
}
}
}
}
]
}
}
}
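The result contains only the "I love ice cream!" bucket, whose total likes for the positive reaction (20) is greater than the threshold of 15 used in the script.
"I hate ice cream!" has a total of 10 likes for the positive reaction, so it is not included.
To answer the original question, filter on "sarcastic" instead of "positive" and use "params.likes > 100" in the script.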

Elasticsearch - Conditional nested fetching

I have index mapping:
{
"dev.directory.3" : {
"mappings" : {
"profile" : {
"properties" : {
"email" : {
"type" : "string",
"index" : "not_analyzed"
},
"events" : {
"type" : "nested",
"properties" : {
"id" : {
"type" : "integer"
},
"name" : {
"type" : "string",
"index" : "not_analyzed"
},
}
}
}
}
}
}
}
with data:
"hits" : [ {
"_index" : "dev.directory.3",
"_type" : "profile",
"_id" : "1",
"_score" : 1.0,
"_source" : {
"email" : "test#dummy.com",
"events" : [
{
"id" : 111,
"name" : "ABC",
},
{
"id" : 222,
"name" : "DEF",
}
],
}
}]
I'd like to filter only matched nested elements instead of returning all events array - is this possible in ES?
Example query:
{
"nested" : {
"path" : "events",
"query" : {
"bool" : {
"filter" : [
{ "match" : { "events.id" : 222 } },
]
}
}
}
}
E.g. if I query for events.id=222, only a single element should be returned in the result list.
What strategy would be best to achieve this kind of requirement?
You can use inner_hits to only get the nested records which matched the query.
{
"query": {
"nested": {
"path": "events",
"query": {
"bool": {
"filter": [
{
"match": {
"events.id": 222
}
}
]
}
},
"inner_hits": {}
}
},
"_source": false
}
I am also excluding _source so that only the matching nested hits are returned.

elasticsearch mix "and filter" with "bool filter"

I work with Elasticsearch, and I am trying to combine two working queries: the first uses an "and filter" and the second uses a "bool filter", but I have not managed to.
My queries are generated dynamically from a user interface.
The "and filter":
I need the "and filter" to query data where, for example, a field has to be equal to "africa" or "asia", or be empty. This is an example of a working query:
curl -XGET 'http://localhost:9200/botanique/specimens/_search?pretty' -d '
{
"fields" : ["D_TYPESTATUS", "O_HASMEDIA"],
"aggs" : {
"D_TYPESTATUS_MISSING" : {
"missing" : {
"field" : "D_TYPESTATUS"
}
},
"D_TYPESTATUS" : {
"terms" : {
"field" : "D_TYPESTATUS",
"size" : 10
}
}
},
"query" : {
"filtered" : {
"filter" : {
"and" : [
{ "or" : [{
"term" : {
"O_HASMEDIA" : "true"
}
}
]
}, {
"or" : [{
"term" : {
"T_GENUS" : "flemingia"
}
}
]
}, {
"or" : [{
"term" : {
"L_CONTINENT" : "africa"
}
}, {
"term" : {
"L_CONTINENT" : "asia"
}
}, {
"missing" : {
"field" : "L_CONTINENT"
}
}
]
}, {
"or" : [{
"term" : {
"I_INSTITUTIONCODE" : "mnhn"
}
}
]
}
]
}
}
}
}'
This query works fine; this is the result:
"hits" : {
"total" : 1006,
"max_score" : 1.0,
"hits" : [ {
"_index" : "botanique",
"_type" : "specimens",
"_id" : "9459AB31EC354F1FAE270BDB6C22CDF7",
"_score" : 1.0,
"fields" : {
"O_HASMEDIA" : [ true ],
"D_TYPESTATUS" : "syntype"
}
},
....
},
"aggregations" : {
"D_TYPESTATUS" : {
"buckets" : [ {
"key" : "syntype",
"doc_count" : 6
}, {
"key" : "type",
"doc_count" : 5
}, {
"key" : "isotype",
"doc_count" : 2
} ]
},
"D_TYPESTATUS_MISSING" : {
"doc_count" : 993
}
}
}
The second query:
Now I need to restrict the result data using the field "D_TYPESTATUS", which must be different from the value "type" and must not be null.
This query does that:
curl -XGET 'http://localhost:9200/botanique/specimens/_search?size=10&pretty' -d ' {
"fields" : ["D_TYPESTATUS", "O_HASMEDIA"],
"aggs" : {
"D_TYPESTATUS_MISSING" : {
"missing" : {"field" : "D_TYPESTATUS"}
},
"D_TYPESTATUS" : {
"terms" : {"field" : "D_TYPESTATUS","size" : 20}
}
},
"query" : {
"filtered" : {
"query" : {
"query_string" : { "query" : "liliaceae" }
},
"filter" : {
"bool" : {
"must_not" : [{
"term" : {
"D_TYPESTATUS" : "type"
}
}
],
"must":{
"exists" : {
"field" : "D_TYPESTATUS"
}
}
}
}
}
}
}'
and the result :
{[ {
"_index" : "botanique_tmp2",
"_type" : "specimens",
"_id" : "0C388B4A3186410CBA46826BA296ECBC",
"_score" : 0.9641713,
"fields" : {
"D_TYPESTATUS" : [ "isotype" ],
"O_HASMEDIA" : [ true ]
}
} , ... ]},
"aggregations" : {
"D_TYPESTATUS" : {
"buckets" : [ {
"key" : "isotype",
"doc_count" : 40
}, {
"key" : "syntype",
"doc_count" : 37
}, {
"key" : "holotype",
"doc_count" : 6
}, {
"key" : "paratype",
"doc_count" : 3
}, {
"key" : "isonéotype",
"doc_count" : 2
} ]
},
"D_TYPESTATUS_MISSING" : {
"doc_count" : 0
}
}
How can I integrate the "bool filter" into the "and filter"?
Thanks a lot.
Unless I am missing something, this is easy: add the "bool" filter as one more element of the "and" array:
{
"query": {
"filtered": {
"filter": {
"and": [
{
"or": [
{
"term": {
"O_HASMEDIA": "true"
}
}
]
},
{
"or": [
{
"term": {
"T_GENUS": "flemingia"
}
}
]
},
{
"or": [
{
"term": {
"L_CONTINENT": "africa"
}
},
{
"term": {
"L_CONTINENT": "asia"
}
},
{
"missing": {
"field": "L_CONTINENT"
}
}
]
},
{
"or": [
{
"term": {
"I_INSTITUTIONCODE": "mnhn"
}
}
]
},
{
"bool": {
"must_not": [
{
"term": {
"D_TYPESTATUS": "type"
}
}
],
"must": {
"exists": {
"field": "D_TYPESTATUS"
}
}
}
}
]
}
}
}
}

Resources