Query for nested fields returns results as if there was no nested mapping - elasticsearch

I am having difficulty understanding why a query across nested fields is returning unexpected results.
I have the following template for my index
PUT /_template/nested_test
{
"index_patterns": [ "nested-*" ],
"settings": { "index.mapping.coerce": false },
"mappings": {
"dynamic": "strict",
"properties": {
"vNested": {
"type": "nested",
"properties": {
"v1": { "type": "keyword" },
"v2": {
"properties": {
"v21": {
"type": "long"
}
}
}
}
}
}
}
}
I will post two documents to an index that matches the template.
POST /nested-example/_doc
{
"vNested": [
{
"v1": "User1",
"v2": {
"v21": 1
}
},
{
"v1": "User3",
"v2": {
"v21": 3
}
}
]
}
POST /nested-example/_doc
{
"vNested": [
{
"v1": "User1",
"v2": {
"v21": 3
}
},
{
"v1": "User2",
"v2": {
"v21": 2
}
}
]
}
Now I will create a query with the goal of only getting the results of those documents, where there exists User1 with a corresponding v21 value of 3. As far as I understand, my nested mapping should ensure that I will only get the second document as query result.
The following query:
GET /nested-example/_search
{
"query" : {
"bool": {
"filter": {
"bool": {
"must": [
{
"nested": {
"path": "vNested",
"query": {
"match": {
"vNested.v1": "User1"
}
}
}
},
{
"nested": {
"path": "vNested",
"query": {
"match": {
"vNested.v2.v21": "3"
}
}
}
}
]
}
}
}
}
}
returns both documents, not only the single document that I expected.
I understand that the query string is not the most elegant - this is due to some business logic + front-end framework logic in place for creating the query strings based on user input and any suggestions on how to remove redundancies there are welcome as well.
However, I struggle to understand why this query returns both documents, including the one where the vNested object with v1=User1 has v21=1. Shouldn't the nested mapping of the vNested field prevent exactly that?

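You need to use a bool/must query inside a single nested query, because both conditions must match within the same nested object. With two separate nested queries, each condition may be satisfied by a different nested object of the same document. Modify your query as follows: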
{
"query": {
"bool": {
"filter": {
"bool": {
"must": [
{
"nested": {
"path": "vNested",
"query": {
"bool": {
"must": [
{
"match": {
"vNested.v1": "User1"
}
},
{
"match": {
"vNested.v2.v21": "3"
}
}
]
}
},
"inner_hits":{}
}
}
]
}
}
}
}
}
Search Result is
"hits": [
{
"_index": "nested-example",
"_type": "_doc",
"_id": "AAu0IXkBKyWl6Va6kmTU",
"_score": 0.0,
"_source": {
"vNested": [
{
"v1": "User1",
"v2": {
"v21": 3
}
},
{
"v1": "User2",
"v2": {
"v21": 2
}
}
]
},
"inner_hits": {
"vNested": {
"hits": {
"total": {
"value": 1,
"relation": "eq"
},
"max_score": 1.6931472,
"hits": [
{
"_index": "nested-example",
"_type": "_doc",
"_id": "AAu0IXkBKyWl6Va6kmTU",
"_nested": {
"field": "vNested",
"offset": 0
},
"_score": 1.6931472,
"_source": {
"v1": "User1",
"v2": {
"v21": 3
}
}
}
]
}
}
}
}
]

Related

Elastic Search Query on String Array Field

I'm working with Elasticsearch and facing an issue with an array field. I have an index named test-index with the following mapping.
{
"test-index": {
"mappings": {
"properties": {
"courses": {
"type": "keyword"
}
}
}
}
}
My Elasticsearch documents look like this.
"hits": [
{
"_index": "test-index",
"_id": "1ac:0000000000_1",
"_score": 1,
"_source": {
"courses": [
"Course-1A",
"Course-1B",
"Course-1C",
"Course-1D",
"Course-1E",
"Course-1F"
]
}
},
{
"_index": "test-index",
"_id": "1ac:0000000000_2",
"_score": 1,
"_source": {
"courses": [
"Course-2A",
"Course-2B",
"Course-2C",
"Course-1A"
]
}
}
]
The document _id is my student ID. I want to get results with the maximum/highest relevance at the top and lowest on the bottom.
e.g
If I'm searching for courses ["Course-2A","Course-2B","Course-1C"] then user 1ac:0000000000_2 should appear at the top and user 1ac:0000000000_1 at the bottom.
I've tried following queries.
GET test-index/_search
{
"query": {
"bool": {
"must": [
{
"terms": {
"courses": [
"Course-1A",
"Course-2A",
"Course-2B"
]
}
}
]
}
}
}
This returns user 1ac:0000000000_1 at the top and the other user at the bottom.
GET test-index/_search
{
"query": {
"bool": {
"should": [
{
"term": {
"courses": "Course-1A"
}
},
{
"term": {
"courses": "Course-2A"
}
},
{
"term": {
"courses": "Course-2B"
}
}
],
"minimum_should_match": "70%"
}
}
}
This gives me roughly the desired results, but I am not sure it will hold up for a larger dataset.

Reverse_nested aggregation + top hits : get parent and nested data at the same time

Do you know how to use reverse_nested aggregation to get both the parent and ONLY the nested data inside my top hit aggregations ?
The 'ONLY' part is the problem right now.
This is my mapping :
{
"ticket": {
"mappings": {
"properties": {
"name": {
"type": "keyword"
}
},
"tasks": {
"type": "nested",
"properties": {
"string_task_name": {
"type": "keyword"
}
}
}
}
}
}
My query uses top hits and reverse nested aggs.
{
"aggs": {
"object_tasks": {
"nested": {
"path": "object_tasks"
},
"aggs": {
"filter_by_tasks_attribute": {
"filter": {
"bool": {
"must": [
{
"wildcard": {
"object_tasks.string_task_name.keyword": "*"
}
}
]
}
},
"aggs": {
"using_reverse_nested": {
"reverse_nested": {
"path": "object_tasks"
},
"aggs": {
"names": {
"top_hits": {
"_source": {
"includes": [
"object_tasks.string_task_name",
"string_name"
]
},
"sort": [
{
"object_tasks.string_task_name.keyword": {
"order": "desc"
}
}
],
"from": 0,
"size": 10
}
}
}
}
}
}
}
}
}
}
{
"hits": {
"total": {
"value": 25,
"relation": "eq"
},
"max_score": null,
"hits": [
{
"_index": "random_index",
"_type": "_doc",
"_id": "5",
"_score": null,
"_source": {
"object_tasks": [ ================> I don't want all these tasks names, I just want the task name of the current nested object I am in.
{
"string_task_name": "task1"
},
{
"string_task_name": "task2"
},
{
"string_task_name": "task3"
},
{
"string_task_name": "task4"
}
],
"string_name": "Dummy Ticket 854"
},
"sort": [
"seek_a_sme"
]
}
]
}
}
As you can see the result is giving me 4 tasks name. What I want is to return only 1 task name.
The only workaround I have found is to copy the data of tickets inside the tasks. But if I can avoid it that would be awesome.
I don't want all these tasks names, I just want the task name of the current nested object I am in.
The statement "of the current nested object I'm in" implies that you are inside of a nested context but you cannot be in one when you escape it through reverse_nested…
I'm not sure if I truly understood what you're gunning for here but you could aggregate on the terms of object_tasks.string_task_name.keyword and the keys of this aggregation would then function as the individual "current nested objects" that you're after:
{
"size": 0,
"aggs": {
"object_tasks": {
"nested": {
"path": "object_tasks"
},
"aggs": {
"filter_by_tasks_attribute": {
"filter": {
"bool": {
"must": [
{
"wildcard": {
"object_tasks.string_task_name.keyword": "*"
}
}
]
}
},
"aggs": {
"by_string_task_name": {
"terms": {
"field": "object_tasks.string_task_name.keyword",
"order": {
"_key": "desc"
},
"size": 10
},
"aggs": {
"using_reverse_nested": {
"reverse_nested": {},
"aggs": {
"names": {
"top_hits": {
"_source": {
"includes": [
"string_name"
]
},
"from": 0,
"size": 10
}
}
}
}
}
}
}
}
}
}
}
}
yielding
"aggregations" : {
"object_tasks" : {
...
"filter_by_tasks_attribute" : {
...
"by_string_task_name" : {
...
"buckets" : [
{
"key" : "task4", <--
...
"using_reverse_nested" : {
...
"names" : {
"hits" : {
...
"hits" : [
{
...
"_source" : {
"string_name" : "Dummy Ticket 854" <--
}
}
]
}
}
}
},
{
"key" : "task3", <--
...
},
{
"key" : "task2", <--
...
},
{
"key" : "task1", <--
...
}
}
]
}
}
}
}
Notice that the top_hits aggregation doesn't need to be sorted anymore -- object_tasks.string_task_name.keyword will always be the same for any currently aggregated terms bucket. What I did instead was order this terms aggregation by _key which works the same way as a top_hits sort would have. BTW -- yours was missing the nested path parameter.

Elasticsearch filter by nested fields

I have a problem with creating a query to Elasticsearch with many conditions. My model looks like:
data class Product(
@Id
val id: String? = null,
val category: String,
val imagesUrls: List<String>,
@Field(type = FieldType.Double)
val price: Double?,
@Field(type = FieldType.Nested)
val parameters: List<Parameter>?
)
data class Parameter(
val key: String,
val values: List<String>
)
I would like to query products by:
category (for example cars)
price (between 20k $ and 50k $)
and parameters -> For example products with many parameters, like key capacity values 4L, 5L and second parameter gear transmission values manual
My current query looks like this:
GET data/_search
{
"size": 10,
"query": {
"bool": {
"must": [
{
"term": {
"category.keyword": {
"value": "cars"
}
}
},
{
"nested": {
"path": "parameters",
"query": {
"bool": {
"must": [
{"term": {
"parameters.key.keyword": {
"value": "Capacity"
}
}},
{
"term": {
"parameters.key": {
"value": "4L, 5L"
}
}
}
]
}
}
}
}
]
}
}
Could you tell me how to filter the products where the parameter key is equal to Capacity and the values list contains at least one of the given values?
How can I combine several conditions of this kind in one query?
Example data:
{
"category":"cars",
"name":"Ferrari",
"price":50000,
"parameters":[
{
"key":"capacity",
"values":"4L"
},
{
"key":"gear transmission",
"values":"automcatic"
}
]
}
The search query shown below queries the data based on:
category (for example cars)
And parameters -> For example products with many parameters, like key capacity values 4L, 5L and second parameter gear transmission
values manual
Adding a working example with index data, mapping, search query, and search result
Index Mapping:
{
"mappings": {
"properties": {
"parameters": {
"type": "nested"
}
}
}
}
Index Data:
{
"category":"cars",
"name":"Ferrari",
"price":50000,
"parameters":[
{
"key":"gear transmission",
"values":["4L","5L"]
},
{
"key":"capacity",
"values":"automcatic"
}
]
}
{
"category":"cars",
"name":"Ferrari",
"price":50000,
"parameters":[
{
"key":"capacity",
"values":["4L","5L"]
},
{
"key":"gear transmission",
"values":"automcatic"
}
]
}
{
"category":"cars",
"name":"Ferrari",
"price":50000,
"parameters":[
{
"key":"capacity",
"values":"4L"
},
{
"key":"gear transmission",
"values":"automcatic"
}
]
}
Search Query:
{
"query": {
"bool": {
"must": [
{
"term": {
"category.keyword": {
"value": "cars"
}
}
},
{
"nested": {
"path": "parameters",
"query": {
"bool": {
"must": [
{
"match": {
"parameters.key": "capacity"
}
},
{
"terms": {
"parameters.values": [
"4l",
"5l"
]
}
}
]
}
}
}
},
{
"nested": {
"path": "parameters",
"query": {
"bool": {
"must": [
{
"match": {
"parameters.key": "gear transmission"
}
},
{
"match": {
"parameters.values": "automcatic"
}
}
]
}
}
}
}
]
}
}
}
Search Result:
"hits": [
{
"_index": "bstof",
"_type": "_doc",
"_id": "1",
"_score": 3.9281754,
"_source": {
"category": "cars",
"name": "Ferrari",
"price": 50000,
"parameters": [
{
"key": "capacity",
"values": "4L"
},
{
"key": "gear transmission",
"values": "automcatic"
}
]
}
},
{
"_index": "bstof",
"_type": "_doc",
"_id": "2",
"_score": 3.9281754,
"_source": {
"category": "cars",
"name": "Ferrari",
"price": 50000,
"parameters": [
{
"key": "capacity",
"values": [
"4L",
"5L"
]
},
{
"key": "gear transmission",
"values": "automcatic"
}
]
}
}
]
When you need to match any one value from a list, you can use a terms query instead of a term query. Update this part of the query from:
{
"term": {
"parameters.key": {
"value": "4L, 5L"
}
}
}
to below:
{
"terms": {
"parameters.values": [
"4L",
"5L"
]
}
}
Note that if parameters.values is an analyzed field and a keyword sub-field exists for it, you should query the sub-field instead, e.g. parameters.values.keyword.
You can read more on terms query here.

ElasticSearch simple query

I have a structure like this in my Elasticsearch index:
{
_index: 'index',
_type: 'product',
_id: '896',
_score: 0,
_source: {
entity_id: '896',
category: [
{
category_id: 2,
is_virtual: 'false'
},
{
category_id: 82,
is_virtual: 'false'
}
]
}
}
I want to return all "products" that have category_id 82.
{
"query": {
"bool": {
"filter": {
"terms": {
"category.category_id": [
82
]
}
}
}
}
}
This query gives me 0 hits.
What is right way to do this?
Adding working example, you need to define the category as nested field and modify your search query by including the nested path
Index Mapping
{
"mappings": {
"properties": {
"entity_id": {
"type": "text"
},
"category": {
"type": "nested"
}
}
}
}
Index your document
{
"entity_id": "896",
"category": [
{
"category_id": 2,
"is_virtual": false
},
{
"category_id": 82,
"is_virtual": false
}
]
}
Proper search query, note we are using nested query which doesn't support normal filter(so your query gives error)
{
"query": {
"nested": {
"path": "category",
"query": {
"bool": {
"must": [
{
"match": {
"category.category_id": 82
}
}
]
}
}
}
}
}
The search result returns the indexed doc:
"hits": [
{
"_index": "complexnested",
"_type": "_doc",
"_id": "1",
"_score": 1.0,
"_source": {
"entity_id": "896",
"category": [
{
"category_id": 2,
"is_virtual": false
},
{
"category_id": 82,
"is_virtual": false
}
]
}
}
]
If your query gives you no results, I suspect that category is of type nested in your index mapping. If that's the case, that's good and you can modify your query like this to use the nested query:
{
"query": {
"bool": {
"filter": {
"nested": {
"path": "category",
"query": {
"terms": {
"category.category_id": [
82
]
}
}
}
}
}
}
}

Empty inner_hits in compound Elasticsearch filter

I'm seeing what appears to be aberrant behavior in inner_hits results within nested boolean queries.
Test data (abbreviated for brevity):
# MAPPING
PUT unit_testing
{
"mappings": {
"document": {
"properties": {
"display_name": {"type": "text"},
"metadata": {
"properties": {
"NAME": {"type": "text"}
}
}
}
},
"paragraph": {
"_parent": {"type": "document"},
"_routing": {"required": true},
"properties": {
"checksum": {"type": "text"},
"sentences": {
"type": "nested",
"properties": {
"text": {"type": "text"}
}
}
}
}
}
}
# DOCUMENT X 2 (d0, d1)
PUT unit_testing/document/doc_id_d0
{
"display_name": "Test Document d0",
"paragraphs": [
"para_id_d0p0",
"para_id_d0p1"
],
"metadata": {"NAME": "Test Document d0 Metadata"}
}
# PARAGRAPH X 2 (d0p0, d1p0)
PUT unit_testing/paragraph/para_id_d0p0?parent=doc_id_d0
{
"checksum": "para_checksum_d0p0",
"sentences": [
{"text": "Test sentence d0p0s0"},
{"text": "Test sentence d0p0s1 ODD"},
{"text": "Test sentence d0p0s2 EVEN"},
{"text": "Test sentence d0p0s3 ODD"},
{"text": "Test sentence d0p0s4 EVEN"}
]
}
This initial query behaves as I would expect (I'm aware that the metadata filter isn't actually necessary in this example case):
GET unit_testing/paragraph/_search
{
"_source": "false",
"query": {
"bool": {
"must": [
{
"has_parent": {
"query": {
"match_phrase": {
"metadata.NAME": "Test Document d0 Metadata"
}
},
"type": "document"
}
},
{
"nested": {
"inner_hits": {},
"path": "sentences",
"query": {
"match": {
"sentences.text": "d0p0s0"
}
}
}
}
]
}
}
}
It yields an inner_hits object containing the one sentence that matched the predicate (some fields removed for clarity):
{
"hits": {
"hits": [
{
"_source": {},
"inner_hits": {
"sentences": {
"hits": {
"hits": [
{
"_source": {
"text": "Test sentence d0p0s0"
}
}
]
}
}
}
}
]
}
}
The following query is an attempt to embed the query above within a parent "should" clause, to create a logical OR between the initial query, and an additional query that matches a single sentence:
GET unit_testing/paragraph/_search
{
"_source": "false",
"query": {
"bool": {
"should": [
{
"bool": {
"must": [
{
"has_parent": {
"query": {
"match_phrase": {
"metadata.NAME": "Test Document d0 Metadata"
}
},
"type": "document"
}
},
{
"nested": {
"inner_hits": {},
"path": "sentences",
"query": {
"match": {
"sentences.text": "d0p0s0"
}
}
}
}
]
}
},
{
"nested": {
"inner_hits": {},
"path": "sentences",
"query": {
"match": {
"sentences.text": "d1p0s0"
}
}
}
}
]
}
}
}
While the "d1" query outputs the result one would expect, with an inner_hits object containing the matching sentence, the original "d0" query now yields an empty inner_hits object:
{
"hits": {
"hits": [
{
"_source": {},
"inner_hits": {
"sentences": {
"hits": {
"total": 0,
"hits": []
}
}
}
},
{
"_source": {},
"inner_hits": {
"sentences": {
"hits": {
"hits": [
{
"_source": {
"text": "Test sentence d1p0s0"
}
}
]
}
}
}
}
]
}
}
Although I'm using the elasticsearch_dsl Python library to build and combine these queries, and I'm something of a novice with respect to the Query DSL, the query format looks solid to me.
What am I missing?
I think what is missing is the name parameter for inner_hits - you have two inner_hits clauses at two different queries that would end up with the same name. Try giving the inner_hits a name parameter (0).
0 - https://www.elastic.co/guide/en/elasticsearch/reference/current/search-request-inner-hits.html#_options

Resources