Elasticsearch: When doing an "inner_hit" on nested documents, return all fields of matched offset in the hierarchy - elasticsearch

Mapping for document:
{
"mappings": {
"properties": {
"client_classes": {
"type": "nested",
"properties": {
"members": {
"type": "nested",
"properties": {
"phone_nos": {
"type": "nested"
}
}
}
}
}
}
}
}
Data in Document:
{
"client_name":"client1",
"client_classes":[
{
"class_name":"class1",
"members":[
{
"name":"name1",
"phone_nos":[
{
"ext":"91",
"number":"99119XXXX"
},
{
"ext":"04",
"number":"99885XXXX"
}
]
},
{
"name":"name2",
"phone_nos":[
{
"ext":"03",
"number":"99887XXXX"
}
]
}
]
}
]
}
I query for "number" with value "99119XXXX"
{
"query":{
"nested":{
"path":"client_classes.members.phone_nos",
"query":{
"match":{
"client_classes.members.phone_nos.number":"99119XXXX"
}
},
"inner_hits":{}
}
}
}
Result from inner hits:
"inner_hits":{
"client_classes.members.phone_nos":{
"hits":{
"total":{
"value":1,
"relation":"eq"
},
"max_score":0.9808291,
"hits":[
{
"_index":"clients",
"_type":"_doc",
"_id":"1",
"_nested":{
"field":"client_classes",
"offset":0,
"_nested":{
"field":"members",
"offset":0,
"_nested":{
"field":"phone_nos",
"offset":0
}
}
},
"_score":0.9808291,
"_source":{
"ext":"91",
"number":"99119XXXX"
}
}
]
}
}
}
I get the desired matched result hierarchy of all the nested objects, in the inner hit, but I only receive the "offset" value and "field" from these objects. I need the full object of the corresponding offset.
Something like this:
{
"client_name":"client1",
"client_classes":[
{
"class_name":"class1",
"members":[
{
"name":"name1",
"phone_nos":[
{
"ext":"91",
"number":"99119XXXX"
}
]
}
]
}
]
}
I understand that with inner_hit I also get the complete root document, from where I can use the offset values from the innerhit object. But fetching the entire root document could be expensive for our memory, so I only need the result I have shared above.
Is there any such possibility as of now?
I am using elasticsearch 7.7
UPDATE: Added Mapping, result and a slight fix in document

Yes, just add "_source": false at the top-level and you'll only get the nested inner hits
{
"_source": false, <--- add this
"query":{
"nested":{
"path":"client_classes.members.phone_nos",
"query":{
"match":{
"client_classes.members.phone_nos.number":"99119XXXX"
}
},
"inner_hits":{}
}
}
}

Related

How do I sort using the best matching nested field or a default in Elasticsearch?

I have a bunch of documents that look like this in my index:
{
"given_name":"John",
"family_name":"Smith",
"email_addresses": [
{
"email_address":"john#gmail.com",
"primary":true
},
{
"email_address":"j.smith#gmail.com",
"primary":false
},
{
"email_address":"jpsmith#gmail.com",
"primary":false
},
{
"email_address":"johnsmith111#gmail.com",
"primary":false
}
]
}
The mapping looks like this:
{
"mappings":{
"properties":{
"given_name":{
"type":"keyword",
"fields":{
"search":{
"type":"search_as_you_type"
}
}
},
"family_name":{
"type":"keyword",
"fields":{
"search":{
"type":"search_as_you_type"
}
}
},
"email_addresses":{
"type":"nested",
"properties":{
"email_address":{
"type":"keyword",
"fields":{
"search":{
"type":"search_as_you_type"
}
}
},
"primary":{
"type":"boolean"
}
}
}
}
}
}
I am running a prefix search on given_name, family_name and email_addresses. This will allow the user to start typing and relevant results from those fields should start returning:
{
"query":{
"bool":{
"should":[
{
"nested":{
"path":"email_addresses",
"query":{
"prefix":{
"email_addresses.email_address.search": {
"value":"j"
}
}
}
}
},
{
"multi_match":{
"query":"j",
"fields":[
"given_name.search",
"family_name.search"
],
"type": "bool_prefix"
}
}
]
}
}
}
I'd like to sort the results from the above by the best matching email_address in email_addresses if there is one or more matching email_address under email_addresses, otherwise to use the email_address under email_addresses where primary is true.
I have looked into a script for sorting, but I didn't find any way to access the matched nested child in a script in the documentation.
Is there any way to achieve this?
To do this, we can use a bool query in the nested sort.
Given we have the following 4 documents:
{
"given_name":"John",
"family_name":"Smith1",
"email_addresses": [
{
"email_address":"someguy50#example.com",
"primary":true
},
{
"email_address":"someguy51#example.com",
"primary":false
},
{
"email_address":"someguy52#gmail.com",
"primary":false
},
{
"email_address":"someguy53gmail.com",
"primary":false
}
]
}
{
"given_name":"John",
"family_name":"Smith2",
"email_addresses": [
{
"email_address":"someguy54#example.com",
"primary":true
},
{
"email_address":"johnsmith#example.com",
"primary":false
},
{
"email_address":"someguy55#gmail.com",
"primary":false
},
{
"email_address":"someguy56gmail.com",
"primary":false
}
]
}
{
"given_name":"John",
"family_name":"Smith3",
"email_addresses": [
{
"email_address":"someguy49#example.com",
"primary":true
},
{
"email_address":"someguy47#example.com",
"primary":false
},
{
"email_address":"someguy48#gmail.com",
"primary":false
},
{
"email_address":"someguy46gmail.com",
"primary":false
}
]
}
{
"given_name":"John",
"family_name":"Smith4",
"email_addresses": [
{
"email_address":"someguy45#example.com",
"primary":true
},
{
"email_address":"someguy44#example.com",
"primary":false
},
{
"email_address":"someguy43#gmail.com",
"primary":false
},
{
"email_address":"someguy42gmail.com",
"primary":false
}
]
}
We can write our query like so:
{
"query":{
"bool":{
"should":[
{
"nested":{
"path":"email_addresses",
"query":{
"prefix":{
"email_addresses.email_address.search":{
"value":"john"
}
}
}
}
},
{
"multi_match":{
"query":"john",
"fields":[
"given_name.search",
"family_name.search"
],
"type":"bool_prefix"
}
}
]
}
},
"sort":[
{
"email_addresses.email_address":{
"order" : "asc",
"nested":{
"path":"email_addresses",
"filter":{
"bool":{
"should":[
{
"prefix":{
"email_addresses.email_address.search":{
"value":"john"
}
}
},
{
"term":{
"email_addresses.primary": true
}
}
]
}
}
}
}
}
]
}
First we do a prefix search on the email_addresses.email_address, given_name and family_name.
Then we sort on the nested email_addresses field as follows:
Sort by the email_addresses.email_address that matches our query.
Sort by email_addresses.primary = true.
The way this works is that in the bool query, Elasticsearch will first find documents that match the first query under should and sort those documents. For the remaining documents that do not match, it will proceed to the next query, which in our case is email_addresses.primary = true. If there are more documents that do not match either of these queries, they will be ordered using an order predetermined by Elasticsearch.

Filter nested sorting in elasticsearch

I have a document with a nested structure; each nested object has an assignment_name and a due_date:
The mapping
{
"goal": {
"mappings": {
"doc": {
"properties": {
"title": {
"type": "keyword"
},
// lot's of other fields here ...
"steps": {
"type": "nested",
"properties": {
"assignment_name": {
"type": "keyword"
},
"due_date": {
"type": "date"
}
// lots of other fields here
}
}
}
}
}
}
}
I want to:
Filter all documents that have a specific assignment_name (e.g. user_a)
Sort the results by the next due_date, not taking other assignments into account.
This query gives me results in random order (no sorting):
{
"query":{
"bool":{
"filter":[
{
"nested":{
"path":"steps",
"query":{
"term":{
"steps.assignment_name":"user_a"
}
}
}
}
]
}
},
"sort":[
{
"steps.due_date":{
"order":"asc",
"nested":{
"path":"steps",
"filter":{
"term":{
"steps.assignment_name":"user_a"
}
}
}
}
}
],
"from":0,
"size":25
}
Firstly you need to ensure that datatype for steps field is nested. Then you have to use nested sorting to sort documents based on a nested document field.
The query would be:
{
"query": {
"bool": {
"filter": [
{
"nested": {
"path": "steps",
"query": {
"term": {
"steps.assignment_name": "user_a"
}
}
}
}
]
}
},
"sort": [
{
"steps.due_date": {
"order": "asc",
"nested": {
"path": "steps",
"filter": {
"term": {
"steps.assignment_name": "user_a"
}
}
}
}
}
]
}
The catch above is to use the same filter in sort as used in the main query to filter the documents. This ensures that the correct nested document's field value is considered to sort the documents.

Elasticsearch inverse range overlap query

I have the following document:
{
"blocked_availabilities": [
{
"start_time": "2016-05-26T19:30:00Z",
"end_time": "2016-05-26T20:30:00Z"
},
{
"start_time": "2017-05-26T16:00:00Z",
"end_time": "2017-05-26T17:00:00Z",
}
]
}
blocked_availabilities is a nested type in the mapping.
What I'm trying to do is match documents that do not overlap with a specified start and end time. I have the following query to do this (which doesn't work, of course):
{
"query":{
"bool":{
"filter":{
"nested":{
"path":"blocked_availabilities",
"query":{
"bool":{
"must_not":{
"bool":{
"must":[
{
"range":{
"blocked_availabilities.start_time":{
"from":null,
"include_lower":true,
"include_upper":true,
"to":"2016-05-26T20:00:00Z"
}
}
},
{
"range":{
"blocked_availabilities.end_time":{
"from":"2016-05-26T19:00:00Z",
"include_lower":true,
"include_upper":true,
"to":null
}
}
}
]
}
}
}
}
}
}
}
}
}
The problem seems to be that one of the nested documents doesn't match so the whole document is returned.
Is there a good way to do what I want? I expect this document to not be returned by my query since it overlaps with the first nested document.
One way to achieve this is to check if there is any nested object within the overlapping period and do a must_not of the nested query.
This would end up matching on only documents which do not contain any blocked_availabilities overlapping in the desired time period.
Example:
Setup Index
put test
put test/test/_mapping
{
"properties": {
"blocked_availabilities": {
"type": "nested",
"properties": {
"start_time": {
"type": "date"
},
"end_time": {
"type": "date"
}
}
}
}
}
}
put test/test/1
{
"blocked_availabilities": [
{
"start_time": "2016-05-26T19:30:00Z",
"end_time": "2016-05-26T20:30:00Z"
},
{
"start_time": "2017-05-26T16:00:00Z",
"end_time": "2017-05-26T17:00:00Z"
}
]
}
Query:
put test/test/_search
{
"query": {
"bool": {
"must_not": [
{
"nested": {
"path": "blocked_availabilities",
"query": {
"bool": {
"should": [
{
"range": {
"blocked_availabilities.end_time": {
"lte": "2016-05-26T20:00:00Z",
"gte": "2016-05-26T19:00:00Z"
}
}
},
{
"range": {
"blocked_availabilities.start_time": {
"lte": "2016-05-26T20:00:00Z",
"gte": "2016-05-26T19:00:00Z"
}
}
}
]
}
}
}
}
]
}
}
}
You're trying to compute not ((start > s) and (end < e)). Why not simply use (start > e) or (end < s)? It looks like it should work if the data is consistent.

Multi-level nesting in elastic search

I have the below structure (small part of a very large elastic-search document)
sample: {
{
"md5sum":"4002cbda13066720513d1c9d55dba809",
"id":1,
"sha256sum":"1c6e77ec49413bf7043af2058f147fb147c4ee741fb478872f072d063f2338c5",
"sha1sum":"ba1e6e9a849fb4e13e92b33d023d40a0f105f908",
"created_at":"2016-02-02T14:25:19+00:00",
"updated_at":"2016-02-11T20:43:22+00:00",
"file_size":188416,
"type":{
"name":"EXE"
},
"tags":[
],
"sampleSources":[
{
"filename":"4002cbda13066720513d1c9d55dba809",
"source":{
"name":"default"
}
},
{
"filename":"4002cbda13066720332513d1c9d55dba809",
"source":{
"name":"default"
}
}
]
}
}
The filter I would like to use is to find by the 'name' contained within sample.sampleSources.source using elastic search.
I tried the below queries
curl -XGET "http://localhost:9200/app/sample/_search?pretty" -d {query}
where, {query} is
{
"query":{
"nested":{
"path":"sample.sampleSources",
"query":{
"nested":{
"path":"sample.sampleSources.source",
"query":{
"match":{
"sample.sampleSources.source.name":"default"
}
}
}
}
}
}
}
However, it is not returning any results. I have certain cases in my document where the nesting is deeper than this. Can someone please guide me on how I should formulate this query so that it works for all cases?
EDIT 1
Mappings:
{
"app":{
"mappings":{
"sample":{
"sampleSources":{
"type":"nested",
"properties":{
"filename":{
"type":"string"
},
"source":{
"type":"nested",
"properties":{
"name":{
"type":"string"
}
}
}
}
}
}
EDIT 2
The solution posted by Waldemar Neto below works well for a match query, but not for a wildcard or a regexp query.
Can you please guide me? I need the wildcard and regexp queries to work for this.
I tried it here using your examples and it works fine.
Take a look at my data.
mapping:
PUT /app
{
"mappings": {
"sample": {
"properties": {
"sampleSources": {
"type": "nested",
"properties": {
"source": {
"type": "nested"
}
}
}
}
}
}
}
indexed data
POST /app/sample
{
"md5sum": "4002cbda13066720513d1c9d55dba809",
"id": 1,
"sha256sum": "1c6e77ec49413bf7043af2058f147fb147c4ee741fb478872f072d063f2338c5",
"sha1sum": "ba1e6e9a849fb4e13e92b33d023d40a0f105f908",
"created_at": "2016-02-02T14:25:19+00:00",
"updated_at": "2016-02-11T20:43:22+00:00",
"file_size": 188416,
"type": {
"name": "EXE"
},
"tags": [],
"sampleSources": [
{
"filename": "4002cbda13066720513d1c9d55dba809",
"source": {
"name": "default"
}
},
{
"filename": "4002cbda13066720332513d1c9d55dba809",
"source": {
"name": "default"
}
}
]
}
Search query
GET /app/sample/_search
{
"query": {
"nested": {
"path": "sampleSources.source",
"query": {
"match": {
"sampleSources.source.name": "default"
}
}
}
}
}
Example using wildcard
GET /app/sample/_search
{
"query": {
"nested": {
"path": "sampleSources.source",
"query": {
"wildcard": {
"sampleSources.source.name": {
"value": "*aul*"
}
}
}
}
}
}
The only difference I saw was in the path: you don't need to include sample (the type) in the nested path, only the inner objects.
Test it and give me feedback.

Nested ElasticSearch query results in too many items

The nested ElasticSearch query below returns some results it should not hit. A lot of results do not contain the requested order number but are listed nevertheless. I'm not getting all documents though so the query is definitely reducing the result set on some level.
{
"query": {
"nested": {
"path": "orders",
"query": {
"match": {
"orderNumber": "242347"
}
}
}
}
}
The query result (truncated):
{
"took":0,
"timed_out":false,
"_shards": {
"total":1,
"successful":1,
"failed":0
},
"hits": {
"total":60,
"max_score":9.656103,
"hits":[
{
"_index": "index1",
"_type":"documenttype1",
"_id":"mUmudQrVSC6rn68ujDJ8iA",
"_score":9.656103,
"_source" : {
"documentId": 12093894,
"orders": [
{
"customerId": 129048669,
"orderNumber": "242347", // <-- CORRECT HIT ON ORDER
},
{
"customerId": 229405848,
"orderNumber": "431962"
}
]
}
},
{
"_index":"index1",
"_type":"documenttype1",
"_id":"9iO5QBCpT_6kmH3CoBTdWw",
"_score":9.656103,
"_source" : {
"documentId": 43390283,
// <-- ORDER ISN'T HERE BUT THE DOCUMENT IS HIT NEVERTHELESS!
"orders": [
{
"customerId": 229405848,
"orderNumber": "431962"
},
{
"customerId": 129408979,
"orderNumber": "142701"
}
]
}
}
// Left out 58 more results most of which do not contain
// the requested order number.
]
}
}
As you can see, there is a hit (actually, there are quite a few of them) that shouldn't be there because none of the orders contain the requested order number.
This is the mapping for documenttype1:
{
"index1":{
"properties":{
"documentId":{
"type":"integer"
},
"orders":{
"type":"nested",
"properties":{
"customerId":{
"type":"integer"
},
"orderNumber":{
"type":"string",
"analyzer":"custom_internal_code"
}
}
}
}
}
}
Finally, here are the settings to clarify the custom_internal_code analyzer as referred to in the mapping shown above:
{
"index1":{
"settings":{
"index.analysis.analyzer.custom_internal_code.filter.1":"asciifolding",
"index.analysis.analyzer.custom_internal_code.type":"custom",
"index.analysis.analyzer.custom_internal_code.filter.0":"lowercase",
"index.analysis.analyzer.custom_internal_code.tokenizer":"keyword",
}
}
}
For an exact search, use a term query [1] and make orderNumber not_analyzed [2].
[1]
http://www.elasticsearch.org/guide/en/elasticsearch/reference/current/query-dsl-term-query.html#query-dsl-term-query
[2]
http://www.elasticsearch.org/guide/en/elasticsearch/guide/current/mapping-intro.html#_literal_index_literal
It seems that you should use a bool query instead of match.
However, if you just want to filter your records, you should use a nested filter instead of a query. It works faster because scores do not have to be calculated.
http://www.elasticsearch.org/guide/en/elasticsearch/reference/current/query-dsl-nested-filter.html
{
"query": {
"filtered": {
"query": {
"match_all": {}
},
"filter": {
"nested": {
"path": "orders",
"filter": {
"bool": {
"must": [
{
"term": {
"orderNumber": "242347"
}
}
]
}
},
"_cache": true
}
}
}
}
}

Resources