Highlight nested object in Elasticsearch - elasticsearch

Here is my sample dataset,
{
"parent":[
{
"name":"John Doe 1",
"age":"100 year",
"sex":"male",
"child":[
{
"name":"Jane Doe 1",
"height":100.00,
"width":100.00
},
{
"name":"Jane Doe 2",
"height":100.00,
"width":100.00
}
]
},
{
"name":"John Doe 2",
"age":"100 year",
"sex":"male",
"child":[
{
"name":"Jane Doe 3",
"height":100.00,
"width":100.00
},
{
"name":"Jane Doe 4",
"height":100.00,
"width":100.00
}
]
}
]
}
And my definition:
{
"settings": {
"index": {
"analysis": {
"analyzer": {
"default": {
"type": "simple"
}
}
}
}
},
"mappings": {
"_doc": {
"properties": {
"parent": {
"type": "nested",
"properties": {
"name": {
"type": "keyword"
},
"age": {
"type": "text"
},
"sex": {
"type": "text"
},
"child": {
"type": "nested",
"properties": {
"name": {
"type": "text"
},
"height": {
"type": "float"
},
"width": {
"type": "float"
}
}
}
}
}
}
}
}
}
I'm using the following query to look for matches in the parent.name property and can get highlights.
{
"query": {
"bool": {
"should": [
{
"nested": {
"inner_hits": {
"highlight": {
"fields": {
"parent.name": {}
},
"number_of_fragments": 0,
"pre_tags": [
"<span>"
],
"post_tags": [
"</span>"
]
}
},
"path": "parent",
"query": {
"bool": {
"must": [
{
"match": {
"parent.name": {
"query": "John",
"fuzziness": "AUTO:3,6",
"prefix_length": "0"
}
}
}
]
}
}
}
}
]
}
},
"_source": ["parent"],
"sort": [
{
"_score": {
"order": "desc"
}
},
{
"createdOn": {
"order": "desc"
}
}
]
}
Is there a way to get inline highlights for the matches in the child.name properties also so that it would be easy to find exactly which element of that corresponding array got matched?
For example, for the given sample data, if I search by "Doe", I'm expecting to get 6 hits, whereas if I search by "Jane", I would get only 4.

You can simply add another nested query clause inside your top-level should.
Here's how your query should look:
{
"query": {
"bool": {
"should": [
{
"nested": {
"inner_hits": {
"highlight": {
"fields": {
"parent.name": {}
},
"number_of_fragments": 0,
"pre_tags": [
"<span>"
],
"post_tags": [
"</span>"
]
}
},
"path": "parent",
"query": {
"bool": {
"must": [
{
"match": {
"parent.name": {
"query": "John Doe 1"
}
}
}
]
}
}
}
},
{
"nested": {
"inner_hits": {
"highlight": {
"fields": {
"parent.child.name": {}
},
"number_of_fragments": 0,
"pre_tags": [
"<span>"
],
"post_tags": [
"</span>"
]
}
},
"path": "parent.child",
"query": {
"bool": {
"must": [
{
"match": {
"parent.child.name": {
"query": "Jane Doe 1"
}
}
}
]
}
}
}
}
],
"minimum_should_match": 1
}
},
"_source": ["parent"],
"sort": [
{
"_score": {
"order": "desc"
}
},
{
"createdOn": {
"order": "desc"
}
}
]
}

Related

Elasticsearch Querying Double Nested Object, Match Multiple Rows in Query Within Parent

My data model is related to patient records. At the highest level is the Patient, then their information such as Lab Panels and the individual rows of the results of the panel. So it looks like this: {Patient:{Labs:[{Results:[{}]}]}}
I am able to successfully create the two nested objects Labs nested in Patient and Results nested in Labs, populate it, and query it. What I am unable to successfully do is create a query that constrains the results to a single Lab, and then match by more than one row in the Results object.
An example is attached, where I only want labs that are "Lipid Panel" and the results are HDL <= 46 and LDL >= 140.
Any suggestions?
Example Index
PUT localhost:9200/testpipeline
{
"aliases": {},
"mappings": {
"dynamic": "false",
"properties": {
"ageAtFirstEncounter": {
"type": "float"
},
"dateOfBirth": {
"type": "date"
},
"gender": {
"type": "keyword"
},
"id": {
"type": "float"
},
"labs": {
"type": "nested",
"properties": {
"ageOnDateOfService": {
"type": "float"
},
"date": {
"type": "date"
},
"encounterId": {
"type": "keyword"
},
"id": {
"type": "keyword"
},
"isEdVisit": {
"type": "boolean"
},
"labPanelName": {
"type": "keyword"
},
"labPanelNameId": {
"type": "float"
},
"labPanelSourceName": {
"type": "text",
"store": true
},
"personId": {
"type": "keyword"
},
"processingLogId": {
"type": "float"
},
"results": {
"type": "nested",
"properties": {
"dataType": {
"type": "keyword"
},
"id": {
"type": "float"
},
"labTestName": {
"type": "keyword"
},
"labTestNameId": {
"type": "float"
},
"resultAsNumber": {
"type": "float"
},
"resultAsText": {
"type": "keyword"
},
"sourceName": {
"type": "text",
"store": true
},
"unit": {
"type": "keyword"
}
}
}
}
},
"personId": {
"type": "keyword"
},
"processingLogId": {
"type": "float"
},
"race": {
"type": "keyword"
}
}
}
}
Example Document
PUT localhost:9200/testpipeline/_doc/274746
{
"id": 274746,
"personId": "10005786.000000",
"processingLogId": 51,
"gender": "Female",
"dateOfBirth": "1945-01-01T00:00:00",
"ageAtFirstEncounter": 76,
"labs": [
{
"isEdVisit": false,
"labPanelSourceName": "Lipid Panel",
"dataType": "LAB",
"ageOnDateOfService": 76.9041,
"results": [
{
"unit": "mg/dL",
"labTestNameId": 160,
"labTestName": "HDL",
"sourceName": "HDL",
"resultAsNumber": 46.0,
"resultAsText": "46",
"id": 2150284
},
{
"unit": "mg/dL",
"labTestNameId": 158,
"labTestName": "LDL",
"sourceName": "LDL",
"resultAsNumber": 144.0,
"resultAsText": "144.00",
"id": 2150286
}
],
"id": "9ab9ba84-580b-f2d2-4d32-25658ea5f1bf",
"sourceId": 2150278,
"personId": "10003783.000000",
"encounterId": "39617217.000000",
"processingLogId": 51,
"date": "2021-11-08T00:00:00"
}
],
"lastModified": "2022-03-24T10:21:29.8682784-05:00"
}
Example Query
POST localhost:9200/testpipeline/_search
{
"fields": [
"personId",
"processingLogId",
"id",
"gender",
"ageAtFirstDOS",
"dateOfBirth"
],
"from": 0,
"query": {
"bool": {
"should": [
{
"constant_score": {
"boost": 200,
"filter": {
"bool": {
"_name": "CriteriaFilterId:2068,CriteriaId:1,CriteriaClassId:1,Points:200,T5:False,SoftScore:200",
"should": [
{
"bool": {
"must": [
{
"nested": {
"path": "labs",
"inner_hits": {
"size": 3,
"name": "labs,CriteriaFilterId:2068,CriteriaId:1,CriteriaClassId:1,Points:200,T5:False,guid:8b41f346-2861-4099-b3c0-fcd6393c367b"
},
"query": {
"bool": {
"must": [
{
"bool": {
"must": [
{
"match_phrase": {
"labs.labPanelSourceName": {
"_name": "CriteriaFilterId:2068,Pipeline.Labs.LabPanelSourceName,es_match_phrase=>'Lipid Panel' found in text",
"query": "Lipid Panel",
"slop": 100
}
}
},
{
"nested": {
"path": "labs.results",
"inner_hits": {
"size": 3,
"name": "labs.results,CriteriaFilterId:2068,CriteriaId:1,CriteriaClassId:1,Points:200,T5:False,guid:3564e83f-958b-4fe8-848e-f9edb5d7f3b2"
},
"query": {
"bool": {
"must": [
{
"bool": {
"should": [
{
"bool": {
"must": [
{
"range": {
"labs.results.resultAsNumber": {
"lte": 46
}
}
},
{
"term": {
"labs.results.labTestNameId": {
"value": 160
}
}
}
]
}
},
{
"bool": {
"must": [
{
"range": {
"labs.results.resultAsNumber": {
"gte": 140.0
}
}
},
{
"term": {
"labs.results.labTestNameId": {
"value": 158
}
}
}
]
}
}
],
"minimum_should_match": 2
}
}
]
}
}
}
}
]
}
}
]
}
}
}
}
]
}
}
]
}
}
}
}
],
"minimum_should_match": 1,
"filter": [
]
}
},
"size": 10,
"sort": [
{
"_score": {
"order": "desc"
}
},
{
"processingLogId": {
"order": "asc"
}
},
{
"personId": {
"order": "asc"
}
}
],
"_source": false
}

Modify elasticsearch score based on nested field value with filter

Elasticsearch version: 7.11
I have the following structure of index:
PUT /my-test-index
{
"mappings": {
"properties": {
"brand": {
"type": "text"
},
"id": {
"type": "keyword"
},
"availabilityId": {
"type": "integer"
},
"priorities": {
"type": "nested",
"properties": {
"typeId": {
"type": "keyword"
},
"value": {
"type": "integer"
}
}
},
"title": {
"type": "text"
}
}
}
}
For instance, using this data:
POST /my-test-index/_doc
{
"id": 1,
"brand": "Milk One",
"availabilityId": 1,
"title": "Great Value 2% Reduced-Fat Milk, 0.5 Gallon, 64 Fl. Oz",
"priorities": [
{
"typeId": 1,
"value": 3000
},
{
"typeId": 2,
"value": 4000
}
]
}
POST /my-test-index/_doc
{
"id": 2,
"brand": "Milky Two",
"availabilityId": 2,
"title": "Great Value 3.5% Milk Fat Whole Milk, 64 Fl Oz",
"priorities": [
{
"typeId": 1,
"value": 1000
},
{
"typeId": 2,
"value": 3000
}
]
}
POST /my-test-index/_doc
{
"id": 3,
"brand": "Nesty",
"availabilityId": 3,
"title": "Great Value 1% Low-Fat Milk, 128 Fl Oz"
}
POST /my-test-index/_doc
{
"id": 4,
"brand": "No milk",
"availabilityId": 4,
"title": "Almond Breeze Vanilla Almondmilk",
"priorities": [
{
"typeId": 1,
"value": 6000
},
{
"typeId": 2,
"value": 2000
}
]
}
I want:
filtered documents by availabilityId (1, 2, 3)
multi match by title, brand
modify the score by multiplying it by log1p of priorities.value, but only for the priority with typeId = 1. If priorities is missing, use 6000 as the value
sort by score
My query:
POST /my-test-index/_search?typed_keys=true
{
"query": {
"bool": {
"filter": [
{
"bool": {
"filter": [
{
"terms": {
"availabilityId": [
"1",
"2",
"3"
]
}
}
]
}
}
],
"must": [
{
"bool": {
"should": [
{
"multi_match": {
"fields": [
"title^100",
"brand^15"
],
"fuzziness": 0,
"minimum_should_match": "2<80%",
"query": "milk",
"type": "most_fields"
}
}
]
}
}
],
"should": [
{
"nested": {
"path": "priorities",
"query": {
"function_score": {
"query": {
"match": {
"priorities.typeId": "1"
}
},
"functions": [
{
"field_value_factor": {
"field": "priorities.value",
"modifier": "log1p",
"missing": 6000
}
}
]
}
}
}
}
]
}
},
"sort": [
{
"_score": {
"order": "desc"
}
},
{
"_id": {
"order": "asc"
}
}
]
}
Problem:
the final score is not multiplied, only summed
documents without priorities are ignored
I solved my problem by using the function script_score:
POST /my-test-index/_search?typed_keys=true
{
"query": {
"function_score": {
"functions": [
{
"script_score": {
"script" : """double priority1 = 6000; if(params._source["priorities"] != null){for (int i = 0; i < params._source["priorities"].length; ++i){ if(params._source["priorities"][i].typeId == 1) priority1 = params._source["priorities"][i].value;}} return Math.log10(priority1 + 1);"""
}
}
],
"query": {
"bool": {
"filter": [
{
"bool": {
"filter": [
{
"terms": {
"availabilityId": [
"1",
"2",
"3"
]
}
}
]
}
}
],
"must": [
{
"bool": {
"should": [
{
"multi_match": {
"fields": [
"title^100",
"brand^15"
],
"fuzziness": 0,
"minimum_should_match": "2<80%",
"query": "milk",
"type": "most_fields"
}
}
]
}
}
]
}
}
}
},
"sort": [
{
"_score": {
"order": "desc"
}
},
{
"_id": {
"order": "asc"
}
}
]
}

Nested filter returns 0 doc_count

For this index and sample data:
PUT job_offers
{
"mappings": {
"properties": {
"location": {
"properties": {
"slug": {
"type": "keyword"
},
"name": {
"type": "keyword"
}
},
"type": "nested"
},
"experience": {
"properties": {
"slug": {
"type": "keyword"
},
"name": {
"type": "keyword"
}
},
"type": "nested"
}
}
}
}
POST job_offers/_doc
{
"title": "Junior Ruby on Rails Developer",
"location": [
{
"slug": "new-york",
"name": "New York"
},
{
"slug": "atlanta",
"name": "Atlanta"
},
{
"slug": "remote",
"name": "Remote"
}
],
"experience": [
{
"slug": "junior",
"name": "Junior"
}
]
}
POST job_offers/_doc
{
"title": "Ruby on Rails Developer",
"location": [
{
"slug": "chicago",
"name": "Chicago"
},
{
"slug": "atlanta",
"name": "Atlanta"
}
],
"experience": [
{
"slug": "senior",
"name": "Senior"
}
]
}
I'm trying to run a filter on experience.slug:
GET job_offers/_search
{
"query": {
"nested": {
"path": "location",
"query": {
"terms": {
"location.slug": [
"remote",
"new-york"
]
}
}
}
},
"aggs": {
"filtered_job_offers": {
"global": {},
"aggs": {
"filtered_location": {
"filter": {
"bool": {
"must": [
{
"terms": {
"experience.slug": [
"junior"
]
}
}
]
}
}
}
}
}
}
}
Response for this:
"aggregations" : {
"filtered_job_offers" : {
"doc_count" : 2,
"filtered_location" : {
"doc_count" : 0
}
}
}
Why do I get doc_count: 0 for filtered_location instead of 1? How can I make it work?
You were pretty close! Gotta use a nested query in the aggregations:
...
"aggs": {
"filtered_job_offers": {
"global": {},
"aggs": {
"filtered_location": {
"filter": {
"bool": {
"must": [
{
"nested": { <-----
"path": "experience",
"query": {
"terms": {
"experience.slug": [
"junior"
]
}
}
}
}
]
}
}
}
}
}
}

Nested object retrieval in ElasticSearch query

I'm new to ElasticSearch and I have a few questions regarding nested object retrieval when a specific condition is matched.
I have a tree-like structure as follows:
{
"id": 4,
"sora": [
{
"pContext": {
"context": {
"sT": "D3",
"uT": "ST"
},
"entities": [
{
"name": "premium",
"bName": "premium",
"fT": "site",
"eT": "F_P",
"children": [
{
"name": "capa",
"bName": "capa",
"fT": "site",
"eT": "FFT",
"children": []
},
{
"name": "code",
"bName": "Codes",
"fT": "site",
"eT": "FFT",
"children": []
},
{
"name": "selection A",
"fT": "site",
"eT": "SELECTION_A",
"children": [
{
"name": "A1",
"fT": "site",
"eT": "ADD",
"children": []
},
{
"name": "A2",
"fT": "site",
"eT": "ADD",
"children": []
}
]
}
]
}
]
}
},
{
"pContext": {
"context": {
"sT": "D2",
"uT": "ST"
},
"entities": [
{
"name": "112",
"bName": "112",
"eT": "D_TYPE",
"children": []
}
]
}
}
]
}
My structure can have more levels.
I have many documents as described above. In order to filter my documents I can use the simple query syntax:
{
"_source": {
"excludes": [
"*.context"
]
},
"query": {
"bool": {
"must": [
{
"match": {
"sora.pContext.context.sT": "D3"
}
},
{
"match": {
"sora.pContext.entities.name": "premium"
}
},
{
"match": {
"sora.pContext.entities.fT": "site"
}
}
]
}
}
}
What I would like to know is: how can I get the nested object that
matches my query, along with its children? I need the object that matched
all of the must clauses. Is that possible?
How can I search for a field without specifying the path?
Thanks
# EDIT
My mapping:
{
"mappings": {
"abc": {
"properties": {
"id": {
"type": "integer"
},
"sora": {
"type": "nested",
"properties": {
"pContext": {
"type": "nested",
"properties": {
"context": {
"type": "nested",
"properties": {
"sT": {
"type": "text"
},
"uT": {
"type": "text"
}
}
},
"entities": {
"type": "nested",
"properties": {
"name": {
"type": "text"
},
"bName": {
"type": "text"
},
"fT": {
"type": "text"
},
"eT": {
"type": "text"
},
"children": {
"type": "object"
}
}
}
}
}
}
}
}
}
}
}
Yes, you can get the matching objects by using inner_hits together with a nested query, rather than the plain bool/match query you added to the question.
Your query will look as below:
{
"_source": {
"excludes": [
"*.context"
]
},
"query": {
"bool": {
"filter": [
{
"nested": {
"inner_hits": {},
"path": "sora.pContext",
"query": {
"bool": {
"must": [
{
"nested": {
"path": "sora.pContext.context",
"query": {
"bool": {
"must": [
{
"match": {
"sora.pContext.context.sT": "D3"
}
}
]
}
}
}
},
{
"nested": {
"path": "sora.pContext.entities",
"query": {
"bool": {
"must": [
{
"match": {
"sora.pContext.entities.name": "premium"
}
},
{
"match": {
"sora.pContext.entities.fT": "site"
}
}
]
}
}
}
}
]
}
}
}
}
]
}
}
}
I have added a link to the inner_hits documentation, where you can see what the results will look like.
Well, in case someone else is facing the same issue: my solution was to add all children at the same path/level as the parent, while keeping the mapping between each parent and its children. With that, I'm able to search and retrieve the parts of the parent as wanted.

Elasticsearch how to sort with condition

On my ElasticSearch (2.x) I have documents like this:
{
"title": "A good title",
"formats": [{
"name": "pdf",
"prices": [{
"price": 11.99,
"currency": "EUR"
}, {
"price": 18.99,
"currency": "AUD"
}]
}]
}
I'd like to sort documents by formats.prices.price but only where the formats.prices.currency === 'EUR'
I tried to do a nested field on formats.prices and then run this query:
{
"query": {
"filtered": {
"query": {
"and": [
{
"match_all": {}
}
]
}
}
},
"sort": {
"formats.prices.price": {
"order": "desc",
"nested_path": "formats.prices",
"nested_filter": {
"term": {
"currency": "EUR"
}
}
}
}
}
But unfortunately I cannot get the right order.
UPDATE:
Relevant part of mapping:
"formats": {
"properties": {
"name": {
"type": "string"
},
"prices": {
"type": "nested",
"include_in_parent": true,
"properties": {
"currency": {
"type": "string"
},
"price": {
"type": "double"
}
}
}
}
},
I hope this will solve your problem:
{
"query": {
"bool": {
"must": [
{
"nested": {
"path": "formats.prices",
"filter": {
"match": {
"formats.prices.currency": "EUR"
}
}
}
}
]
}
},
"from": 0,
"size": 50,
"sort": [
{
"formats.prices.price": {
"order": "asc",
"nested_path": "formats.prices",
"nested_filter": {
"match": {
"formats.prices.currency": "EUR"
}
}
}
}
]
}

Resources