Nested object retrieval in ElasticSearch query - elasticsearch

I'm new in ElasticSearch and I have a few questions regarding nested object retrieval when a specific condition is matched.
I have a tree-like structure as follow:
{
"id": 4,
"sora": [
{
"pContext": {
"context": {
"sT": "D3",
"uT": "ST"
},
"entities": [
{
"name": "premium",
"bName": "premium",
"fT": "site",
"eT": "F_P",
"children": [
{
"name": "capa",
"bName": "capa",
"fT": "site",
"eT": "FFT",
"children": []
},
{
"name": "code",
"bName": "Codes",
"fT": "site",
"eT": "FFT",
"children": []
},
{
"name": "selection A",
"fT": "site",
"eT": "SELECTION_A",
"children": [
{
"name": "A1",
"fT": "site",
"eT": "ADD",
"children": []
},
{
"name": "A2",
"fT": "site",
"eT": "ADD",
"children": []
}
]
}
]
}
]
}
},
{
"pContext": {
"context": {
"sT": "D2",
"uT": "ST"
},
"entities": [
{
"name": "112",
"bName": "112",
"eT": "D_TYPE",
"children": []
}
]
}
}
]
}
My structure can have more levels.
I have many documents as described above. In order to filter my document I can use the simple query sintax:
{
"_source": {
"excludes": [
"*.context"
]
},
"query": {
"bool": {
"must": [
{
"match": {
"sora.pContext.context.sT": "D3"
},
"match": {
"sora.pContext.entities.name": "premium"
},
"match": {
"sora.pContext.entities.fT": "site"
}
}
]
}
}
}
What I would like to know is, how can I get the nested object that
matches my query and their children. I need the object that matched
the must inclusive filter. Is that possible?
How can I search for a field without specifing the path?
Thanks
# EDIT
My mapping:
{
"mappings": {
"abc": {
"properties": {
"id": {
"type": "integer"
},
"sora": {
"type": "nested",
"properties": {
"pContext": {
"type": "nested",
"properties": {
"context": {
"type": "nested",
"properties": {
"sT": {
"type": "text"
},
"uT": {
"type": "text"
}
}
},
"entities": {
"type": "nested",
"properties": {
"name": {
"type": "text"
},
"bName": {
"type": "text"
},
"fT": {
"type": "text"
},
"eT": {
"type": "text"
},
"children": {
"type": "object"
}
}
}
}
}
}
}
}
}
}
}

Yes you can get the matching objects by using inner_hits along with nested query and not the one you added to the question.
Your query will look as below:
{
"_source": {
"excludes": [
"*.context"
]
},
"query": {
"bool": {
"filter": [
{
"nested": {
"inner_hits": {},
"path": "sora.pContext",
"query": {
"bool": {
"must": [
{
"nested": {
"path": "sora.pContext.context",
"query": {
"bool": {
"must": [
{
"match": {
"sora.pContext.context.sT": "D3"
}
}
]
}
}
}
},
{
"nested": {
"path": "sora.pContext.entities",
"query": {
"bool": {
"must": [
{
"match": {
"sora.pContext.entities.name": "premium"
}
},
{
"match": {
"sora.pContext.entities.fT": "site"
}
}
]
}
}
}
}
]
}
}
}
}
]
}
}
}
I have added link to inner_hits documentation where you can understand how the results will look like.

Well, if someone else is facing the same issue my solution was added all child in the same path/level as the parent but keep the mapping with parent and their children. With that, I'm able to search and retrieve the parts of the parent as wanted.

Related

Elasticsearch Querying Double Nested Object, Match Multiple Rows in Query Within Parent

My data model is related to patient records. At the highest level is the Patient, then their information such as Lab Panels and the individual rows of the results of the panel. So it looks like this: {Patient:{Labs:[{Results:[{}]}]}}
I am able to successfully create the two nested objects Labs nested in Patient and Results nested in Labs, populate it, and query it. What I am unable to successfully do is create a query that constrains the results to a single Lab, and then match by more than one row in the Results object.
An example is attached, where I only want labs that are "Lipid Panel" and the results are HDL <= 46 and LDL >= 140.
Any suggestions?
Example Index
PUT localhost:9200/testpipeline
{
"aliases": {},
"mappings": {
"dynamic": "false",
"properties": {
"ageAtFirstEncounter": {
"type": "float"
},
"dateOfBirth": {
"type": "date"
},
"gender": {
"type": "keyword"
},
"id": {
"type": "float"
},
"labs": {
"type": "nested",
"properties": {
"ageOnDateOfService": {
"type": "float"
},
"date": {
"type": "date"
},
"encounterId": {
"type": "keyword"
},
"id": {
"type": "keyword"
},
"isEdVisit": {
"type": "boolean"
},
"labPanelName": {
"type": "keyword"
},
"labPanelNameId": {
"type": "float"
},
"labPanelSourceName": {
"type": "text",
"store": true
},
"personId": {
"type": "keyword"
},
"processingLogId": {
"type": "float"
},
"results": {
"type": "nested",
"properties": {
"dataType": {
"type": "keyword"
},
"id": {
"type": "float"
},
"labTestName": {
"type": "keyword"
},
"labTestNameId": {
"type": "float"
},
"resultAsNumber": {
"type": "float"
},
"resultAsText": {
"type": "keyword"
},
"sourceName": {
"type": "text",
"store": true
},
"unit": {
"type": "keyword"
}
}
}
}
},
"personId": {
"type": "keyword"
},
"processingLogId": {
"type": "float"
},
"race": {
"type": "keyword"
}
}
}
}
Example Document
PUT localhost:9200/testpipeline/_doc/274746
{
"id": 274746,
"personId": "10005786.000000",
"processingLogId": 51,
"gender": "Female",
"dateOfBirth": "1945-01-01T00:00:00",
"ageAtFirstEncounter": 76,
"labs": [
{
"isEdVisit": false,
"labPanelSourceName": "Lipid Panel",
"dataType": "LAB",
"ageOnDateOfService": 76.9041,
"results": [
{
"unit": "mg/dL",
"labTestNameId": 160,
"labTestName": "HDL",
"sourceName": "HDL",
"resultAsNumber": 46.0,
"resultAsText": "46",
"id": 2150284
},
{
"unit": "mg/dL",
"labTestNameId": 158,
"labTestName": "LDL",
"sourceName": "LDL",
"resultAsNumber": 144.0,
"resultAsText": "144.00",
"id": 2150286
}
],
"id": "9ab9ba84-580b-f2d2-4d32-25658ea5f1bf",
"sourceId": 2150278,
"personId": "10003783.000000",
"encounterId": "39617217.000000",
"processingLogId": 51,
"date": "2021-11-08T00:00:00"
}
],
"lastModified": "2022-03-24T10:21:29.8682784-05:00"
}
Example Query
POST localhost:9200/testpipeline/_search
{
"fields": [
"personId",
"processingLogId",
"id",
"gender",
"ageAtFirstDOS",
"dateOfBirth"
],
"from": 0,
"query": {
"bool": {
"should": [
{
"constant_score": {
"boost": 200,
"filter": {
"bool": {
"_name": "CriteriaFilterId:2068,CriteriaId:1,CriteriaClassId:1,Points:200,T5:False,SoftScore:200",
"should": [
{
"bool": {
"must": [
{
"nested": {
"path": "labs",
"inner_hits": {
"size": 3,
"name": "labs,CriteriaFilterId:2068,CriteriaId:1,CriteriaClassId:1,Points:200,T5:False,guid:8b41f346-2861-4099-b3c0-fcd6393c367b"
},
"query": {
"bool": {
"must": [
{
"bool": {
"must": [
{
"match_phrase": {
"labs.labPanelSourceName": {
"_name": "CriteriaFilterId:2068,Pipeline.Labs.LabPanelSourceName,es_match_phrase=>'Lipid Panel' found in text",
"query": "Lipid Panel",
"slop": 100
}
}
},
{
"nested": {
"path": "labs.results",
"inner_hits": {
"size": 3,
"name": "labs.results,CriteriaFilterId:2068,CriteriaId:1,CriteriaClassId:1,Points:200,T5:False,guid:3564e83f-958b-4fe8-848e-f9edb5d7f3b2"
},
"query": {
"bool": {
"must": [
{
"bool": {
"should": [
{
"bool": {
"must": [
{
"range": {
"labs.results.resultAsNumber": {
"lte": 46
}
}
},
{
"term": {
"labs.results.labTestNameId": {
"value": 160
}
}
}
]
}
},
{
"bool": {
"must": [
{
"range": {
"labs.results.resultAsNumber": {
"gte": 140.0
}
}
},
{
"term": {
"labs.results.labTestNameId": {
"value": 158
}
}
}
]
}
}
],
"minimum_should_match": 2
}
}
]
}
}
}
}
]
}
}
]
}
}
}
}
]
}
}
]
}
}
}
}
],
"minimum_should_match": 1,
"filter": [
]
}
},
"size": 10,
"sort": [
{
"_score": {
"order": "desc"
}
},
{
"processingLogId": {
"order": "asc"
}
},
{
"personId": {
"order": "asc"
}
}
],
"_source": false
}

Nested filter returns 0 doc_count

For this index and sample data:
PUT job_offers
{
"mappings": {
"properties": {
"location": {
"properties": {
"slug": {
"type": "keyword"
},
"name": {
"type": "keyword"
}
},
"type": "nested"
},
"experience": {
"properties": {
"slug": {
"type": "keyword"
},
"name": {
"type": "keyword"
}
},
"type": "nested"
}
}
}
}
POST job_offers/_doc
{
"title": "Junior Ruby on Rails Developer",
"location": [
{
"slug": "new-york",
"name": "New York"
},
{
"slug": "atlanta",
"name": "Atlanta"
},
{
"slug": "remote",
"name": "Remote"
}
],
"experience": [
{
"slug": "junior",
"name": "Junior"
}
]
}
POST job_offers/_doc
{
"title": "Ruby on Rails Developer",
"location": [
{
"slug": "chicago",
"name": "Chicago"
},
{
"slug": "atlanta",
"name": "Atlanta"
}
],
"experience": [
{
"slug": "senior",
"name": "Senior"
}
]
}
I try to run filter on experience.slug:
GET job_offers/_search
{
"query": {
"nested": {
"path": "location",
"query": {
"terms": {
"location.slug": [
"remote",
"new-york"
]
}
}
}
},
"aggs": {
"filtered_job_offers": {
"global": {},
"aggs": {
"filtered_location": {
"filter": {
"bool": {
"must": [
{
"terms": {
"experience.slug": [
"junior"
]
}
}
]
}
}
}
}
}
}
}
Response for this:
"aggregations" : {
"filtered_job_offers" : {
"doc_count" : 2,
"filtered_location" : {
"doc_count" : 0
}
}
}
Why do I get doc_count: 0 for filtered_location instead of 1? How can I make it work?
You were pretty close! Gotta use a nested query in the aggregations:
...
"aggs": {
"filtered_job_offers": {
"global": {},
"aggs": {
"filtered_location": {
"filter": {
"bool": {
"must": [
{
"nested": { <-----
"path": "experience",
"query": {
"terms": {
"experience.slug": [
"junior"
]
}
}
}
}
]
}
}
}
}
}
}

How to get by ids and filter nested data to leave only objects filtered by category?

How to get by ids and filter notes to leave only given category?
Data:
POST c1_2/Blog/1
{
"post": {
"notes": {
"main": [
{
"message": "blablabla",
"category": "test"
},
{
"message": "blablabla",
"category": "other"
}
],
"cart": [
{
"message": "blablabla",
"category": "test"
},
{
"message": "blablabla",
"category": "other"
}
]
}
}
}
POST c1_2/Blog/2
{
"post": {
"notes": {
"main": [
{
"message": "blablabla",
"category": "second"
},
{
"message": "blablabla",
"category": "third"
}
],
"cart": [
{
"message": "blablabla",
"category": "test"
},
{
"message": "blablabla",
"category": "other"
}
]
}
}
}
Search:
POST c1_2/Blog/_search
{
"query": {
"bool": {
"must": [
{
"ids": {
"values": [
1,
2,
3
]
}
},
{
"terms": {
"post.notes.main.category": [
"test"
]
}
}
]
}
}
}
Current results, objects in notes main/cart aren't filtered by category:
{
"took": 9,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"failed": 0
},
"hits": {
"total": 1,
"max_score": 1.0122644,
"hits": [
{
"_index": "c1_2",
"_type": "Blog",
"_id": "1",
"_score": 1.0122644,
"_source": {
"post": {
"notes": {
"main": [
{
"message": "blablabla",
"category": "test"
},
{
"message": "blablabla",
"category": "other"
}
],
"cart": [
{
"message": "blablabla",
"category": "test"
},
{
"message": "blablabla",
"category": "other"
}
]
}
}
}
}
]
}
}
Desired effect:
"notes": {
"main": [
{
"message": "blablabla",
"category": "test"
}
],
"cart": [
{
"message": "blablabla",
"category": "test"
}
]
}
In my real app query is embedded in "filtered" and "filter", if I will put query above instead filter "ids" like in example below, then will it return the same data?
POST c1_2/Blog/_search
{
"query": {
"filtered": {
"query": {
"match_all": {}
},
"filter": {
"ids": {
"values": [
"1",
"2"
]
}
}
}
}
}
Mapping:
{
"posts": {
"mappings": {
"posts": {
"dynamic_templates": [{
"blog": {
"mapping": {
"index": "analyzed"
},
"path_match": "blog.*",
"path_unmatch": "*.medias.*"
}
}, {
"ids": {
"mapping": {
"index": "not_analyzed",
"type": "string"
},
"match": "_id|base_id",
"match_pattern": "regex"
}
}],
"_all": {
"enabled": false
},
"properties": {
"query": {
"properties": {
"filtered": {
"properties": {
"filter": {
"properties": {
"ids": {
"properties": {
"values": {
"type": "string"
}
}
}
}
},
"query": {
"properties": {
"match_all": {
"type": "object"
}
}
}
}
},
"match_all": {
"type": "object"
}
}
},
"source": {
"dynamic": "true",
"properties": {
"post": {
"dynamic": "true",
"properties": {
"_id": {
"type": "string",
"index": "not_analyzed"
},
"base_id": {
"type": "string",
"index": "not_analyzed"
}
}
}
}
},
"blog": {
"properties": {
"post": {
"properties": {
"_id": {
"type": "string"
},
"notes": {
"properties": {
"main": {
"properties": {
"id": {
"type": "string"
},
"message": {
"type": "string"
},
"category": {
"type": "string"
}
}
},
"cart": {
"properties": {
"id": {
"type": "string"
},
"message": {
"type": "string"
},
"category": {
"type": "string"
}
}
}
}
}
}
}
}
}
}
}
}
}
}
{
"query": {
"filtered": {
"filter": {
"bool": {
"must": {
"terms": {
"_id": [1, 2]
}
},
"must_not": {
"terms": {
"post.notes.main.category": [
"other"
]
}
}
}
}
}
}
}

How to filter by ids together with filter fields value?

How to add additional filter to match category values in blog.post.notes all fields? First I want to filter by ids, then filter notes category, is it possible?
I can filter only by ids:
GET posts/posts/_search?fields=_id&_source=blog.post.notes
{
"query": {
"filtered": {
"query": {
"match_all": {}
},
"filter": {
"ids": {
"values": [
"100000000001234"
]
}
}
}
}
}
How to filter e.g. "test" category from current results:
{
"took": 58,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"failed": 0
},
"hits": {
"total": 1,
"max_score": 1,
"hits": [{
"_index": "posts",
"_type": "posts",
"_id": "100000000001234",
"_score": 1,
"_source": {
"blog": {
"post": {
"notes": {
"main": [{
"message": "blablabla",
"category": "test"
}, {
"message": "blablabla",
"category": "other"
}],
"cart": [{
"message": "blablabla",
"category": "test"
}, {
"message": "blablabla",
"category": "other"
}]
}
}
}
}
}]
}
}
curl -XGET localhost:9200/posts/_mapping/posts
{
"posts": {
"mappings": {
"posts": {
"dynamic_templates": [{
"blog": {
"mapping": {
"index": "analyzed"
},
"path_match": "blog.*",
"path_unmatch": "*.medias.*"
}
}, {
"ids": {
"mapping": {
"index": "not_analyzed",
"type": "string"
},
"match": "_id|base_id",
"match_pattern": "regex"
}
}],
"_all": {
"enabled": false
},
"properties": {
"query": {
"properties": {
"filtered": {
"properties": {
"filter": {
"properties": {
"ids": {
"properties": {
"values": {
"type": "string"
}
}
}
}
},
"query": {
"properties": {
"match_all": {
"type": "object"
}
}
}
}
},
"match_all": {
"type": "object"
}
}
},
"source": {
"dynamic": "true",
"properties": {
"post": {
"dynamic": "true",
"properties": {
"_id": {
"type": "string",
"index": "not_analyzed"
},
"base_id": {
"type": "string",
"index": "not_analyzed"
}
}
}
}
},
"blog": {
"properties": {
"post": {
"properties": {
"_id": {
"type": "string"
},
"notes": {
"properties": {
"main": {
"properties": {
"id": {
"type": "string"
},
"message": {
"type": "string"
},
"category": {
"type": "string"
}
}
},
"cart": {
"properties": {
"id": {
"type": "string"
},
"message": {
"type": "string"
},
"category": {
"type": "string"
}
}
}
}
}
}
}
}
}
}
}
}
}
}
You can use bool query with must on ids and terms
POST c1_2/Test/_search
{
"query": {
"bool": {
"must": [
{
"ids": {
"values": [
1,
2,
3
]
}
},
{
"terms": {
"blog.post.notes.main.category": [
"categoryfilter"
]
}
}
]
}
}
}
But since you have main and cart categories you must use filter on each of them, in my example i filter on main categories, if you need to do filter on both you need to use one more or filter which will filter on main or cart categories
Also you should know that category should be not_analyzed in order to filter on something like "my super category" other wise query will not be working properly.
Example
POST c1_2/Blog/1
{
"post": {
"notes": {
"main": [
{
"message": "blablabla",
"category": "test"
},
{
"message": "blablabla",
"category": "other"
}
],
"cart": [
{
"message": "blablabla",
"category": "test"
},
{
"message": "blablabla",
"category": "other"
}
]
}
}
}
POST c1_2/Blog/2
{
"post": {
"notes": {
"main": [
{
"message": "blablabla",
"category": "second"
},
{
"message": "blablabla",
"category": "third"
}
],
"cart": [
{
"message": "blablabla",
"category": "test"
},
{
"message": "blablabla",
"category": "other"
}
]
}
}
}
POST c1_2/Blog/_search
{
"query": {
"bool": {
"must": [
{
"ids": {
"values": [
1,
2,
3
]
}
},
{
"terms": {
"post.notes.main.category": [
"test"
]
}
}
]
}
}
}

Elasticsearch, how to use filter gte on specific structure

I'm trying to learn Elasticsearch, and therefore I'm trying to find a good data structure, that will fit my needs for querying.
My data structure actually looks like this :
{
"questions": [
{
"id": "n1-pain-score",
"name": "Pain score",
"answer": {
"value": 3,
"label": "Small pain"
}
},
{
"id": "n2-temperature",
"name": "Temperature",
"answer": {
"value": 37.5,
"label": "37.5°C"
}
}
]
}
For a given patient, there are multiple questions and answers.
Is there a way to take advantage of Elasticsearch filtered queries and say :
I want to see the patients that have a pain score over 6 ?
Note that I'm only testing and I still can modify the data structure to get what I want :).
It is possible to do with a filtered query. A plain query will also do:
GET {index}/{type}/_search
{
"query": {
"bool": {
"must": [
{
"match": {
"name": "Pain score"
}
},
{
"range": {
"answer.value": {
"gt":6
}
}
}
]
}
}
}
I suggest the following mapping and queries. Most important part - questions needs to be of type nested so that the id matching should of the same element of questions. Meaning the id with value n1-pain-score must belong to the same element that also contains answer.value: 3.
DELETE test
PUT /test
{
"mappings": {
"test": {
"properties": {
"questions": {
"type": "nested",
"properties": {
"answer": {
"properties": {
"label": {
"type": "string"
},
"value": {
"type": "long"
}
}
},
"id": {
"type": "string",
"fields": {
"notAnalyzed": {
"type": "string",
"index": "not_analyzed"
}
}
},
"name": {
"type": "string"
}
}
}
}
}
}
}
POST /test/test/1
{
"questions": [
{
"id": "n1-pain-score",
"name": "Pain score",
"answer": {
"value": 3,
"label": "Small pain"
}
},
{
"id": "n2-temperature",
"name": "Temperature",
"answer": {
"value": 37.5,
"label": "37.5°C"
}
}
]
}
GET /test/_search
{
"query": {
"nested": {
"path": "questions",
"query": {
"bool": {
"must": [
{
"term": {
"questions.id.notAnalyzed": {
"value": "n1-pain-score"
}
}
},
{
"range": {
"questions.answer.value": {
"gte": 3
}
}
}
]
}
}
}
}
}
GET /test/_search
{
"query": {
"nested": {
"path": "questions",
"query": {
"bool": {
"must": [
{
"term": {
"questions.id.notAnalyzed": {
"value": "n2-temperature"
}
}
},
{
"range": {
"questions.answer.value": {
"gte": 38
}
}
}
]
}
}
}
}
}

Resources