Nested filter returns 0 doc_count - elasticsearch

For this index and sample data:
PUT job_offers
{
"mappings": {
"properties": {
"location": {
"properties": {
"slug": {
"type": "keyword"
},
"name": {
"type": "keyword"
}
},
"type": "nested"
},
"experience": {
"properties": {
"slug": {
"type": "keyword"
},
"name": {
"type": "keyword"
}
},
"type": "nested"
}
}
}
}
POST job_offers/_doc
{
"title": "Junior Ruby on Rails Developer",
"location": [
{
"slug": "new-york",
"name": "New York"
},
{
"slug": "atlanta",
"name": "Atlanta"
},
{
"slug": "remote",
"name": "Remote"
}
],
"experience": [
{
"slug": "junior",
"name": "Junior"
}
]
}
POST job_offers/_doc
{
"title": "Ruby on Rails Developer",
"location": [
{
"slug": "chicago",
"name": "Chicago"
},
{
"slug": "atlanta",
"name": "Atlanta"
}
],
"experience": [
{
"slug": "senior",
"name": "Senior"
}
]
}
I am trying to run a filter aggregation on experience.slug:
GET job_offers/_search
{
"query": {
"nested": {
"path": "location",
"query": {
"terms": {
"location.slug": [
"remote",
"new-york"
]
}
}
}
},
"aggs": {
"filtered_job_offers": {
"global": {},
"aggs": {
"filtered_location": {
"filter": {
"bool": {
"must": [
{
"terms": {
"experience.slug": [
"junior"
]
}
}
]
}
}
}
}
}
}
}
Response for this:
"aggregations" : {
"filtered_job_offers" : {
"doc_count" : 2,
"filtered_location" : {
"doc_count" : 0
}
}
}
Why do I get doc_count: 0 for filtered_location instead of 1? How can I make it work?

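You were pretty close! Because experience is mapped as nested, a plain terms filter cannot see its sub-documents, which is why doc_count is 0. You have to wrap the filter in a nested query inside the aggregation: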
...
"aggs": {
"filtered_job_offers": {
"global": {},
"aggs": {
"filtered_location": {
"filter": {
"bool": {
"must": [
{
"nested": { <-----
"path": "experience",
"query": {
"terms": {
"experience.slug": [
"junior"
]
}
}
}
}
]
}
}
}
}
}
}

Related

Elasticsearch Querying Double Nested Object, Match Multiple Rows in Query Within Parent

My data model is related to patient records. At the highest level is the Patient, then their information such as Lab Panels and the individual rows of the results of the panel. So it looks like this: {Patient:{Labs:[{Results:[{}]}]}}
I am able to successfully create the two nested objects Labs nested in Patient and Results nested in Labs, populate it, and query it. What I am unable to successfully do is create a query that constrains the results to a single Lab, and then match by more than one row in the Results object.
An example is attached, where I only want labs that are "Lipid Panel" and the results are HDL <= 46 and LDL >= 140.
Any suggestions?
Example Index
PUT localhost:9200/testpipeline
{
"aliases": {},
"mappings": {
"dynamic": "false",
"properties": {
"ageAtFirstEncounter": {
"type": "float"
},
"dateOfBirth": {
"type": "date"
},
"gender": {
"type": "keyword"
},
"id": {
"type": "float"
},
"labs": {
"type": "nested",
"properties": {
"ageOnDateOfService": {
"type": "float"
},
"date": {
"type": "date"
},
"encounterId": {
"type": "keyword"
},
"id": {
"type": "keyword"
},
"isEdVisit": {
"type": "boolean"
},
"labPanelName": {
"type": "keyword"
},
"labPanelNameId": {
"type": "float"
},
"labPanelSourceName": {
"type": "text",
"store": true
},
"personId": {
"type": "keyword"
},
"processingLogId": {
"type": "float"
},
"results": {
"type": "nested",
"properties": {
"dataType": {
"type": "keyword"
},
"id": {
"type": "float"
},
"labTestName": {
"type": "keyword"
},
"labTestNameId": {
"type": "float"
},
"resultAsNumber": {
"type": "float"
},
"resultAsText": {
"type": "keyword"
},
"sourceName": {
"type": "text",
"store": true
},
"unit": {
"type": "keyword"
}
}
}
}
},
"personId": {
"type": "keyword"
},
"processingLogId": {
"type": "float"
},
"race": {
"type": "keyword"
}
}
}
}
Example Document
PUT localhost:9200/testpipeline/_doc/274746
{
"id": 274746,
"personId": "10005786.000000",
"processingLogId": 51,
"gender": "Female",
"dateOfBirth": "1945-01-01T00:00:00",
"ageAtFirstEncounter": 76,
"labs": [
{
"isEdVisit": false,
"labPanelSourceName": "Lipid Panel",
"dataType": "LAB",
"ageOnDateOfService": 76.9041,
"results": [
{
"unit": "mg/dL",
"labTestNameId": 160,
"labTestName": "HDL",
"sourceName": "HDL",
"resultAsNumber": 46.0,
"resultAsText": "46",
"id": 2150284
},
{
"unit": "mg/dL",
"labTestNameId": 158,
"labTestName": "LDL",
"sourceName": "LDL",
"resultAsNumber": 144.0,
"resultAsText": "144.00",
"id": 2150286
}
],
"id": "9ab9ba84-580b-f2d2-4d32-25658ea5f1bf",
"sourceId": 2150278,
"personId": "10003783.000000",
"encounterId": "39617217.000000",
"processingLogId": 51,
"date": "2021-11-08T00:00:00"
}
],
"lastModified": "2022-03-24T10:21:29.8682784-05:00"
}
Example Query
POST localhost:9200/testpipeline/_search
{
"fields": [
"personId",
"processingLogId",
"id",
"gender",
"ageAtFirstDOS",
"dateOfBirth"
],
"from": 0,
"query": {
"bool": {
"should": [
{
"constant_score": {
"boost": 200,
"filter": {
"bool": {
"_name": "CriteriaFilterId:2068,CriteriaId:1,CriteriaClassId:1,Points:200,T5:False,SoftScore:200",
"should": [
{
"bool": {
"must": [
{
"nested": {
"path": "labs",
"inner_hits": {
"size": 3,
"name": "labs,CriteriaFilterId:2068,CriteriaId:1,CriteriaClassId:1,Points:200,T5:False,guid:8b41f346-2861-4099-b3c0-fcd6393c367b"
},
"query": {
"bool": {
"must": [
{
"bool": {
"must": [
{
"match_phrase": {
"labs.labPanelSourceName": {
"_name": "CriteriaFilterId:2068,Pipeline.Labs.LabPanelSourceName,es_match_phrase=>'Lipid Panel' found in text",
"query": "Lipid Panel",
"slop": 100
}
}
},
{
"nested": {
"path": "labs.results",
"inner_hits": {
"size": 3,
"name": "labs.results,CriteriaFilterId:2068,CriteriaId:1,CriteriaClassId:1,Points:200,T5:False,guid:3564e83f-958b-4fe8-848e-f9edb5d7f3b2"
},
"query": {
"bool": {
"must": [
{
"bool": {
"should": [
{
"bool": {
"must": [
{
"range": {
"labs.results.resultAsNumber": {
"lte": 46
}
}
},
{
"term": {
"labs.results.labTestNameId": {
"value": 160
}
}
}
]
}
},
{
"bool": {
"must": [
{
"range": {
"labs.results.resultAsNumber": {
"gte": 140.0
}
}
},
{
"term": {
"labs.results.labTestNameId": {
"value": 158
}
}
}
]
}
}
],
"minimum_should_match": 2
}
}
]
}
}
}
}
]
}
}
]
}
}
}
}
]
}
}
]
}
}
}
}
],
"minimum_should_match": 1,
"filter": [
]
}
},
"size": 10,
"sort": [
{
"_score": {
"order": "desc"
}
},
{
"processingLogId": {
"order": "asc"
}
},
{
"personId": {
"order": "asc"
}
}
],
"_source": false
}

Elasticsearch has_child returning no results

I'm trying to get all children of a parent document, but I'm not getting any results with the has_child query. This is my index mapping:
{
"index": "index_x",
"include_type_name": true,
"body": {
"mappings": {
"agents": {
"properties": {
"id": {
"type": "keyword"
},
"listings": {
"type": "join",
"eager_global_ordinals": true,
"relations": {
"agent": "listing"
}
},
"name": {
"type": "object"
}
}
}
}
}
}
here's my query
{
"query": {
"bool": {
"must": [
{
"term": {
"_id": <id>
}
},
{
"has_child": {
"type": "listing",
"query": {
"match_all": {}
},
"inner_hits": {}
}
}
]
}
}
}
However, when I run this query, I get the child results just fine:
{
"query": {
"bool": {
"must": [
{
"parent_id": {
"type":"listing",
"id": <id>
}
}
]
}
}
}
The has_parent query behaves the same way: I am not getting any results.
I'm using Elasticsearch 7.7
Sounds like you want to use the has_parent query. Here is a minimal example of how it can work on ES v7.7:
PUT /so
{
"mappings": {
"properties" : {
"my-join-field" : {
"type" : "join",
"relations": {
"parent": "child"
}
},
"tag" : {
"type" : "keyword"
}
}
}
}
POST /so/_doc/1
{
"my-join-field": "parent",
"tag": "Adult"
}
POST /so/_doc/2?routing=1
{
"my-join-field": {
"name": "child",
"parent": "1"
},
"tag": "Youth"
}
POST /so/_doc/3?routing=1
{
"my-join-field": {
"name": "child",
"parent": "1"
},
"tag": "Youth2"
}
GET /so/_search
{
"query": {
"has_parent": {
"parent_type": "parent",
"query": {
"match": {
"tag": "Adult"
}
}
}
}
}

Nested object retrieval in ElasticSearch query

I'm new to Elasticsearch and I have a few questions regarding nested object retrieval when a specific condition is matched.
I have a tree-like structure as follows:
{
"id": 4,
"sora": [
{
"pContext": {
"context": {
"sT": "D3",
"uT": "ST"
},
"entities": [
{
"name": "premium",
"bName": "premium",
"fT": "site",
"eT": "F_P",
"children": [
{
"name": "capa",
"bName": "capa",
"fT": "site",
"eT": "FFT",
"children": []
},
{
"name": "code",
"bName": "Codes",
"fT": "site",
"eT": "FFT",
"children": []
},
{
"name": "selection A",
"fT": "site",
"eT": "SELECTION_A",
"children": [
{
"name": "A1",
"fT": "site",
"eT": "ADD",
"children": []
},
{
"name": "A2",
"fT": "site",
"eT": "ADD",
"children": []
}
]
}
]
}
]
}
},
{
"pContext": {
"context": {
"sT": "D2",
"uT": "ST"
},
"entities": [
{
"name": "112",
"bName": "112",
"eT": "D_TYPE",
"children": []
}
]
}
}
]
}
My structure can have more levels.
I have many documents like the one described above. In order to filter my documents I can use the simple query syntax:
{
  "_source": {
    "excludes": [
      "*.context"
    ]
  },
  "query": {
    "bool": {
      "must": [
        {
          "match": {
            "sora.pContext.context.sT": "D3"
          }
        },
        {
          "match": {
            "sora.pContext.entities.name": "premium"
          }
        },
        {
          "match": {
            "sora.pContext.entities.fT": "site"
          }
        }
      ]
    }
  }
}
What I would like to know is how I can get the nested object that
matches my query, together with its children. I need the object that
matched the must clauses. Is that possible?
How can I search for a field without specifying the path?
Thanks
# EDIT
My mapping:
{
"mappings": {
"abc": {
"properties": {
"id": {
"type": "integer"
},
"sora": {
"type": "nested",
"properties": {
"pContext": {
"type": "nested",
"properties": {
"context": {
"type": "nested",
"properties": {
"sT": {
"type": "text"
},
"uT": {
"type": "text"
}
}
},
"entities": {
"type": "nested",
"properties": {
"name": {
"type": "text"
},
"bName": {
"type": "text"
},
"fT": {
"type": "text"
},
"eT": {
"type": "text"
},
"children": {
"type": "object"
}
}
}
}
}
}
}
}
}
}
}
Yes, you can get the matching objects by using inner_hits together with a nested query, rather than the plain bool/match query you added to the question.
Your query will look as below:
{
"_source": {
"excludes": [
"*.context"
]
},
"query": {
"bool": {
"filter": [
{
"nested": {
"inner_hits": {},
"path": "sora.pContext",
"query": {
"bool": {
"must": [
{
"nested": {
"path": "sora.pContext.context",
"query": {
"bool": {
"must": [
{
"match": {
"sora.pContext.context.sT": "D3"
}
}
]
}
}
}
},
{
"nested": {
"path": "sora.pContext.entities",
"query": {
"bool": {
"must": [
{
"match": {
"sora.pContext.entities.name": "premium"
}
},
{
"match": {
"sora.pContext.entities.fT": "site"
}
}
]
}
}
}
}
]
}
}
}
}
]
}
}
}
See the inner_hits documentation to understand what the results will look like.
Well, in case someone else is facing the same issue: my solution was to add all children at the same path/level as the parent, while keeping the mapping between the parent and its children. With that, I'm able to search and retrieve the parts of the parent as wanted.

Highlight nested object in Elasticsearch

Here is my sample dataset,
{
"parent":[
{
"name":"John Doe 1",
"age":"100 year",
"sex":"male",
"child":[
{
"name":"Jane Doe 1",
"height":100.00,
"width":100.00
},
{
"name":"Jane Doe 2",
"height":100.00,
"width":100.00
}
]
},
{
"name":"John Doe 2",
"age":"100 year",
"sex":"male",
"child":[
{
"name":"Jane Doe 3",
"height":100.00,
"width":100.00
},
{
"name":"Jane Doe 4",
"height":100.00,
"width":100.00
}
]
}
]
}
And my definition:
{
"settings": {
"index": {
"analysis": {
"analyzer": {
"default": {
"type": "simple"
}
}
}
}
},
"mappings": {
"_doc": {
"properties": {
"parent": {
"type": "nested",
"properties": {
"name": {
"type": "keyword"
},
"age": {
"type": "text"
},
"sex": {
"type": "text"
},
"child": {
"type": "nested",
"properties": {
"name": {
"type": "text"
},
"height": {
"type": "float"
},
"width": {
"type": "float"
}
}
}
}
}
}
}
}
}
I'm using the following query to look for matches in the parent.name property and can get highlights.
{
  "query": {
    "bool": {
      "should": [
        {
          "nested": {
            "inner_hits": {
              "highlight": {
                "fields": {
                  "parent.name": {}
                },
                "number_of_fragments": 0,
                "pre_tags": [
                  "<span>"
                ],
                "post_tags": [
                  "</span>"
                ]
              }
            },
            "path": "parent",
            "query": {
              "bool": {
                "must": [
                  {
                    "match": {
                      "parent.name": {
                        "query": "John",
                        "fuzziness": "AUTO:3,6",
                        "prefix_length": "0"
                      }
                    }
                  }
                ]
              }
            }
          }
        }
      ]
    }
  },
  "_source": ["parent"],
  "sort": [
    {
      "_score": {
        "order": "desc"
      }
    },
    {
      "createdOn": {
        "order": "desc"
      }
    }
  ]
}
Is there a way to get inline highlights for the matches in the child.name properties also so that it would be easy to find exactly which element of that corresponding array got matched?
For example, for the given sample data, if I search by "Doe", I'm expecting to get 6 hits, whereas if I search by "Jane", I would get only 4.
You can simply add another nested query clause inside your top-level should.
Here's how your query should look:
{
"query": {
"bool": {
"should": [
{
"nested": {
"inner_hits": {
"highlight": {
"fields": {
"parent.name": {}
},
"number_of_fragments": 0,
"pre_tags": [
"<span>"
],
"post_tags": [
"</span>"
]
}
},
"path": "parent",
"query": {
"bool": {
"must": [
{
"match": {
"parent.name": {
"query": "John Doe 1"
}
}
}
]
}
}
}
},
{
"nested": {
"inner_hits": {
"highlight": {
"fields": {
"parent.child.name": {}
},
"number_of_fragments": 0,
"pre_tags": [
"<span>"
],
"post_tags": [
"</span>"
]
}
},
"path": "parent.child",
"query": {
"bool": {
"must": [
{
"match": {
"parent.child.name": {
"query": "Jane Doe 1"
}
}
}
]
}
}
}
}
],
"minimum_should_match": 1
}
},
"_source": ["parent"],
"sort": [
{
"_score": {
"order": "desc"
}
},
{
"createdOn": {
"order": "desc"
}
}
]
}

How to get by ids and filter nested data to leave only objects filtered by category?

How can I fetch documents by ids and filter their notes so that only the objects of a given category are left?
Data:
POST c1_2/Blog/1
{
"post": {
"notes": {
"main": [
{
"message": "blablabla",
"category": "test"
},
{
"message": "blablabla",
"category": "other"
}
],
"cart": [
{
"message": "blablabla",
"category": "test"
},
{
"message": "blablabla",
"category": "other"
}
]
}
}
}
POST c1_2/Blog/2
{
"post": {
"notes": {
"main": [
{
"message": "blablabla",
"category": "second"
},
{
"message": "blablabla",
"category": "third"
}
],
"cart": [
{
"message": "blablabla",
"category": "test"
},
{
"message": "blablabla",
"category": "other"
}
]
}
}
}
Search:
POST c1_2/Blog/_search
{
"query": {
"bool": {
"must": [
{
"ids": {
"values": [
1,
2,
3
]
}
},
{
"terms": {
"post.notes.main.category": [
"test"
]
}
}
]
}
}
}
Current results, objects in notes main/cart aren't filtered by category:
{
"took": 9,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"failed": 0
},
"hits": {
"total": 1,
"max_score": 1.0122644,
"hits": [
{
"_index": "c1_2",
"_type": "Blog",
"_id": "1",
"_score": 1.0122644,
"_source": {
"post": {
"notes": {
"main": [
{
"message": "blablabla",
"category": "test"
},
{
"message": "blablabla",
"category": "other"
}
],
"cart": [
{
"message": "blablabla",
"category": "test"
},
{
"message": "blablabla",
"category": "other"
}
]
}
}
}
}
]
}
}
Desired effect:
"notes": {
"main": [
{
"message": "blablabla",
"category": "test"
}
],
"cart": [
{
"message": "blablabla",
"category": "test"
}
]
}
In my real app the query is embedded in "filtered" and "filter". If I put the query above in place of the "ids" filter, as in the example below, will it return the same data?
POST c1_2/Blog/_search
{
"query": {
"filtered": {
"query": {
"match_all": {}
},
"filter": {
"ids": {
"values": [
"1",
"2"
]
}
}
}
}
}
Mapping:
{
"posts": {
"mappings": {
"posts": {
"dynamic_templates": [{
"blog": {
"mapping": {
"index": "analyzed"
},
"path_match": "blog.*",
"path_unmatch": "*.medias.*"
}
}, {
"ids": {
"mapping": {
"index": "not_analyzed",
"type": "string"
},
"match": "_id|base_id",
"match_pattern": "regex"
}
}],
"_all": {
"enabled": false
},
"properties": {
"query": {
"properties": {
"filtered": {
"properties": {
"filter": {
"properties": {
"ids": {
"properties": {
"values": {
"type": "string"
}
}
}
}
},
"query": {
"properties": {
"match_all": {
"type": "object"
}
}
}
}
},
"match_all": {
"type": "object"
}
}
},
"source": {
"dynamic": "true",
"properties": {
"post": {
"dynamic": "true",
"properties": {
"_id": {
"type": "string",
"index": "not_analyzed"
},
"base_id": {
"type": "string",
"index": "not_analyzed"
}
}
}
}
},
"blog": {
"properties": {
"post": {
"properties": {
"_id": {
"type": "string"
},
"notes": {
"properties": {
"main": {
"properties": {
"id": {
"type": "string"
},
"message": {
"type": "string"
},
"category": {
"type": "string"
}
}
},
"cart": {
"properties": {
"id": {
"type": "string"
},
"message": {
"type": "string"
},
"category": {
"type": "string"
}
}
}
}
}
}
}
}
}
}
}
}
}
}
{
"query": {
"filtered": {
"filter": {
"bool": {
"must": {
"terms": {
"_id": [1, 2]
}
},
"must_not": {
"terms": {
"post.notes.main.category": [
"other"
]
}
}
}
}
}
}
}

Resources