Elasticsearch aggregate on inner_hits - elasticsearch

I am trying to do some aggregations on the inner_hits of a nested object (queries), which are filtered based on the query date. The aggregation in the following block aggregates over the main document and all objects in "queries", not just the ones in the inner hits.
GET /networkcollection/branch_routers/_search/
{
"_source": false,
"query": {
"filtered": {
"query": {
"match": {
"mh": 123
}
},
"filter": {
"nested": {
"path": "queries",
"filter": {
"range": {
"queries.dateQuery": {
"gt": "20160101T200000.000Z",
"lte": "now"
}
}
},
"inner_hits": {}
}
}
}
},
"aggs": {
"queries": {
"filter": {
"nested": {
"path": "queries",
"filter": {
"range": {
"queries.dateQuery": {
"gte": "20160101T200000.000Z",
"lte": "now"
}
}
}
}
},
"aggs": {
"minDateQuery": {
"min": {
"field": "queries.dateQuery"
}
}
}
}
}
}
How can I accomplish this aggregation so that it aggregates only the "queries" objects returned in the inner_hits?

I'm very late on this answer, but it is very much possible to aggregate only on the inner_hits.
My ES version : 6.2.3
I'm providing a detailed response, with index mapping, a few dummy documents and the search_query + response.
The basic idea is to use the "filter" aggregation. You don't need to actually use the "query" part of the search request at all, unless you're doing some very complex queries (to narrow down the aggregation profile). Most simple queries can easily be specified in the aggregation "filter".
Index setup:
PUT networkcollection
{
"mappings": {
"branch_routers" : {
"properties" : {
"mh" : {
"type" : "text"
},
"queries" : {
"type" : "nested",
"properties" : {
"dateQuery" : {
"type" : "date"
}
}
}
}
}
}
}
PUT networkcollection/branch_routers/1
{
"mh" : "corona",
"queries" : [
{
"dateQuery" : "2012-04-23"
},
{
"dateQuery" : "2013-04-23"
},
{
"dateQuery" : "2014-04-23"
},
{
"dateQuery" : "2015-04-23"
},
{
"dateQuery" : "2016-04-23"
},
{
"dateQuery" : "2017-04-23"
},
{
"dateQuery" : "2018-04-23"
},
{
"dateQuery" : "2019-04-23"
},
{
"dateQuery" : "2020-04-23"
}
]
}
PUT networkcollection/branch_routers/2
{
"mh" : "happy",
"queries" : [
{
"dateQuery" : "2009-04-23"
},
{
"dateQuery" : "2008-04-23"
},
{
"dateQuery" : "2007-04-23"
},
{
"dateQuery" : "2015-04-23"
},
{
"dateQuery" : "2016-04-23"
},
{
"dateQuery" : "2017-04-23"
},
{
"dateQuery" : "2018-04-23"
},
{
"dateQuery" : "2019-04-23"
},
{
"dateQuery" : "2020-04-23"
}
]
}
PUT networkcollection/branch_routers/3
{
"mh" : "happy",
"queries" : [
{
"dateQuery" : "2001-04-23"
},
{
"dateQuery" : "2008-04-23"
},
{
"dateQuery" : "2007-04-23"
},
{
"dateQuery" : "2015-04-23"
},
{
"dateQuery" : "2016-04-23"
},
{
"dateQuery" : "2017-04-23"
},
{
"dateQuery" : "2018-04-23"
},
{
"dateQuery" : "2019-04-23"
},
{
"dateQuery" : "2020-04-23"
}
]
}
We added three basic documents. Now we filter on "mh" being "happy", and we want the minimum dateQuery among the nested objects whose date falls between the year 2016 and now (we're currently in the middle of the corona-virus lockdown, so you know the year :) ).
Search Query:
GET networkcollection/branch_routers/_search
{
"_source": false,
"query": {
"match": {
"mh": "happy"
}
},
"aggs": {
"filtered_agg": {
"filter": {
"match" : {
"mh" : "happy"
}
},
"aggs": {
"filtered_nested": {
"nested": {
"path": "queries"
},
"aggs": {
"dateQuery_agg": {
"date_range": {
"field": "queries.dateQuery",
"ranges": [
{
"from": "now-4y/y",
"to": "now"
}
]
},
"aggs": {
"min_date": {
"min": {
"field": "queries.dateQuery"
}
}
}
}
}
}
}
}
}
}
Response:
{
"took": 2,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"skipped": 0,
"failed": 0
},
"hits": {
"total": 2,
"max_score": 0.2876821,
"hits": [
{
"_index": "networkcollection",
"_type": "branch_routers",
"_id": "2",
"_score": 0.2876821
},
{
"_index": "networkcollection",
"_type": "branch_routers",
"_id": "3",
"_score": 0.2876821
}
]
},
"aggregations": {
"filtered_agg": {
"doc_count": 2,
"filtered_nested": {
"doc_count": 18,
"dateQuery_agg": {
"buckets": [
{
"key": "2016-01-01T00:00:00.000Z-2020-05-14T23:02:31.611Z",
"from": 1451606400000,
"from_as_string": "2016-01-01T00:00:00.000Z",
"to": 1589497351611,
"to_as_string": "2020-05-14T23:02:31.611Z",
"doc_count": 10,
"min_date": {
"value": 1461369600000,
"value_as_string": "2016-04-23T00:00:00.000Z"
}
}
]
}
}
}
}
}
As you can see, it correctly filters out the documents listed with "mh" = "corona", and keeps only the two documents with "mh" = "happy", and then filters only those "queries" objects which lie in my specified date range, and finally provides the min_date.

Related

Elasticsearch - get all nested objects of all documents

Let's imagine Elasticsearch index where each document represents a country. Country has cities field, which is defined as nested.
Sample mapping (simplified for brevity of this example):
{
"properties": {
"name": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"cities": {
"type": "nested",
"properties": {
"name": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
}
// other properties are omitted for brevity
}
}
}
}
The documents which I'm inserting to the index look like this:
{
"name": "Slovakia",
"cities": [
{
"name": "Bratislava"
},
{
"name": "Zilina"
},
...
]
}
{
"name": "Czech Republic",
"cities": [
{
"name": "Praha"
},
{
"name": "Brno"
},
...
]
}
Is it possible to compose a query which returns all cities (over all countries) and supports sorting & pagination? In response, I'd like to have the complete nested objects + some fields of the parent object (so that I can display which country the city belongs to).
The first returned page (response) would contain 10 cities from Czech Republic, the second page would contain 10 cities where four of them are (the last ones) from Czech Republic and six are from Slovakia.
I was looking into composite aggregation, but I don't know how to add the country name to the sources:
{
"query": {
"match_all": {}
},
"aggs": {
"nested_aggs": {
"nested": {
"path": "cities"
},
"aggs": {
"by_name": {
"composite": {
"sources": [
{
"cityName": {
"terms": {
"field": "cities.name.keyword",
"order": "asc"
}
}
}
]
}
}
}
}
}
}
Is it possible to compose such query without modifying the Elasticsearch mapping?
All members of composite aggregations need to be defined under the same context — you cannot intermix nested and non-nested contexts.
The easiest option would be to first aggregate on the countries and then on the cities:
{
"size": 0,
"aggs": {
"by_country": {
"terms": {
"field": "name.keyword",
"size": 10
},
"aggs": {
"nested_cities": {
"nested": {
"path": "cities"
},
"aggs": {
"by_cities": {
"terms": {
"field": "cities.name.keyword",
"size": 10
}
}
}
}
}
}
}
}
If you do have the option of changing the mapping, you can leverage the include_in_root feature which'll enable you to perform composite aggs such as:
{
"size": 0,
"aggs": {
"by_name": {
"composite": {
"sources": [
{
"countryName": {
"terms": {
"field": "name.keyword",
"order": "asc"
}
}
},
{
"cityName": {
"terms": {
"field": "cities.name.keyword",
"order": "asc"
}
}
}
]
}
}
}
}
which can be easily paginated.
Here's what the result would look like:
...
"aggregations" : {
"by_name" : {
"after_key" : {
"countryName" : "Slovakia",
"cityName" : "Zilina"
},
"buckets" : [
{
"key" : {
"countryName" : "Czech Republic",
"cityName" : "Brno"
},
"doc_count" : 1
},
{
"key" : {
"countryName" : "Czech Republic",
"cityName" : "Praha"
},
"doc_count" : 1
},
{
"key" : {
"countryName" : "Slovakia",
"cityName" : "Bratislava"
},
"doc_count" : 1
},
{
"key" : {
"countryName" : "Slovakia",
"cityName" : "Zilina"
},
"doc_count" : 1
}
]
}
}

Reverse_nested aggregation + top hits : get parent and nested data at the same time

Do you know how to use the reverse_nested aggregation to get both the parent and ONLY the nested data inside my top hits aggregation?
The 'ONLY' part is the problem right now.
This is my mapping :
{
"ticket": {
"mappings": {
"properties": {
"name": {
"type": "keyword"
}
},
"tasks": {
"type": "nested",
"properties": {
"string_task_name": {
"type": "keyword"
}
}
}
}
}
}
My query uses top hits and reverse nested aggs.
{
"aggs": {
"object_tasks": {
"nested": {
"path": "object_tasks"
},
"aggs": {
"filter_by_tasks_attribute": {
"filter": {
"bool": {
"must": [
{
"wildcard": {
"object_tasks.string_task_name.keyword": "*"
}
}
]
}
},
"aggs": {
"using_reverse_nested": {
"reverse_nested": {
"path": "object_tasks"
},
"aggs": {
"names": {
"top_hits": {
"_source": {
"includes": [
"object_tasks.string_task_name",
"string_name"
]
},
"sort": [
{
"object_tasks.string_task_name.keyword": {
"order": "desc"
}
}
],
"from": 0,
"size": 10
}
}
}
}
}
}
}
}
}
}
{
"hits": {
"total": {
"value": 25,
"relation": "eq"
},
"max_score": null,
"hits": [
{
"_index": "random_index",
"_type": "_doc",
"_id": "5",
"_score": null,
"_source": {
"object_tasks": [ ================> I don't want all these tasks names, I just want the task name of the current nested object I am in.
{
"string_task_name": "task1"
},
{
"string_task_name": "task2"
},
{
"string_task_name": "task3"
},
{
"string_task_name": "task4"
}
],
"string_name": "Dummy Ticket 854"
},
"sort": [
"seek_a_sme"
]
}
]
}
}
As you can see, the result is giving me 4 task names. What I want is to return only 1 task name.
The only workaround I have found is to copy the data of tickets inside the tasks. But if I can avoid it that would be awesome.
I don't want all these tasks names, I just want the task name of the current nested object I am in.
The statement "of the current nested object I'm in" implies that you are inside of a nested context but you cannot be in one when you escape it through reverse_nested…
I'm not sure if I truly understood what you're gunning for here but you could aggregate on the terms of object_tasks.string_task_name.keyword and the keys of this aggregation would then function as the individual "current nested objects" that you're after:
{
"size": 0,
"aggs": {
"object_tasks": {
"nested": {
"path": "object_tasks"
},
"aggs": {
"filter_by_tasks_attribute": {
"filter": {
"bool": {
"must": [
{
"wildcard": {
"object_tasks.string_task_name.keyword": "*"
}
}
]
}
},
"aggs": {
"by_string_task_name": {
"terms": {
"field": "object_tasks.string_task_name.keyword",
"order": {
"_key": "desc"
},
"size": 10
},
"aggs": {
"using_reverse_nested": {
"reverse_nested": {},
"aggs": {
"names": {
"top_hits": {
"_source": {
"includes": [
"string_name"
]
},
"from": 0,
"size": 10
}
}
}
}
}
}
}
}
}
}
}
}
yielding
"aggregations" : {
"object_tasks" : {
...
"filter_by_tasks_attribute" : {
...
"by_string_task_name" : {
...
"buckets" : [
{
"key" : "task4", <--
...
"using_reverse_nested" : {
...
"names" : {
"hits" : {
...
"hits" : [
{
...
"_source" : {
"string_name" : "Dummy Ticket 854" <--
}
}
]
}
}
}
},
{
"key" : "task3", <--
...
},
{
"key" : "task2", <--
...
},
{
"key" : "task1", <--
...
}
}
]
}
}
}
}
Notice that the top_hits aggregation doesn't need to be sorted anymore -- object_tasks.string_task_name.keyword will always be the same for any currently aggregated terms bucket. What I did instead was order this terms aggregation by _key which works the same way as a top_hits sort would have. BTW -- yours was missing the nested path parameter.

How to count number of fields inside nested field? - Elasticsearch

I did the following mapping. I would like to count the number of products in each nested field "products" (for each document separately). I would also like to do a histogram aggregation, so that I would know the number of specific bucket sizes.
PUT /receipts
{
"mappings": {
"properties": {
"id" : {
"type": "integer"
},
"user_id" : {
"type": "integer"
},
"date" : {
"type": "date"
},
"sum" : {
"type": "double"
},
"products" : {
"type": "nested",
"properties": {
"name" : {
"type" : "text"
},
"number" : {
"type" : "double"
},
"price_single" : {
"type" : "double"
},
"price_total" : {
"type" : "double"
}
}
}
}
}
}
I've tried this query, but I get the number of all the products instead of number of products for each document separately.
GET /receipts/_search
{
"query": {
"match_all": {}
},
"size": 0,
"aggs": {
"terms": {
"nested": {
"path": "products"
},
"aggs": {
"bucket_size": {
"value_count": {
"field": "products"
}
}
}
}
}
}
Result of the query:
"aggregations" : {
"terms" : {
"doc_count" : 6552,
"bucket_size" : {
"value" : 0
}
}
}
UPDATE
Now I have this code where I make separate buckets for each id and count the number of products inside them.
GET /receipts/_search
{
"query": {
"match_all": {}
},
"size" : 0,
"aggs": {
"terms":{
"terms":{
"field": "_id"
},
"aggs": {
"nested": {
"nested": {
"path": "products"
},
"aggs": {
"bucket_size": {
"value_count": {
"field": "products.number"
}
}
}
}
}
}
}
}
Result of the query:
"aggregations" : {
"terms" : {
"doc_count_error_upper_bound" : 5,
"sum_other_doc_count" : 490,
"buckets" : [
{
"key" : "1",
"doc_count" : 1,
"nested" : {
"doc_count" : 21,
"bucket_size" : {
"value" : 21
}
}
},
{
"key" : "10",
"doc_count" : 1,
"nested" : {
"doc_count" : 5,
"bucket_size" : {
"value" : 5
}
}
},
{
"key" : "100",
"doc_count" : 1,
"nested" : {
"doc_count" : 12,
"bucket_size" : {
"value" : 12
}
}
},
...
Is it possible to group these values (21, 5, 12, ...) into buckets to make a histogram of them?
products is only the path to the array of individual products, not an aggregatable field. So you'll need to use it on one of your product's fields -- such as the number:
GET receipts/_search
{
"size": 0,
"aggs": {
"terms": {
"nested": {
"path": "products"
},
"aggs": {
"bucket_size": {
"value_count": {
"field": "products.number"
}
}
}
}
}
}
Note that if a product has no number, it'll not contribute to the total count. It's therefore best practice to always include an ID in each of them and then aggregate on that field.
Alternatively you could use a script to account for missing values. Luckily value_count does not deduplicate -- meaning if two products are alike and/or have empty values, they'll still be counted as two:
GET receipts/_search
{
"size": 0,
"aggs": {
"terms": {
"nested": {
"path": "products"
},
"aggs": {
"bucket_size": {
"value_count": {
"script": {
"source": "doc['products.number'].toString()"
}
}
}
}
}
}
}
UPDATE
You could also use a nested composite aggregation which'll give you the histogrammed product count w/ the corresponding receipt id:
GET /receipts/_search
{
"size": 0,
"aggs": {
"my_aggs": {
"nested": {
"path": "products"
},
"aggs": {
"composite_parent": {
"composite": {
"sources": [
{
"receipt_id": {
"terms": {
"field": "_id"
}
}
},
{
"product_number": {
"histogram": {
"field": "products.number",
"interval": 1
}
}
}
]
}
}
}
}
}
}
The interval is modifiable.

Elastic Search: Selecting multiple values in aggregates

In Elastic Search I have the following index with 'allocated_bytes', 'total_bytes' and other fields:
{
"_index" : "metrics-blockstore_capacity-2017_06",
"_type" : "datapoint",
"_id" : "AVzHwgsi9KuwEU6jCXy5",
"_score" : 1.0,
"_source" : {
"timestamp" : 1498000001000,
"resource_guid" : "2185d15c-5298-44ac-8646-37575490125d",
"allocated_bytes" : 1.159196672E9,
"resource_type" : "machine",
"total_bytes" : 1.460811776E11,
"machine" : "2185d15c-5298-44ac-8646-37575490125d"
}
I have the following query to:
1) get a point for each 30-minute interval using date_histogram,
2) group by the resource_guid field,
3) use a max aggregation to find the max value.
{
"size": 0,
"query": {
"bool": {
"must": [
{
"range": {
"timestamp": {
"gte": 1497992400000,
"lte": 1497996000000
}
}
}
]
}
},
"aggregations": {
"groupByTime": {
"date_histogram": {
"field": "timestamp",
"interval": "30m",
"order": {
"_key": "desc"
}
},
"aggregations": {
"groupByField": {
"terms": {
"size": 1000,
"field": "resource_guid"
},
"aggregations": {
"maxValue": {
"max": {
"field": "allocated_bytes"
}
}
}
},
"sumUnique": {
"sum_bucket": {
"buckets_path": "groupByField>maxValue"
}
}
}
}
}
}
But with this query I am able to get only allocated_bytes, but I need to have both allocated_bytes and total_bytes at the result point.
Following is the result from the above query:
{
"key_as_string" : "2017-06-20T21:00:00.000Z",
"key" : 1497992400000,
"doc_count" : 9,
"groupByField" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [ {
"key" : "2185d15c-5298-44ac-8646-37575490125d",
"doc_count" : 3,
"maxValue" : {
"value" : 1.156182016E9
}
}, {
"key" : "c3513cdd-58bb-4f8e-9b4c-467230b4f6e2",
"doc_count" : 3,
"maxValue" : {
"value" : 1.156165632E9
}
}, {
"key" : "eff13403-9737-4d08-9dca-fb6c12c3a6fa",
"doc_count" : 3,
"maxValue" : {
"value" : 1.156182016E9
}
} ]
},
"sumUnique" : {
"value" : 3.468529664E9
}
}
I do need both allocated_bytes and total_bytes. How do I get multiple fields (allocated_bytes, total_bytes) for each point?
For example:
"sumUnique" : {
"Allocatedvalue" : 3.468529664E9,
"TotalValue" : 9.468529664E9
}
or like this:
"allocatedBytessumUnique" : {
"value" : 3.468529664E9
}
"totalBytessumUnique" : {
"value" : 9.468529664E9
},
You can just add another aggregation:
{
"size": 0,
"query": {
"bool": {
"must": [
{
"range": {
"timestamp": {
"gte": 1497992400000,
"lte": 1497996000000
}
}
}
]
}
},
"aggregations": {
"groupByTime": {
"date_histogram": {
"field": "timestamp",
"interval": "30m",
"order": {
"_key": "desc"
}
},
"aggregations": {
"groupByField": {
"terms": {
"size": 1000,
"field": "resource_guid"
},
"aggregations": {
"maxValueAllocated": {
"max": {
"field": "allocated_bytes"
}
},
"maxValueTotal": {
"max": {
"field": "total_bytes"
}
}
}
},
"sumUniqueAllocatedBytes": {
"sum_bucket": {
"buckets_path": "groupByField>maxValueAllocated"
}
},
"sumUniqueTotalBytes": {
"sum_bucket": {
"buckets_path": "groupByField>maxValueTotal"
}
}
}
}
}
}
I hope you are aware that sum_bucket is a sibling pipeline aggregation: it operates only on the output of a sibling aggregation, so in this case it gives the sum of the per-bucket max values, not the sum of total_bytes. If you want the sum of total_bytes, you can use the sum aggregation.

Strange results when querying nested objects

Elasticsearch version: 2.3.3
Plugins installed: no plugin
JVM version: 1.8.0_91
OS version: Linux version 3.19.0-56-generic (Ubuntu 4.8.2-19ubuntu1)
I get strange results when I query nested objects on multiple paths. I want to search for all female patients with dementia. The matched patients are among the results, but I also get other diagnoses I'm not looking for: the other diagnoses related to these patients.
For example, I also get the following diagnoses despite the fact that I looked only for dementia.
Mental disorder, not otherwise specified
Essential (primary) hypertension
Why is that?
I want to get only female patients with dementia, and I don't want their other diagnoses.
Client_Demographic_Details contains one document per patient. Diagnosis contains multiple documents per patient. The ultimate goal is to index my whole data from PostgreSQL DB (72 tables, over 1600 columns in total) into Elasticsearch.
Query:
{'query': {
'bool': {
'must': [
{'nested': {
'path': 'Diagnosis',
'query': {
'bool': {
'must': [{'match_phrase': {'Diagnosis.Diagnosis': {'query': "dementia"}}}]
}
}
}},
{'nested': {
'path': 'Client_Demographic_Details',
'query': {
'bool': {
'must': [{'match_phrase': {'Client_Demographic_Details.Gender_Description': {'query': "female"}}}]
}
}
}}
]
}
}}
Results:
{
"hits": {
"hits": [
{
"_score": 3.4594634,
"_type": "Patient",
"_id": "72",
"_source": {
"Client_Demographic_Details": [
{
"Gender_Description": "Female",
"Patient_ID": 72,
}
],
"Diagnosis": [
{
"Diagnosis": "F00.0 - Dementia in Alzheimer's disease with early onset",
"Patient_ID": 72,
},
{
"Patient_ID": 72,
"Diagnosis": "F99.X - Mental disorder, not otherwise specified",
},
{
"Patient_ID": 72,
"Diagnosis": "I10.X - Essential (primary) hypertension",
}
]
},
"_index": "denorm1"
}
],
"total": 6,
"max_score": 3.4594634
},
"_shards": {
"successful": 5,
"failed": 0,
"total": 5
},
"took": 8,
"timed_out": false
}
Mapping:
{
"denorm1" : {
"aliases" : { },
"mappings" : {
"Patient" : {
"properties" : {
"Client_Demographic_Details" : {
"type" : "nested",
"properties" : {
"Patient_ID" : {
"type" : "long"
},
"Gender_Description" : {
"type" : "string"
}
}
},
"Diagnosis" : {
"type" : "nested",
"properties" : {
"Patient_ID" : {
"type" : "long"
},
"Diagnosis" : {
"type" : "string"
}
}
}
}
}
},
"settings" : {
"index" : {
"creation_date" : "1473974457603",
"number_of_shards" : "5",
"number_of_replicas" : "1",
"uuid" : "Jo9cI4kRQjeWcZ7WMB6ZAw",
"version" : {
"created" : "2030399"
}
}
},
"warmers" : { }
}
}
Try this
{
"_source": {
"exclude": [
"Client_Demographic_Details",
"Diagnosis"
]
},
"query": {
"bool": {
"must": [
{
"nested": {
"path": "Diagnosis",
"query": {
"bool": {
"must": [
{
"match_phrase": {
"Diagnosis.Diagnosis": {
"query": "dementia"
}
}
}
]
}
},
"inner_hits": {}
}
},
{
"nested": {
"path": "Client_Demographic_Details",
"query": {
"bool": {
"must": [
{
"match_phrase": {
"Client_Demographic_Details.Gender_Description": {
"query": "female"
}
}
}
]
}
},
"inner_hits": {}
}
}
]
}
}
}
The matched nested documents will be inside inner_hits, and the rest in _source.
I know it's not a concrete approach.
As @blackmamba suggested, I constructed a mapping with Client_Demographic_Details as the root object and Diagnosis as a nested object.
Mapping:
{
"denorm2" : {
"aliases" : { },
"mappings" : {
"Patient" : {
"properties" : {
"BRC_ID" : {
"type" : "long"
},
"Diagnosis" : {
"type" : "nested",
"properties" : {
"BRC_ID" : {
"type" : "long"
},
"Diagnosis" : {
"type" : "string"
}
}
},
"Gender_Description" : {
"type" : "string"
}
}
}
},
"settings" : {
"index" : {
"creation_date" : "1474031740689",
"number_of_shards" : "5",
"number_of_replicas" : "1",
"uuid" : "fMeKa6sfThmxkg_281WdHA",
"version" : {
"created" : "2030399"
}
}
},
"warmers" : { }
}
}
Query:
I added source filtering and highlight.
{
'_source': {
'exclude': ['Diagnosis'],
'include': ['BRC_ID', 'Gender_Description']
},
'highlight': {
'fields': {
'Gender_Description': {}
}
},
'query': {
'bool': {
'must': [
{'nested': {
'path': 'Diagnosis',
'query': {
'bool': {
'must': [{'match_phrase': {'Diagnosis.Diagnosis': {'query': "dementia"}}}]
}
},
'inner_hits': {
'highlight': {
'fields': {
'Diagnosis.Diagnosis': {}
}
},
'_source': ['BRC_ID', 'Diagnosis']
}
}},
{'match_phrase': {'Gender_Description': {'query': "female"}}}
]
}
}}

Resources