Strange results when querying nested objects - elasticsearch

Elasticsearch version: 2.3.3
Plugins installed: no plugin
JVM version: 1.8.0_91
OS version: Linux version 3.19.0-56-generic (Ubuntu 4.8.2-19ubuntu1)
I get strange results when I query nested objects on multiple paths. I want to search for all female patients with dementia. The matching patients do appear among the results, but I also get other diagnoses that I'm not looking for: the other diagnoses belonging to those same patients.
For example, I also get the following diagnoses despite the fact that I looked only for dementia.
Mental disorder, not otherwise specified
Essential (primary) hypertension
Why is that?
I want to get only the female patients with dementia, and I don't want their other diagnoses.
Client_Demographic_Details contains one document per patient. Diagnosis contains multiple documents per patient. The ultimate goal is to index my whole data from PostgreSQL DB (72 tables, over 1600 columns in total) into Elasticsearch.
Query:
{'query': {
'bool': {
'must': [
{'nested': {
'path': 'Diagnosis',
'query': {
'bool': {
'must': [{'match_phrase': {'Diagnosis.Diagnosis': {'query': "dementia"}}}]
}
}
}},
{'nested': {
'path': 'Client_Demographic_Details',
'query': {
'bool': {
'must': [{'match_phrase': {'Client_Demographic_Details.Gender_Description': {'query': "female"}}}]
}
}
}}
]
}
}}
Results:
{
"hits": {
"hits": [
{
"_score": 3.4594634,
"_type": "Patient",
"_id": "72",
"_source": {
"Client_Demographic_Details": [
{
"Gender_Description": "Female",
"Patient_ID": 72,
}
],
"Diagnosis": [
{
"Diagnosis": "F00.0 - Dementia in Alzheimer's disease with early onset",
"Patient_ID": 72,
},
{
"Patient_ID": 72,
"Diagnosis": "F99.X - Mental disorder, not otherwise specified",
},
{
"Patient_ID": 72,
"Diagnosis": "I10.X - Essential (primary) hypertension",
}
]
},
"_index": "denorm1"
}
],
"total": 6,
"max_score": 3.4594634
},
"_shards": {
"successful": 5,
"failed": 0,
"total": 5
},
"took": 8,
"timed_out": false
}
Mapping:
{
"denorm1" : {
"aliases" : { },
"mappings" : {
"Patient" : {
"properties" : {
"Client_Demographic_Details" : {
"type" : "nested",
"properties" : {
"Patient_ID" : {
"type" : "long"
},
"Gender_Description" : {
"type" : "string"
}
}
},
"Diagnosis" : {
"type" : "nested",
"properties" : {
"Patient_ID" : {
"type" : "long"
},
"Diagnosis" : {
"type" : "string"
}
}
}
}
}
},
"settings" : {
"index" : {
"creation_date" : "1473974457603",
"number_of_shards" : "5",
"number_of_replicas" : "1",
"uuid" : "Jo9cI4kRQjeWcZ7WMB6ZAw",
"version" : {
"created" : "2030399"
}
}
},
"warmers" : { }
}
}

Try this
{
"_source": {
"exclude": [
"Client_Demographic_Details",
"Diagnosis"
]
},
"query": {
"bool": {
"must": [
{
"nested": {
"path": "Diagnosis",
"query": {
"bool": {
"must": [
{
"match_phrase": {
"Diagnosis.Diagnosis": {
"query": "dementia"
}
}
}
]
}
},
"inner_hits": {}
}
},
{
"nested": {
"path": "Client_Demographic_Details",
"query": {
"bool": {
"must": [
{
"match_phrase": {
"Client_Demographic_Details.Gender_Description": {
"query": "female"
}
}
}
]
}
},
"inner_hits": {}
}
}
]
}
}
}
The matched nested documents will be returned inside inner_hits, and the rest of the document in _source.
I know it's not a concrete approach.

As @blackmamba suggested, I constructed a mapping with Client_Demographic_Details as the root object and Diagnosis as a nested object.
Mapping:
{
"denorm2" : {
"aliases" : { },
"mappings" : {
"Patient" : {
"properties" : {
"BRC_ID" : {
"type" : "long"
},
"Diagnosis" : {
"type" : "nested",
"properties" : {
"BRC_ID" : {
"type" : "long"
},
"Diagnosis" : {
"type" : "string"
}
}
},
"Gender_Description" : {
"type" : "string"
}
}
}
},
"settings" : {
"index" : {
"creation_date" : "1474031740689",
"number_of_shards" : "5",
"number_of_replicas" : "1",
"uuid" : "fMeKa6sfThmxkg_281WdHA",
"version" : {
"created" : "2030399"
}
}
},
"warmers" : { }
}
}
Query:
I added source filtering and highlight.
{
'_source': {
'exclude': ['Diagnosis'],
'include': ['BRC_ID', 'Gender_Description']
},
'highlight': {
'fields': {
'Gender_Description': {}
}
},
'query': {
'bool': {
'must': [
{'nested': {
'path': 'Diagnosis',
'query': {
'bool': {
'must': [{'match_phrase': {'Diagnosis.Diagnosis': {'query': "dementia"}}}]
}
},
'inner_hits': {
'highlight': {
'fields': {
'Diagnosis.Diagnosis': {}
}
},
'_source': ['BRC_ID', 'Diagnosis']
}
}},
{'match_phrase': {'Gender_Description': {'query': "female"}}}
]
}
}}

Related

Should and Filter combination in ElasticSearch

I have this query, which returns the correct result:
GET /person/_search
{
"query": {
"bool": {
"should": [
{
"fuzzy": {
"nameDetails.name.nameValue.surname": {
"value": "Pibba",
"fuzziness": "AUTO"
}
}
},
{
"fuzzy": {
"nameDetails.nameValue.firstName": {
"value": "Fawsu",
"fuzziness": "AUTO"
}
}
}
]
}
}
}
and the result is below:
{
"took" : 1,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 1,
"relation" : "eq"
},
"max_score" : 3.6012557,
"hits" : [
{
"_index" : "person",
"_type" : "_doc",
"_id" : "70002",
"_score" : 3.6012557,
"_source" : {
"gender" : "Male",
"activeStatus" : "Inactive",
"deceased" : "No",
"nameDetails" : {
"name" : [
{
"nameValue" : {
"firstName" : "Fawsu",
"middleName" : "L.",
"surname" : "Pibba"
},
"nameType" : "Primary Name"
},
{
"nameValue" : {
"firstName" : "Fausu",
"middleName" : "L.",
"surname" : "Pibba"
},
"nameType" : "Spelling Variation"
}
]
}
}
}
]
}
But when I add the filter for Gender, it returns no result
GET /person/_search
{
"query": {
"bool": {
"should": [
{
"fuzzy": {
"nameDetails.name.nameValue.surname": {
"value": "Pibba",
"fuzziness": "AUTO"
}
}
},
{
"fuzzy": {
"nameDetails.nameValue.firstName": {
"value": "Fawsu",
"fuzziness": "AUTO"
}
}
}
],
"filter": [
{
"term": {
"gender": "Male"
}
}
]
}
}
}
Even if I use only the filter, it returns no results:
GET /person/_search
{
"query": {
"bool": {
"filter": [
{
"term": {
"gender": "Male"
}
}
]
}
}
}
You are not getting any search result, because you are using the term query (in the filter clause). Term query will return the document only if it has an exact match.
A standard analyzer is used when no analyzer is specified, which will tokenize Male to male. So either you can search for male instead of Male or use any of the below solutions.
If you have not defined any explicit index mapping, you need to add .keyword to the gender field. This uses the keyword analyzer instead of the standard analyzer (notice the ".keyword" after gender field). Try out this below query -
{
"query": {
"bool": {
"filter": [
{
"term": {
"gender.keyword": "Male"
}
}
]
}
}
}
Search Result:
"hits": [
{
"_index": "66879128",
"_type": "_doc",
"_id": "1",
"_score": 0.0,
"_source": {
"gender": "Male",
"activeStatus": "Inactive",
"deceased": "No",
"nameDetails": {
"name": [
{
"nameValue": {
"firstName": "Fawsu",
"middleName": "L.",
"surname": "Pibba"
},
"nameType": "Primary Name"
},
{
"nameValue": {
"firstName": "Fausu",
"middleName": "L.",
"surname": "Pibba"
},
"nameType": "Spelling Variation"
}
]
}
}
}
]
If you have defined index mapping, then modify the mapping for gender field as shown below
{
"mappings": {
"properties": {
"gender": {
"type": "keyword"
}
}
}
}

Elasticsearch: filter documents by array passed in request contains all document array elements

My documents stored in elasticsearch have following structure:
{
"id": 1,
"test": "name",
"rules": [
{
"id": 2,
"name": "rule1",
"ruleDetails": [
{
"id": 3,
"requiredAnswerId": 1
},
{
"id": 4,
"requiredAnswerId": 2
},
{
"id": 5,
"requiredAnswerId": 3
}
]
}
]
}
where, rules property has nested type.
I need to query documents by checking that array of requiredAnswerId passed in the search request (provided terms) contains all rules.ruleDetails.requiredAnswerId stored in the document.
Does anyone know which elasticsearch option I can use to build such specific query? Or maybe, it is better to fetch the whole document and perform filtering on the application level.
UPDATED
Adding mapping
{
"my_index": {
"mappings": {
"properties": {
"id": {
"type": "long"
},
"test": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword"
}
}
},
"rules": {
"type": "nested",
"properties": {
"id": {
"type": "long"
},
"name": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"ruleDetails": {
"properties": {
"id": {
"type": "long"
},
"requiredAnswerId": {
"type": "long"
}
}
}
}
}
}
}
}
}
Mapping:
{
"index4" : {
"mappings" : {
"properties" : {
"id" : {
"type" : "integer"
},
"rules" : {
"type" : "nested",
"properties" : {
"id" : {
"type" : "integer"
},
"name" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword"
}
}
},
"ruleDetails" : {
"properties" : {
"id" : {
"type" : "long"
},
"requiredAnswerId" : {
"type" : "long"
}
}
}
}
},
"test" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword"
}
}
}
}
}
}
}
Query: This requires the use of scripts, which are not good from a performance perspective. I am looping through all documents and checking whether each field value is present in the passed parameters.
{
"query": {
"nested": {
"path": "rules",
"query": {
"script": {
"script": {
"source": "for(a in doc['rules.ruleDetails.requiredAnswerId']){if(!params.Ids.contains((int)a)) return false; } return true;",
"params": {
"Ids": [
1,
2,
3
]
}
}
}
},
"inner_hits": {}
}
}
}
Result:
"hits" : [
{
"_index" : "index4",
"_type" : "_doc",
"_id" : "TxOpvnEBf42mOjxvvLQB",
"_score" : 4.0,
"_source" : {
"id" : 1,
"test" : "name",
"rules" : [
{
"id" : 2,
"name" : "rule1",
"ruleDetails" : [
{
"id" : 3,
"requiredAnswerId" : 1
},
{
"id" : 4,
"requiredAnswerId" : 2
},
{
"id" : 5,
"requiredAnswerId" : 3
}
]
},
{
"id" : 3,
"name" : "rule3",
"ruleDetails" : [
{
"id" : 3,
"requiredAnswerId" : 1
},
{
"id" : 4,
"requiredAnswerId" : 2
}
]
}
]
},
"inner_hits" : {
"rules" : {
"hits" : {
"total" : {
"value" : 1,
"relation" : "eq"
},
"max_score" : 4.0,
"hits" : [
{
"_index" : "index4",
"_type" : "_doc",
"_id" : "TxOpvnEBf42mOjxvvLQB",
"_nested" : {
"field" : "rules",
"offset" : 0
},
"_score" : 4.0,
"_source" : {
"id" : 2,
"name" : "rule1",
"ruleDetails" : [
{
"id" : 3,
"requiredAnswerId" : 1
},
{
"id" : 4,
"requiredAnswerId" : 2
},
{
"id" : 5,
"requiredAnswerId" : 3
}
]
}
}
]
}
}
}
}
]
EDIT 1
Terms_set can be used as an alternative. It will be faster compared to script query
Returns documents that contain a minimum number of exact terms in a
provided field.
minimum_should_match_script - the size of the array can be used as the minimum number of passed values that must match.
Query:
{
"query": {
"nested": {
"path": "rules",
"query": {
"bool": {
"filter": {
"terms_set": {
"rules.ruleDetails.requiredAnswerId": {
"terms": [
1,
2,
3
],
"minimum_should_match_script": {
"source": "doc['rules.ruleDetails.requiredAnswerId'].size()"
}
}
}
}
}
},
"inner_hits": {}
}
}
}
After some time playing with ES and reading its documentation, I found that you should keep in mind that the provided script has to be compiled and applied to every document, so it is slower than necessary if you already know in advance the number of elements that should match.
Therefore, I created a separate field requiredMatches that stores the number of rules.ruleDetails.requiredAnswerId elements for every document; it is calculated before the document is indexed. Then, instead of using minimum_should_match_script in my search query, I use minimum_should_match_field:
{
"query": {
"nested": {
"path": "rules",
"query": {
"bool": {
"filter": {
"terms_set": {
"rules.ruleDetails.requiredAnswerId": {
"terms": [
1,
2,
3
],
"minimum_should_match_field": "requiredMatches"
}
}
}
}
},
"inner_hits": {}
}
}
}
I used the following example as a reference.

Elasticsearch - How to Generate Facets for Doubly Nested Objects

Using elasticsearch 7, I am trying to build facets for doubly nested objects.
So in the example below I would like to pull out the artist id codes from the artistMakerPerson field. I can pull out the association which is nested at a single depth but I can't get the syntax for the nested nested objects.
You could use the following code in Kibana to recreate an example.
My mapping looks like this:
PUT test_artist
{
"settings": {
"number_of_shards": 1
},
"mappings": {
"properties": {
"object" : {
"type" : "text",
"fields" : {
"raw" : {
"type" : "keyword"
}
},
"copy_to" : [
"global_search"
]
},
"uniqueID" : {
"type" : "keyword",
"copy_to" : [
"global_search"
]
},
"artistMakerPerson" : {
"type" : "nested",
"properties" : {
"association" : {
"type" : "keyword"
},
"name" : {
"type" : "nested",
"properties" : {
"id" : {
"type" : "keyword"
},
"text" : {
"type" : "text",
"fields" : {
"raw" : {
"type" : "keyword"
}
},
"copy_to" : [
"gs_authority"
]
}
}
},
"note" : {
"type" : "text"
}
}
}
}
}
}
Index a document with:
PUT /test_artist/_doc/123
{
"object": "cup",
"uniquedID": "123",
"artistMakerPerson" : [
{
"name" : {
"text" : "Johann Kandler",
"id" : "A6734"
},
"association" : "modeller",
"note" : "probably"
},
{
"name" : {
"text" : "Peter Reinicke",
"id" : "A27702"
},
"association" : "designer",
"note" : "probably"
}
]
}
I am using this query to pull out facets or aggregations for artistMakerPerson.association
GET test_artist/_search
{
"size": 0,
"aggs": {
"artists": {
"nested": {
"path": "artistMakerPerson"
},
"aggs": {
"kinds": {
"terms": {
"field": "artistMakerPerson.association",
"size": 10
}
}
}
}
}
}
and I am rewarded with buckets for designer and modeller but I get nothing when I try to pull out the deeper artist id:
GET test_artist/_search
{
"size": 0,
"aggs": {
"artists": {
"nested": {
"path": "artistMakerPerson"
},
"aggs": {
"kinds": {
"terms": {
"field": "artistMakerPerson.name.id",
"size": 10
}
}
}
}
}
}
What am I doing wrong?
Change the path from artistMakerPerson to artistMakerPerson.name.
GET test_artist/_search
{
"size": 0,
"aggs": {
"artists": {
"nested": {
"path": "artistMakerPerson.name"
},
"aggs": {
"kinds": {
"terms": {
"field": "artistMakerPerson.name.id",
"size": 10
}
}
}
}
}
}

Using named queries (matched_queries) for nested types in Elasticsearch?

Using named queries, I can get a list of the matched_queries for boolean expressions such as:
(query1) AND (query2 OR query3 OR true)
Here is an example of using named queries to match on top-level document fields:
DELETE test
PUT /test
PUT /test/_mapping/_doc
{
"properties": {
"name": {
"type": "text"
},
"type": {
"type": "text"
},
"TAGS": {
"type": "nested"
}
}
}
POST /test/_doc
{
"name" : "doc1",
"type": "msword",
"TAGS" : [
{
"ID" : "tag1",
"TYPE" : "BASIC"
},
{
"ID" : "tag2",
"TYPE" : "BASIC"
},
{
"ID" : "tag3",
"TYPE" : "BASIC"
}
]
}
# (query1) AND (query2 or query3 or true)
GET /test/_search
{
"query": {
"bool": {
"must": [
{
"match": {
"name": {
"query": "doc1",
"_name": "query1"
}
}
}
],
"should": [
{
"match": {
"type": {
"query": "msword",
"_name": "query2"
}
}
},
{
"exists": {
"field": "type",
"_name": "query3"
}
}
]
}
}
}
The above query correctly returns all three matched_queries in the response:
{
"took" : 1,
"timed_out" : false,
"_shards" : {
"total" : 5,
"successful" : 5,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : 1,
"max_score" : 1.5753641,
"hits" : [
{
"_index" : "test",
"_type" : "_doc",
"_id" : "TKNJ9G4BbvPS27u-ZYux",
"_score" : 1.5753641,
"_source" : {
"name" : "doc1",
"type" : "msword",
"TAGS" : [
{
"ID" : "ds1",
"TYPE" : "BASIC"
},
{
"ID" : "wb1",
"TYPE" : "BASIC"
}
]
},
"matched_queries" : [
"query1",
"query2",
"query3"
]
}
]
}
}
However, I'm trying to run a similar search:
(query1) AND (query2 OR query3 OR true)
only this time on the nested TAGS object rather than top-level document fields.
I've tried the following query, but the problem is I need to supply the inner_hits object for nested objects in order to get the matched_queries in the response, and I can only add it to one of the three queries.
GET /test/_search
{
"query": {
"bool": {
"must": {
"nested": {
"path": "TAGS",
"query": {
"match": {
"TAGS.ID": {
"query": "tag1",
"_name": "tag1-query"
}
}
},
// "inner_hits" : {}
}
},
"should": [
{
"nested": {
"path": "TAGS",
"query": {
"match": {
"TAGS.ID": {
"query": "tag2",
"_name": "tag2-query"
}
}
},
// "inner_hits" : {}
}
},
{
"nested": {
"path": "TAGS",
"query": {
"match": {
"TAGS.ID": {
"query": "tag3",
"_name": "tag3-query"
}
}
},
// "inner_hits" : {}
}
}
]
}
}
}
Elasticsearch will complain if I add more than one 'inner_hits'. I've commented out the places above where I can add it, but each of these will only return the single matched query.
I want my response to this query to return:
"matched_queries" : [
"tag1-query",
"tag2-query",
"tag3-query"
]
Any help is much appreciated, thanks!
A colleague helpfully provided a solution to this: move the _name parameter to directly under each nested section:
GET /test/_search
{
"query": {
"bool": {
"must": {
"nested": {
"_name": "tag1-query",
"path": "TAGS",
"query": {
"match": {
"TAGS.ID": {
"query": "tag1"
}
}
}
}
},
"should": [
{
"nested": {
"_name": "tag2-query",
"path": "TAGS",
"query": {
"match": {
"TAGS.ID": {
"query": "tag2"
}
}
}
}
},
{
"nested": {
"_name": "tag3-query",
"path": "TAGS",
"query": {
"match": {
"TAGS.ID": {
"query": "tag3"
}
}
}
}
}
]
}
}
}
This correctly returns all three tags now in the matched_queries response:
{
"took" : 1,
"timed_out" : false,
"_shards" : {
"total" : 5,
"successful" : 5,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : 1,
"max_score" : 2.9424875,
"hits" : [
{
"_index" : "test",
"_type" : "_doc",
"_id" : "TaNy9G4BbvPS27u--oto",
"_score" : 2.9424875,
"_source" : {
"name" : "doc1",
"type" : "msword",
"TAGS" : [
{
"ID" : "ds1",
"TYPE" : "DATASOURCE"
},
{
"ID" : "wb1",
"TYPE" : "WORKBOOK"
},
{
"ID" : "wb2",
"TYPE" : "WORKBOOK"
}
]
},
"matched_queries" : [
"tag1-query",
"tag2-query",
"tag3-query"
]
}
]
}
}

Elasticsearch aggregate on inner_hits

I am trying to do some aggregations on the inner_hits of a nested object (queries), which are filtered based on the query date. The aggregation in the following block aggregates over the main document and all objects in "queries", not just the ones in the inner hits.
GET /networkcollection/branch_routers/_search/
{
"_source": false,
"query": {
"filtered": {
"query": {
"match": {
"mh": 123
}
},
"filter": {
"nested": {
"path": "queries",
"filter": {
"range": {
"queries.dateQuery": {
"gt": "20160101T200000.000Z",
"lte": "now"
}
}
},
"inner_hits": {}
}
}
}
},
"aggs": {
"queries": {
"filter": {
"nested": {
"path": "queries",
"filter": {
"range": {
"queries.dateQuery": {
"gte": "20160101T200000.000Z",
"lte": "now"
}
}
}
}
},
"aggs": {
"minDateQuery": {
"min": {
"field": "queries.dateQuery"
}
}
}
}
}
}
How can I accomplish this aggregation so that it aggregates only the "queries" objects returned in the inner_hits?
I'm very late on this answer, but it is very much possible to aggregate only on the inner_hits.
My ES version : 6.2.3
I'm providing a detailed response, with index mapping, a few dummy documents and the search_query + response.
The basic idea is to use the "filter" aggregation. You don't need to actually use the "query" part of the search_request at all, unless you're doing some very complex queries(to narrow down the aggregation profile). Most simple queries can easily be specified in the aggregation "filter".
Index setup:
PUT networkcollection
{
"mappings": {
"branch_routers" : {
"properties" : {
"mh" : {
"type" : "text"
},
"queries" : {
"type" : "nested",
"properties" : {
"dateQuery" : {
"type" : "date"
}
}
}
}
}
}
}
PUT networkcollection/branch_routers/1
{
"mh" : "corona",
"queries" : [
{
"dateQuery" : "2012-04-23"
},
{
"dateQuery" : "2013-04-23"
},
{
"dateQuery" : "2014-04-23"
},
{
"dateQuery" : "2015-04-23"
},
{
"dateQuery" : "2016-04-23"
},
{
"dateQuery" : "2017-04-23"
},
{
"dateQuery" : "2018-04-23"
},
{
"dateQuery" : "2019-04-23"
},
{
"dateQuery" : "2020-04-23"
}
]
}
PUT networkcollection/branch_routers/2
{
"mh" : "happy",
"queries" : [
{
"dateQuery" : "2009-04-23"
},
{
"dateQuery" : "2008-04-23"
},
{
"dateQuery" : "2007-04-23"
},
{
"dateQuery" : "2015-04-23"
},
{
"dateQuery" : "2016-04-23"
},
{
"dateQuery" : "2017-04-23"
},
{
"dateQuery" : "2018-04-23"
},
{
"dateQuery" : "2019-04-23"
},
{
"dateQuery" : "2020-04-23"
}
]
}
PUT networkcollection/branch_routers/3
{
"mh" : "happy",
"queries" : [
{
"dateQuery" : "2001-04-23"
},
{
"dateQuery" : "2008-04-23"
},
{
"dateQuery" : "2007-04-23"
},
{
"dateQuery" : "2015-04-23"
},
{
"dateQuery" : "2016-04-23"
},
{
"dateQuery" : "2017-04-23"
},
{
"dateQuery" : "2018-04-23"
},
{
"dateQuery" : "2019-04-23"
},
{
"dateQuery" : "2020-04-23"
}
]
}
We added three basic documents, now we try to filter on the "mh" as "happy", and we want the minimum dateQuery in the nested objects, such that it filters between the year 2016 and now (We're currently in the middle of the corona-virus lockdown, so you know the year :) ).
Search Query:
GET networkcollection/branch_routers/_search
{
"_source": false,
"query": {
"match": {
"mh": "happy"
}
},
"aggs": {
"filtered_agg": {
"filter": {
"match" : {
"mh" : "happy"
}
},
"aggs": {
"filtered_nested": {
"nested": {
"path": "queries"
},
"aggs": {
"dateQuery_agg": {
"date_range": {
"field": "queries.dateQuery",
"ranges": [
{
"from": "now-4y/y",
"to": "now"
}
]
},
"aggs": {
"min_date": {
"min": {
"field": "queries.dateQuery"
}
}
}
}
}
}
}
}
}
}
Response:
{
"took": 2,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"skipped": 0,
"failed": 0
},
"hits": {
"total": 2,
"max_score": 0.2876821,
"hits": [
{
"_index": "networkcollection",
"_type": "branch_routers",
"_id": "2",
"_score": 0.2876821
},
{
"_index": "networkcollection",
"_type": "branch_routers",
"_id": "3",
"_score": 0.2876821
}
]
},
"aggregations": {
"filtered_agg": {
"doc_count": 2,
"filtered_nested": {
"doc_count": 18,
"dateQuery_agg": {
"buckets": [
{
"key": "2016-01-01T00:00:00.000Z-2020-05-14T23:02:31.611Z",
"from": 1451606400000,
"from_as_string": "2016-01-01T00:00:00.000Z",
"to": 1589497351611,
"to_as_string": "2020-05-14T23:02:31.611Z",
"doc_count": 10,
"min_date": {
"value": 1461369600000,
"value_as_string": "2016-04-23T00:00:00.000Z"
}
}
]
}
}
}
}
}
As you can see, it correctly filters out the documents listed with "mh" = "corona", and keeps only the two documents with "mh" = "happy", and then filters only those "queries" objects which lie in my specified date range, and finally provides the min_date.

Resources