ELASTICSEARCH - Inner_hits aggregations - elasticsearch

I am trying to run an aggregation restricted to the nested documents matched by {"wildcard": {"data.addresses.ces.cp": "maria*"}} and
{"match": {"data.addresses.ces.direction": "rodriguez"}}, but the aggregation is not limited to the results of the query.
{ "_source": "created_at",
"size": 1,
"sort": [
{
"created_at.keyword": {
"order": "desc"
}
}
],
"query": {
"nested": {
"path": "data.addresses",
"inner_hits": {
},
"query": {
"nested": {
"path": "data.addresses.ces",
"query":
{"wildcare": {"data.addresses.ces.cp": "maria*"},
{"macth": { "data.addresses.ces.direction": "rodriguez"}}
}
}
}
}
}
How can I perform an aggregation that returns only the values matched by the query, and not all the values in the JSON?
In case aggregations don't support inner_hits, how could I apply the wildcard and match conditions in aggs?

You need to repeat the filter conditions in the aggregation part so that the aggregation only runs on the selected nested documents:
{
"_source": "created_at",
"size": 1,
"sort": [
{
"created_at.keyword": {
"order": "desc"
}
}
],
"query": {
"nested": {
"path": "data.addresses",
"inner_hits": {},
"query": {
"nested": {
"path": "data.addresses.ces",
"query": {
"bool": {
"filter": [
{
"wildcard": {
"data.addresses.ces.cp": "maria*"
}
},
{
"match": {
"data.addresses.ces.direction": "rodriguez"
}
}
]
}
}
}
}
}
},
"aggs": {
"addresses": {
"nested": {
"path": "data.addresses"
},
"aggs": {
"ces": {
"nested": {
"path": "data.addresses.ces"
},
"aggs": {
"query": {
"filter": {
"bool": {
"filter": [
{
"wildcard": {
"data.addresses.ces.cp": "maria*"
}
},
{
"match": {
"data.addresses.ces.direction": "rodriguez"
}
}
]
}
},
"aggs": {
"cp": {
"terms": {
"field": "data.addresses.ces.cp"
}
},
"direction": {
"terms": {
"field": "data.addresses.ces.direction"
}
}
}
}
}
}
}
}
}
}

Related

sorting on multiple aggregations

Say I have documents like so, in a people index:
{
zip: string,
birthDate: Date,
graduationDate: Date,
marriedDate: Date,
deathDate: Date,
...
}
I want to be able to do a single query to Elasticsearch where I retrieve several different counts of records, all with a birthDate within a specific range, each further restricted by a secondary query like graduationDate:* or marriedDate:*, and then grouped by zip. The kicker is that I want to be able to sort by these counts.
So far I have this:
{
"size": 0,
"query": {
"bool": {
"filter": [
{
"query_string": {
"query": "birthDate:[1979-03-01 TO 1979-03-31]",
}
}
]
}
},
"aggs": {
“total”: {
"aggs": {
"group_by_zip": {
"composite": {
"sources": [
{
"zip": {
"terms": {
"field": "zip"
}
}
}
]
}
}
}
},
"graduated": {
"filter": {
"query_string": {
"query": "graduationDate:*",
}
},
"aggs": {
"group_by_zip": {
"composite": {
"sources": [
{
"zip": {
"terms": {
"field": "zip"
}
}
}
]
}
}
}
},
"married": {
"filter": {
"query_string": {
"query": "marriedDate:*",
}
},
"aggs": {
"group_by_zip": {
"composite": {
"sources": [
{
"zip": {
"terms": {
"field": "zip"
}
}
}
]
}
}
}
},
"died": {
"filter": {
"query_string": {
"query": "deathDate:*",
}
},
"aggs": {
"group_by_zip": {
"composite": {
"sources": [
{
"zip": {
"terms": {
"field": "zip"
}
}
}
]
}
}
}
}
}
}
But I can't figure out how to sort this by, say, the doc_count of married (descending) and get the same collection of zips for each of the aggs. There are 41692 zip codes, so the results obviously need to be paginated.
I figured it out!
{
"size": 0,
"query": {
"bool": {
"filter": [
{
"query_string": {
"query": "birthDate:[1979-03-28 TO 1979-03-31]",
}
}
]
}
},
"aggs": {
"group_by_zip": {
"composite": {
"sources": [
{
"zip": {
"terms": {
"field": "zip"
}
}
}
]
},
"aggs": {
"total": {
"filter": {
"query_string": {
"query": "*"
}
}
},
"graduated": {
"filter": {
"query_string": {
"query": "graduationDate:*" }
}
},
"married": {
"filter": {
"query_string": {
"query": "marriedDate:*"
}
}
},
"died": {
"filter": {
"query_string": {
"query": "deadDate:*"
}
}
},
"sort_it": {
"bucket_sort": {
"sort": [
{"graduated>_count": {"order": "desc"}}
]
}
}
}
}
}
}

Elasticsearch Composite aggregation with pagination

I am using this query to fetch aggregated results, but because the number of documents matching the query criteria is very large, the number of buckets exceeds 10000.
How should I write/modify this query so that I can paginate the result?
I have read that bucket aggregations such as terms don't support pagination, but that the query can be rewritten to use a composite aggregation, which does support paginating the response.
Any alternate way to paginate the response would also be helpful.
{
"query": {
"bool": {
"must": [
{
"range": {
"timestamp": {
"gte": "2022-04-26T00:00:00.000Z",
"lte": "2022-04-26T23:59:59.999Z"
}
}
},
{
"terms": {
"job.keyword": [
"JOB_1",
"JOB_2",
"JOB_3"
]
}
},
{
"bool": {
"should": [
{
"nested": {
"path": "tags",
"query": {
"bool": {
"should": [
{
"bool": {
"must": [
{
"term": {
"tags.name.keyword": "jobType"
}
},
{
"term": {
"tags.value.keyword": "discrete"
}
}
]
}
}
]
}
}
}
}
]
}
}
]
}
},
"size": 0,
"aggs": {
"job": {
"terms": {
"field": "job.keyword"
},
"aggs": {
"accountId": {
"terms": {
"field": "accountId.keyword",
"size": 10000
},
"aggs": {
"accountUsageStats": {
"stats": {
"field": "count"
}
},
"tags": {
"top_hits": {
"size": 1,
"_source": {
"include": [
"tags"
]
}
}
}
}
}
}
}
}
}

ElasticSearch - combining search queries not working

I would like to get the intersection of 2 queries.
I have 3 documents in the index:
"_id": "68c220aa-ea51-4f84-b880-29af3302cae9",
"_id": "b6c1c3c5-e959-480f-a145-f5598fafea66",
"_id": "2d30de72-0a2b-465c-8770-970ad9760d47",
Query1:
{
"from": 0,
"query": {
"nested": {
"path": "attributes",
"query": {
"bool": {
"must": [
{
"bool": {
"must": [
{
"match_phrase": {
"attributes.asReference": {
"query": "8670ff39-6a0d-4ae8-e217-08d88efd4771"
}
}
},
{
"match_phrase": {
"attributes.attributeId": {
"query": "f51ca670-4223-4ea2-8007-d111dd38a14f"
}
}
}
]
}
}
]
}
}
}
},
"size": 10,
"sort": [
{
"modified": {
"order": "asc"
}
},
{
"created": {
"order": "asc"
}
}
]
}
returns all 3 documents as it should
"_id": "68c220aa-ea51-4f84-b880-29af3302cae9",
"_id": "b6c1c3c5-e959-480f-a145-f5598fafea66",
"_id": "2d30de72-0a2b-465c-8770-970ad9760d47",
Then I do query2:
{
"from": 0,
"query": {
"nested": {
"path": "attributes",
"query": {
"bool": {
"must": [
{
"bool": {
"must": [
{
"match_phrase": {
"attributes.asShortString": {
"query": "RA-005"
}
}
},
{
"match_phrase": {
"attributes.attributeId": {
"query": "7ff3dbc1-3586-4475-9162-5430bb06c6d0"
}
}
}
]
}
}
]
}
}
}
},
"size": 10,
"sort": [
{
"modified": {
"order": "asc"
}
},
{
"created": {
"order": "asc"
}
}
]
}
returns 1 document:
"_id": "b6c1c3c5-e959-480f-a145-f5598fafea66"
But when I combine the queries to:
{
"from": 0,
"query": {
"nested": {
"path": "attributes",
"query": {
"bool": {
"must": [
{
"bool": {
"must": [
{
"match_phrase": {
"attributes.asReference": {
"query": "8670ff39-6a0d-4ae8-e217-08d88efd4771"
}
}
},
{
"match_phrase": {
"attributes.attributeId": {
"query": "f51ca670-4223-4ea2-8007-d111dd38a14f"
}
}
}
]
}
},
{
"bool": {
"must": [
{
"match_phrase": {
"attributes.asShortString": {
"query": "RA-005"
}
}
},
{
"match_phrase": {
"attributes.attributeId": {
"query": "7ff3dbc1-3586-4475-9162-5430bb06c6d0"
}
}
}
]
}
}
]
}
}
}
},
"size": 10,
"sort": [
{
"modified": {
"order": "asc"
}
},
{
"created": {
"order": "asc"
}
}
]
}
Here I do not get any documents.
So the subqueries work individually, but the combined query does not (it produces 0 results).
What am I missing here?
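Due to the way nested documents and queries work, all clauses inside a single nested query must be satisfied by the same nested document. Your combined query therefore asks for one attribute whose attributeId equals two different values at once, which can never match. Instead, you need two separate nested queries in your bool/must query, because each one may match a different nested document of the same parent document: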
{
"from": 0,
"query": {
"bool": {
"must": [
{
"nested": {
"path": "attributes",
"query": {
"bool": {
"must": [
{
"bool": {
"must": [
{
"match_phrase": {
"attributes.asReference": {
"query": "8670ff39-6a0d-4ae8-e217-08d88efd4771"
}
}
},
{
"match_phrase": {
"attributes.attributeId": {
"query": "f51ca670-4223-4ea2-8007-d111dd38a14f"
}
}
}
]
}
}
]
}
}
}
},
{
"nested": {
"path": "attributes",
"query": {
"bool": {
"must": [
{
"bool": {
"must": [
{
"match_phrase": {
"attributes.asShortString": {
"query": "RA-005"
}
}
},
{
"match_phrase": {
"attributes.attributeId": {
"query": "7ff3dbc1-3586-4475-9162-5430bb06c6d0"
}
}
}
]
}
}
]
}
}
}
}
]
}
},
"size": 10,
"sort": [
{
"modified": {
"order": "asc"
}
},
{
"created": {
"order": "asc"
}
}
]
}

Filter OUT matching documents in elasticsearch with aggregation

I'm attempting to query statistics about documents in elasticsearch with the following query. The problem is that I'm trying to ignore documents with certain values for the field logger, but I can't figure out how. The query below selects all the right documents into the set, but it doesn't exclude documents with the undesirable values.
Any suggestions very welcome.
{
"query": {
"bool": {
"filter": {
"bool": {
"must_not": {
"terms": {
"logger": [
"experimentsplitsegmentlogger_errors",
"ExperimentLogger"
]
}
}
}
},
"must_not": {
"terms": {
"logger": [
"experimentsplitsegmentlogger_errors",
"ExperimentLogger"
]
}
},
"must": {
"exists": {
"field": "count"
}
}
}
},
"aggs": {
"keys": {
"filter": {
"bool": {
"must_not": {
"terms": {
"logger": [
"experimentsplitsegmentlogger_errors",
"ExperimentLogger"
]
}
}
}
},
"terms": {
"field": "logger"
},
"aggs": {
"hostnames": {
"terms": {
"field": "hostname"
},
"aggs": {
"pids": {
"terms": {
"field": "pid"
},
"aggs": {
"time_stats": {
"stats": {
"field": "timestamp"
}
},
"count_stats": {
"stats": {
"field": "count"
}
}
}
}
}
}
}
}
},
"size": 0
}
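This should work for you: I removed the filter that was declared at the same level as terms inside the "keys" aggregation, and kept the exclusion of the unwanted logger values in the query instead.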
{
"query": {
"bool": {
"filter": {
"not": {
"terms": {
"logger": [
"experimentsplitsegmentlogger_errors",
"ExperimentLogger"
]
}
}
},
"must": {
"exists": {
"field": "count"
}
}
}
},
"aggs": {
"keys": {
"terms": {
"field": "logger"
}
}
},
"size": 0
}

Elasticsearch, combining nested filter with normal filter

I figured out how to map and filter on nested queries in Elasticsearch. Yay! But what isn't working yet is combining a 'normal' filter with a nested filter. The example you see here doesn't give an error, and the second (nested) filter seems to be working, but the first one isn't. In this example I want both filters to be applied, not just one. What am I doing wrong?
{
"size": 100,
"sort": [],
"query": {
"filtered": {
"query": {
"match_all": []
},
"filter": {
"bool": {
"must": [
{
"terms": {
"category.untouched": [
"Chargers"
]
}
}
],
"should": [],
"must_not": {
"missing": {
"field": "model"
}
}
}
},
"filter": {
"nested": {
"path":"phones",
"filter":{
"bool": {
"must": [
{
"term": {
"phones.name.untouched":"Galaxy S3 Neo I9301"
}
}
]
}
}
}
},
"strategy": "query_first"
}
},
"aggs": {
"category.untouched": {
"terms": {
"field": "category.untouched"
}
},
"brand.untouched": {
"terms": {
"field": "brand.untouched"
}
},
"price_seperate": {
"histogram": {
"field": "price_seperate",
"interval": 10,
"min_doc_count": 1
}
},
"phones.name.untouched": {
"nested": {
"path": "phones"
},
"aggs": {
"phones.name.untouched": {
"terms": {
"field": "phones.name.untouched"
}
}
}
}
}
}
You have two keys with the name "filter" (in "filtered"), so one of them is going to get ignored. You probably just need to wrap your two filters in a "bool" (bools can be nested as needed).
I can't test it without setting up some test data, but try this and see if it gets you closer:
{
"size": 100,
"sort": [],
"query": {
"filtered": {
"query": {
"match_all": []
},
"filter": {
"bool": {
"must": [
{
"terms": {
"category.untouched": [
"Chargers"
]
}
},
{
"nested": {
"path": "phones",
"filter": {
"term": {
"phones.name.untouched": "Galaxy S3 Neo I9301"
}
}
}
}
],
"should": [],
"must_not": {
"missing": {
"field": "model"
}
}
}
},
"strategy": "query_first"
}
},
"aggs": {
"category.untouched": {
"terms": {
"field": "category.untouched"
}
},
"brand.untouched": {
"terms": {
"field": "brand.untouched"
}
},
"price_seperate": {
"histogram": {
"field": "price_seperate",
"interval": 10,
"min_doc_count": 1
}
},
"phones.name.untouched": {
"nested": {
"path": "phones"
},
"aggs": {
"phones.name.untouched": {
"terms": {
"field": "phones.name.untouched"
}
}
}
}
}
}

Resources