Say I have documents like so, in a people index:
{
zip: string,
birthDate: Date,
graduationDate: Date,
marriedDate: Date,
deathDate: Date,
...
}
I want to be able to run a single Elasticsearch query that retrieves several different counts of records, all with a birthDate within a specific range, each narrowed by a secondary condition such as graduationDate:* or marriedDate:*, and grouped by zip. The kicker is that I want to be able to sort by these counts.
So far I have this:
{
  "size": 0,
  "query": {
    "bool": {
      "filter": [
        {
          "query_string": {
            "query": "birthDate:[1979-03-01 TO 1979-03-31]"
          }
        }
      ]
    }
  },
  "aggs": {
    "total": {
      "filter": {
        "match_all": {}
      },
      "aggs": {
        "group_by_zip": {
          "composite": {
            "sources": [
              { "zip": { "terms": { "field": "zip" } } }
            ]
          }
        }
      }
    },
    "graduated": {
      "filter": {
        "query_string": {
          "query": "graduationDate:*"
        }
      },
      "aggs": {
        "group_by_zip": {
          "composite": {
            "sources": [
              { "zip": { "terms": { "field": "zip" } } }
            ]
          }
        }
      }
    },
    "married": {
      "filter": {
        "query_string": {
          "query": "marriedDate:*"
        }
      },
      "aggs": {
        "group_by_zip": {
          "composite": {
            "sources": [
              { "zip": { "terms": { "field": "zip" } } }
            ]
          }
        }
      }
    },
    "died": {
      "filter": {
        "query_string": {
          "query": "deathDate:*"
        }
      },
      "aggs": {
        "group_by_zip": {
          "composite": {
            "sources": [
              { "zip": { "terms": { "field": "zip" } } }
            ]
          }
        }
      }
    }
  }
}
But I can't figure out how to sort this by, say, the doc_count of married in descending order, and still get the same collection of zips for each of the aggs. There are 41692 zip codes, so this obviously needs to be paginated.
I figured it out!
{
  "size": 0,
  "query": {
    "bool": {
      "filter": [
        {
          "query_string": {
            "query": "birthDate:[1979-03-28 TO 1979-03-31]"
          }
        }
      ]
    }
  },
  "aggs": {
    "group_by_zip": {
      "composite": {
        "sources": [
          { "zip": { "terms": { "field": "zip" } } }
        ]
      },
      "aggs": {
        "total": {
          "filter": {
            "query_string": { "query": "*" }
          }
        },
        "graduated": {
          "filter": {
            "query_string": { "query": "graduationDate:*" }
          }
        },
        "married": {
          "filter": {
            "query_string": { "query": "marriedDate:*" }
          }
        },
        "died": {
          "filter": {
            "query_string": { "query": "deathDate:*" }
          }
        },
        "sort_it": {
          "bucket_sort": {
            "sort": [
              { "graduated>_count": { "order": "desc" } }
            ]
          }
        }
      }
    }
  }
}
Related
I am using this query to fetch aggregated results, but because the number of documents matching the query criteria is very large, the number of buckets exceeds 10000.
How should I write/modify this query so that I can paginate the result?
I have read that bucket aggregations don't support pagination, but that the query can be converted to a composite aggregation, which does support paginating the response.
Any alternate way to paginate the response would also be helpful.
{
  "query": {
    "bool": {
      "must": [
        {
          "range": {
            "timestamp": {
              "gte": "2022-04-26T00:00:00.000Z",
              "lte": "2022-04-26T23:59:59.999Z"
            }
          }
        },
        {
          "terms": {
            "job.keyword": ["JOB_1", "JOB_2", "JOB_3"]
          }
        },
        {
          "bool": {
            "should": [
              {
                "nested": {
                  "path": "tags",
                  "query": {
                    "bool": {
                      "should": [
                        {
                          "bool": {
                            "must": [
                              { "term": { "tags.name.keyword": "jobType" } },
                              { "term": { "tags.value.keyword": "discrete" } }
                            ]
                          }
                        }
                      ]
                    }
                  }
                }
              }
            ]
          }
        }
      ]
    }
  },
  "size": 0,
  "aggs": {
    "job": {
      "terms": { "field": "job.keyword" },
      "aggs": {
        "accountId": {
          "terms": { "field": "accountId.keyword", "size": 10000 },
          "aggs": {
            "accountUsageStats": {
              "stats": { "field": "count" }
            },
            "tags": {
              "top_hits": {
                "size": 1,
                "_source": { "includes": ["tags"] }
              }
            }
          }
        }
      }
    }
  }
}
I would like to get the intersection of 2 queries.
I have 3 documents in the index:
"_id": "68c220aa-ea51-4f84-b880-29af3302cae9",
"_id": "b6c1c3c5-e959-480f-a145-f5598fafea66",
"_id": "2d30de72-0a2b-465c-8770-970ad9760d47",
Query1:
{
  "from": 0,
  "query": {
    "nested": {
      "path": "attributes",
      "query": {
        "bool": {
          "must": [
            {
              "bool": {
                "must": [
                  {
                    "match_phrase": {
                      "attributes.asReference": { "query": "8670ff39-6a0d-4ae8-e217-08d88efd4771" }
                    }
                  },
                  {
                    "match_phrase": {
                      "attributes.attributeId": { "query": "f51ca670-4223-4ea2-8007-d111dd38a14f" }
                    }
                  }
                ]
              }
            }
          ]
        }
      }
    }
  },
  "size": 10,
  "sort": [
    { "modified": { "order": "asc" } },
    { "created": { "order": "asc" } }
  ]
}
returns all 3 documents as it should
"_id": "68c220aa-ea51-4f84-b880-29af3302cae9",
"_id": "b6c1c3c5-e959-480f-a145-f5598fafea66",
"_id": "2d30de72-0a2b-465c-8770-970ad9760d47",
Then I do query2:
{
  "from": 0,
  "query": {
    "nested": {
      "path": "attributes",
      "query": {
        "bool": {
          "must": [
            {
              "bool": {
                "must": [
                  {
                    "match_phrase": {
                      "attributes.asShortString": { "query": "RA-005" }
                    }
                  },
                  {
                    "match_phrase": {
                      "attributes.attributeId": { "query": "7ff3dbc1-3586-4475-9162-5430bb06c6d0" }
                    }
                  }
                ]
              }
            }
          ]
        }
      }
    }
  },
  "size": 10,
  "sort": [
    { "modified": { "order": "asc" } },
    { "created": { "order": "asc" } }
  ]
}
returns 1 document:
"_id": "b6c1c3c5-e959-480f-a145-f5598fafea66"
But when I combine the queries to:
{
  "from": 0,
  "query": {
    "nested": {
      "path": "attributes",
      "query": {
        "bool": {
          "must": [
            {
              "bool": {
                "must": [
                  {
                    "match_phrase": {
                      "attributes.asReference": { "query": "8670ff39-6a0d-4ae8-e217-08d88efd4771" }
                    }
                  },
                  {
                    "match_phrase": {
                      "attributes.attributeId": { "query": "f51ca670-4223-4ea2-8007-d111dd38a14f" }
                    }
                  }
                ]
              }
            },
            {
              "bool": {
                "must": [
                  {
                    "match_phrase": {
                      "attributes.asShortString": { "query": "RA-005" }
                    }
                  },
                  {
                    "match_phrase": {
                      "attributes.attributeId": { "query": "7ff3dbc1-3586-4475-9162-5430bb06c6d0" }
                    }
                  }
                ]
              }
            }
          ]
        }
      }
    }
  },
  "size": 10,
  "sort": [
    { "modified": { "order": "asc" } },
    { "created": { "order": "asc" } }
  ]
}
Here I do not get any documents.
So the subqueries work on their own, but combined they do not (the combined query produces 0 results).
What am I missing here?
Due to the way nested documents and queries work, you need to have two separate nested queries in your bool/must query, because each will/might match a different nested document of the same parent document:
{
  "from": 0,
  "query": {
    "bool": {
      "must": [
        {
          "nested": {
            "path": "attributes",
            "query": {
              "bool": {
                "must": [
                  {
                    "bool": {
                      "must": [
                        {
                          "match_phrase": {
                            "attributes.asReference": { "query": "8670ff39-6a0d-4ae8-e217-08d88efd4771" }
                          }
                        },
                        {
                          "match_phrase": {
                            "attributes.attributeId": { "query": "f51ca670-4223-4ea2-8007-d111dd38a14f" }
                          }
                        }
                      ]
                    }
                  }
                ]
              }
            }
          }
        },
        {
          "nested": {
            "path": "attributes",
            "query": {
              "bool": {
                "must": [
                  {
                    "bool": {
                      "must": [
                        {
                          "match_phrase": {
                            "attributes.asShortString": { "query": "RA-005" }
                          }
                        },
                        {
                          "match_phrase": {
                            "attributes.attributeId": { "query": "7ff3dbc1-3586-4475-9162-5430bb06c6d0" }
                          }
                        }
                      ]
                    }
                  }
                ]
              }
            }
          }
        }
      ]
    }
  },
  "size": 10,
  "sort": [
    { "modified": { "order": "asc" } },
    { "created": { "order": "asc" } }
  ]
}
I am trying to do an aggregation over the documents matched by the {"wildcard": {"data.addresses.ces.cp": "maria*"}} and
{"match": {"data.addresses.ces.direction": "rodriguez"}} conditions, but it does not return only the results of the query.
{
  "_source": "created_at",
  "size": 1,
  "sort": [
    { "created_at.keyword": { "order": "desc" } }
  ],
  "query": {
    "nested": {
      "path": "data.addresses",
      "inner_hits": {},
      "query": {
        "nested": {
          "path": "data.addresses.ces",
          "query": {
            "bool": {
              "filter": [
                { "wildcard": { "data.addresses.ces.cp": "maria*" } },
                { "match": { "data.addresses.ces.direction": "rodriguez" } }
              ]
            }
          }
        }
      }
    }
  }
}
How can I perform an aggregation that returns only the values matched by the query, and not all the values in the JSON?
In case aggregations don't support inner_hits, how could I apply the wildcard and match conditions in aggs?
You need to repeat the filter conditions in the aggregation part so that the aggregation only runs on the selected nested documents:
{
  "_source": "created_at",
  "size": 1,
  "sort": [
    { "created_at.keyword": { "order": "desc" } }
  ],
  "query": {
    "nested": {
      "path": "data.addresses",
      "inner_hits": {},
      "query": {
        "nested": {
          "path": "data.addresses.ces",
          "query": {
            "bool": {
              "filter": [
                { "wildcard": { "data.addresses.ces.cp": "maria*" } },
                { "match": { "data.addresses.ces.direction": "rodriguez" } }
              ]
            }
          }
        }
      }
    }
  },
  "aggs": {
    "addresses": {
      "nested": { "path": "data.addresses" },
      "aggs": {
        "ces": {
          "nested": { "path": "data.addresses.ces" },
          "aggs": {
            "query": {
              "filter": {
                "bool": {
                  "filter": [
                    { "wildcard": { "data.addresses.ces.cp": "maria*" } },
                    { "match": { "data.addresses.ces.direction": "rodriguez" } }
                  ]
                }
              },
              "aggs": {
                "cp": {
                  "terms": { "field": "data.addresses.ces.cp" }
                },
                "direction": {
                  "terms": { "field": "data.addresses.ces.direction" }
                }
              }
            }
          }
        }
      }
    }
  }
}
I'm attempting to query statistics about documents in elasticsearch with the following query. The problem is that I'm trying to ignore documents with certain values for the field logger, but I can't figure out how. The query below selects all the right documents into the set, but it doesn't exclude documents with the undesirable values.
Any suggestions very welcome.
{
  "query": {
    "bool": {
      "filter": {
        "bool": {
          "must_not": {
            "terms": {
              "logger": ["experimentsplitsegmentlogger_errors", "ExperimentLogger"]
            }
          }
        }
      },
      "must_not": {
        "terms": {
          "logger": ["experimentsplitsegmentlogger_errors", "ExperimentLogger"]
        }
      },
      "must": {
        "exists": { "field": "count" }
      }
    }
  },
  "aggs": {
    "keys": {
      "filter": {
        "bool": {
          "must_not": {
            "terms": {
              "logger": ["experimentsplitsegmentlogger_errors", "ExperimentLogger"]
            }
          }
        }
      },
      "terms": { "field": "logger" },
      "aggs": {
        "hostnames": {
          "terms": { "field": "hostname" },
          "aggs": {
            "pids": {
              "terms": { "field": "pid" },
              "aggs": {
                "time_stats": {
                  "stats": { "field": "timestamp" }
                },
                "count_stats": {
                  "stats": { "field": "count" }
                }
              }
            }
          }
        }
      }
    }
  },
  "size": 0
}
This should work for you as I removed filter and terms from the same level of aggregation.
{
  "query": {
    "bool": {
      "must": {
        "exists": { "field": "count" }
      },
      "must_not": {
        "terms": {
          "logger": ["experimentsplitsegmentlogger_errors", "ExperimentLogger"]
        }
      }
    }
  },
  "aggs": {
    "keys": {
      "terms": { "field": "logger" }
    }
  },
  "size": 0
}
This is my search:
{
  "query": {
    "filtered": {
      "filter": {
        "term": {
          "cityId": "10777"
        }
      },
      "query": {
        "query_string": {
          "query": "pizza",
          "fields": [
            "name",
            "main",
            "category.name"
          ]
        }
      }
    }
  },
  "sort": [
    {
      "premium": {
        "order": "desc"
      }
    }
  ]
}
This works perfectly.
It returns results from several categories, and I would like to group the results by category.
example:
Group by category "pizzerias"
All you have to do is to add a terms aggregation to the mix and you're done.
Supposing your category field is the category.name one, you can do it like this.
{
  "query": {
    "filtered": {
      "filter": {
        "term": { "cityId": "10777" }
      },
      "query": {
        "query_string": {
          "query": "pizza",
          "fields": ["name", "main", "category.name"]
        }
      }
    }
  },
  "sort": [
    { "premium": { "order": "desc" } }
  ],
  "aggs": {
    "categories": {
      "terms": { "field": "category.name" }
    }
  }
}