Elasticsearch aggregations query for multiselection - elasticsearch

I have the following problem: in my application, I have multiple multi-select comboboxes for filtering search results.
The comboboxes show facets of the search results:
Depending on the selections made in one filter, the facet results decrease in the other filters. So far, so good. However, the results also decrease for the other possible selections within the same combobox:
Here, I would need the facets computed WITHOUT the selections already made in this particular field.
The query I use so far looks like this:
{
"size": 0,
"query": {
"bool": {
"must": [
{
"exists": {
"field": "depictionID"
}
},
{
"terms": {
"cave.caveTypeID": [
4
]
}
},
{
"terms": {
"cave.siteID": [
1
]
}
},
{
"terms": {
"cave.districtID": [
1
]
}
},
{
"terms": {
"cave.regionID": [
1
]
}
}
]
}
},
"aggs": {
"CaveType": {
"terms": {
"field": "cave.caveTypeID"
}
},
"Region": {
"terms": {
"field": "cave.regionID"
}
},
"Site": {
"terms": {
"field": "cave.siteID"
}
},
"District": {
"terms": {
"field": "cave.districtID"
}
}
}
}
So far I have figured out that I need to take the selected fields out of the query and filter on them in the aggregation section instead. However, I do not understand how that could work when two or more comboboxes already have selections.
Does anybody have a good idea how to solve this problem?
Sincerely,
Erik

You need to use post_filter instead, like this:
{
"size": 0,
"post_filter": {
"bool": {
"must": [
{
"exists": {
"field": "depictionID"
}
},
{
"terms": {
"cave.caveTypeID": [
4
]
}
},
{
"terms": {
"cave.siteID": [
1
]
}
},
{
"terms": {
"cave.districtID": [
1
]
}
},
{
"terms": {
"cave.regionID": [
1
]
}
}
]
}
},
"aggs": {
"CaveType": {
"terms": {
"field": "cave.caveTypeID"
}
},
"Region": {
"terms": {
"field": "cave.regionID"
}
},
"Site": {
"terms": {
"field": "cave.siteID"
}
},
"District": {
"terms": {
"field": "cave.districtID"
}
}
}
}

Well, I did solve the problem by shifting the filters into the aggregation part. However, I had to create a separate aggregation for every single combobox, because each combobox needs an aggregation WITHOUT its own filter; as a result, the aggregations grew dramatically:
{
"aggs": {
"caveType": {
"filter": {
"terms": {
"cave.districtID": [
4
]
}
},
"aggs": {
"site": {
"filter": {
"terms": {
"cave.siteID": [
1
]
}
},
"aggs": {
"caveType": {
"terms": {
"size": 10000,
"field": "cave.caveTypeID"
}
}
}
}
}
},
"site": {
"filter": {
"terms": {
"cave.districtID": [
4
]
}
},
"aggs": {
"caveType": {
"filter": {
"terms": {
"cave.caveTypeID": [
4
]
}
},
"aggs": {
"site": {
"terms": {
"size": 10000,
"field": "cave.siteID"
}
}
}
}
}
},
"district": {
"filter": {
"terms": {
"cave.siteID": [
1
]
}
},
"aggs": {
"caveType": {
"filter": {
"terms": {
"cave.caveTypeID": [
4
]
}
},
"aggs": {
"district": {
"terms": {
"size": 10000,
"field": "cave.districtID"
}
}
}
}
}
},
"region": {
"filter": {
"terms": {
"cave.districtID": [
4
]
}
},
"aggs": {
"site": {
"filter": {
"terms": {
"cave.siteID": [
1
]
}
},
"aggs": {
"caveType": {
"filter": {
"terms": {
"cave.caveTypeID": [
4
]
}
},
"aggs": {
"region": {
"terms": {
"size": 10000,
"field": "cave.regionID"
}
}
}
}
}
}
}
}
},
"size": 0
}
If anyone has a more "elegant" way to do that, be my guest.

Related

How to order serial_diff aggregation result in Elasticsearch?

I have built a query based on the serial_diff aggregation. I am trying to sort the result by the output of the serial_diff aggregation, but I am struggling to get the result in order. The query is below.
GET db/_search
{
"size": 0,
"query": {
"bool": {
"must": [
{
"terms": {
"Name": [
"q"
]
}
}
],
"filter": [
{
"range": {
"ts": {
"gte": "2020-03-09T09:00:00.000Z",
"lte": "2020-03-09T12:40:00.000Z",
"format": "date_optional_time"
}
}
}
]
}
},
"aggs": {
"sourceNameCount": {
"cardinality": {
"field": "sourceName"
}
},
"sourceName": {
"terms": {
"size": 100,
"field": "sourceName"
},
"aggs": {
"timeseries": {
"date_histogram": {
"field": "ts",
"min_doc_count": 1,
"interval": "15m",
"order": {
"_key": "asc"
}
},
"aggs": {
"the_sum":{
"avg":{
"field": "libVal"
}
},
"ts_diff":{
"serial_diff": {
"buckets_path": "the_sum",
"lag": 1
}
}
}
}
}
}
}
}

Filter is not being applied to aggregation

I'm trying to get the billing of a product sold by a specific user, but it seems that the query is not being applied to the sum aggregation.
Could someone help me, please?
{
"query": {
"bool": {
"filter": [
{ "term": { "seller": 1 } },
{"term": { "product": 2 } }
]
}
},
"size": 0,
"aggs": {
"product": {
"terms": {
"field": "product"
},
"aggregations": {
"billing": {
"sum": {
"field": "price"
}
},
"aggregation": {
"bucket_sort": {
"sort": [
{
"billing": {
"order": "desc"
}
}
]
}
}
}
}
}
}
Try nesting your existing aggregations within another terms aggregation on "seller".
{
"query": {
"bool": {
"filter": [
{
"term": {
"seller": 1
}
},
{
"term": {
"product": 2
}
}
]
}
},
"size": 0,
"aggs": {
"seller": {
"terms": {
"field": "seller",
"size": 1
},
"aggs": {
"product": {
"terms": {
"field": "product",
"size": 1
},
"aggregations": {
"billing": {
"sum": {
"field": "price"
}
},
"aggregation": {
"bucket_sort": {
"sort": [
{
"billing": {
"order": "desc"
}
}
]
}
}
}
}
}
}
}
}

Elasticsearch summing buckets

I have the following request, which returns the count of all documents with a status of either "Accepted", "Released" or "Closed".
{
"size": 0,
"query": {
"bool": {
"must": [
{
"query_string": {
"query": "*",
"analyze_wildcard": true
}
}
],
"must_not": []
}
},
"aggs": {
"slices": {
"terms": {
"field": "status.raw",
"include": {
"pattern": "Accepted|Released|Closed"
}
}
}
}
}
In my case the response is:
"buckets": [
{
"key": "Closed",
"doc_count": 2216
},
{
"key": "Accepted",
"doc_count": 8
},
{
"key": "Released",
"doc_count": 6
}
]
Now I'd like to add all of them up into a single field.
I tried using pipeline aggregations and even tried the following sum_bucket (which apparently only works on multi-bucket):
"total":{
"sum_bucket":{
"buckets_path": "slices"
}
}
Anyone able to help me out with this?
With sum_bucket and your existing aggregation:
"aggs": {
"slices": {
"terms": {
"field": "status.raw",
"include": {
"pattern": "Accepted|Released|Closed"
}
}
},
"sum_total": {
"sum_bucket": {
"buckets_path": "slices._count"
}
}
}
What I would do is to use the filters aggregation instead and define all the buckets you need, like this:
{
"size": 0,
"query": {
"bool": {
"must": [
{
"query_string": {
"query": "*",
"analyze_wildcard": true
}
}
],
"must_not": []
}
},
"aggs": {
"slices": {
"filters": {
"filters": {
"accepted": {
"term": {
"status.raw": "Accepted"
}
},
"released": {
"term": {
"status.raw": "Released"
}
},
"closed": {
"term": {
"status.raw": "Closed"
}
},
"total": {
"terms": {
"status.raw": [
"Accepted",
"Released",
"Closed"
]
}
}
}
}
}
}
}
You could add a count with a value_count sub-aggregation and then use the sum_bucket pipeline aggregation:
{
"aggs": {
"unique_status": {
"terms": {
"field": "status.raw",
"include": "Accepted|Released|Closed"
},
"aggs": {
"count": {
"value_count": {
"field": "status.raw"
}
}
}
},
"sum_status": {
"sum_bucket": {
"buckets_path": "unique_status>count"
}
}
},
"size": 0
}

How to properly do sorting under aggregation?

I am still new to Elasticsearch, and I have a question here. I would like to get some assistance. I get an error when trying to sort properly under an aggregation. Please advise me. Thank you.
{
"size": 20,
"query": {
"bool": {
"filter": [
{
"range": {
"ts": {
"gt": "2016-08-22T00:00:00.000Z",
"lt": "2016-08-23T13:41:09.000Z"
}
}
}
]
}
},
"aggs": {
"group_by_ip": {
"terms": {
"field": "id_orig_h"
},
"aggs": {
"sum_volume": {
"sum": {
"field": "resp_bytes",
"sort": [
{
"resp_bytes": {
"order": "asc"
}
}
]
}
}
}
}
}
}
You can do it with the order setting in your terms aggregation referencing the sum_volume sub-aggregation:
{
"size": 20,
"query": {
"bool": {
"filter": [
{
"range": {
"ts": {
"gt": "2016-08-22T00:00:00.000Z",
"lt": "2016-08-23T13:41:09.000Z"
}
}
}
]
}
},
"aggs": {
"group_by_ip": {
"terms": {
"field": "id_orig_h",
"order": {
"sum_volume": "asc"
}
},
"aggs": {
"sum_volume": {
"sum": {
"field": "resp_bytes"
}
}
}
}
}
}

Filter OUT matching documents in elasticsearch with aggregation

I'm attempting to query statistics about documents in elasticsearch with the following query. The problem is that I'm trying to ignore documents with certain values for the field logger, but I can't figure out how. The query below selects all the right documents into the set, but it doesn't exclude documents with the undesirable values.
Any suggestions very welcome.
{
"query": {
"bool": {
"filter": {
"bool": {
"must_not": {
"terms": {
"logger": [
"experimentsplitsegmentlogger_errors",
"ExperimentLogger"
]
}
}
}
},
"must_not": {
"terms": {
"logger": [
"experimentsplitsegmentlogger_errors",
"ExperimentLogger"
]
}
},
"must": {
"exists": {
"field": "count"
}
}
}
},
"aggs": {
"keys": {
"filter": {
"bool": {
"must_not": {
"terms": {
"logger": [
"experimentsplitsegmentlogger_errors",
"ExperimentLogger"
]
}
}
}
},
"terms": {
"field": "logger"
},
"aggs": {
"hostnames": {
"terms": {
"field": "hostname"
},
"aggs": {
"pids": {
"terms": {
"field": "pid"
},
"aggs": {
"time_stats": {
"stats": {
"field": "timestamp"
}
},
"count_stats": {
"stats": {
"field": "count"
}
}
}
}
}
}
}
}
},
"size": 0
}
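This should work for you, as I removed filter and terms from the same level of aggregation. (Note: the "not" query used below is only available in older Elasticsearch versions; on newer versions, put the same terms clause under the bool query's "must_not" instead.)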
{
"query": {
"bool": {
"filter": {
"not": {
"terms": {
"logger": [
"experimentsplitsegmentlogger_errors",
"ExperimentLogger"
]
}
}
},
"must": {
"exists": {
"field": "count"
}
}
}
},
"aggs": {
"keys": {
"terms": {
"field": "logger"
}
}
},
"size": 0
}

Resources