Elasticsearch sort terms agg by arbitrary order - elasticsearch

I have a terms aggregation and they want some specific values to always be at the top.
Like:
POST _search
{ "size": 0,
"aggs": {
"pets": {
"terms": {
"field": "species",
"order": "Dogs, Cats"
}
}
}
}
Where the results would be like "Dog", "Cat", "Iguana".
Dog and Cat at the top and everything else below.
Is this possible without scripting?
Thanks!

One way to do it is by filtering values in the terms aggregation. You'd create two terms aggregations, one with the desired terms and another with all other terms.
{
"size": 0,
"aggs": {
"top_terms": {
"terms": {
"field": "species",
"include": ["Dogs", "Cats"],
"order": { "_key" : "desc" }
}
},
"other_terms": {
"terms": {
"field": "species",
"exclude": ["Dogs", "Cats"]
}
}
}
}
Try it out

A script wouldn't be too complicated though -- first boost the two species, then sort by the scores first and then by _count:
GET pets/_search
{
"size": 0,
"query": {
"bool": {
"should": [
{
"terms": {
"species": [
"dog",
"cat"
],
"boost": 10
}
},
{
"match_all": {}
}
]
}
},
"aggs": {
"pets": {
"terms": {
"field": "species.keyword",
"order": [
{
"max_score": "desc"
},
{
"_count": "desc"
}
]
},
"aggs": {
"max_score": {
"max": {
"script": "_score"
}
}
}
}
}
}

Related

How to get the last Elasticsearch document for each unique value of a field?

I have a data structure in Elasticsearch that looks like:
{
"name": "abc",
"date": "2022-10-08T21:30:40.000Z",
"rank": 3
}
I want to get, for each unique name, the rank of the document (or the whole document) with the most recent date.
I currently have this:
"aggs": {
"group-by-name": {
"terms": {
"field": "name"
},
"aggs": {
"max-date": {
"max": {
"field": "date"
}
}
}
}
}
How can I get the rank (or the whole document) for each result, and if possible, in 1 request ?
You can use below options
Collapse
"collapse": {
"field": "name"
},
"sort": [
{
"date": {
"order": "desc"
}
}
]
Top hits aggregation
{
"aggs": {
"group-by-name": {
"terms": {
"field": "name",
"size": 100
},
"aggs": {
"top_doc": {
"top_hits": {
"sort": [
{
"date": {
"order": "desc"
}
}
],
"size": 1
}
}
}
}
}
}

ElasticSearch Query to find intersection of two queries

Records exist in this format: {user_id, state}.
I need to write an elasticsearch query to find all user_id's that have both states present in the records list.
For example, if sample records stored are:
{1,a}
{1,b}
{2,a}
{2,b}
{1,a}
{3,b}
{3,b}
The output from running the query for this example would be
{"1", "2"}
I've tried this so far:
{
"size": 0,
"query": {
"bool": {
"filter": {
"terms": {
"state": [
"a",
"b"
]
}
}
}
},
"aggs": {
"user_id_intersection": {
"terms": {
"field": "user_id",
"min_doc_count": 2,
"size": 100
}
}
}
}
but this will return
{"1", "2", "3"}
Assuming you know the cardinality of the states set, here 2, you can use the
Bucket Selector Aggregation
GET test/_search
{
"size": 0,
"aggs": {
"user_ids": {
"terms": {
"field": "user_id"
},
"aggs": {
"states_card": {
"cardinality": {
"field": "state"
}
},
"state_filter": {
"bucket_selector": {
"buckets_path": {
"states_card": "states_card"
},
"script": "params.states_card == 2"
}
}
}
}
}
}

how to bucket empty and non empty fields in nested aggregation in elasticsearch?

I have the following set of nested subaggregations in elasticsearch (field2 is a subaggregation of field1 and field3 is a subaggregation of field2).
It turns out however that the terms aggregation for field3 will not bucket documents that dont have field3.
My understanding is that I have to use a Missing subaggregation query to bucket those in addition to the term query for field3.
But I am not sure how can I add it to the query below to bucket both.
{
"size": 0,
"aggregations": {
"f1": {
"terms": {
"field": "field1",
"size": 0,
"order": {
"_count": "asc"
},
"include": [
"123"
]
},
"aggregations": {
"field2": {
"terms": {
"field": "f2",
"size": 0,
"order": {
"_count": "asc"
},
"include": [
"tr"
]
},
"aggregations": {
"field3": {
"terms": {
"field": "f3",
"order": {
"_count": "asc"
},
"size": 0
},
"aggregations": {
"aggTopHits": {
"top_hits": {
"size": 1
}
}
}
}
}
}
}
}
}
}
In version 2.1.2 and later, you can use the missing parameter of the terms aggregation, which allows you to specify a default value for documents that are missing that field. (FYI, the missing parameter was available starting 2.0, but there was a bug which prevented it from working on sub-aggregations, which is how you would use it here.)
...
"aggregations": {
"field3": {
"terms": {
"field": "f3",
"order": {
"_count": "asc"
},
"size": 0,
"missing": "n/a" <----- provide a default here
},
"aggregations": {
"aggTopHits": {
"top_hits": {
"size": 1
}
}
}
}
}
However, if you are working with a pre-2.x ES cluster, you can use the missing aggregation at the same depth as your field3 aggregation to bucket the documents that are missing "f3" like this:
...
"aggregations": {
"field3": {
"terms": {
"field": "f3",
"order": {
"_count": "asc"
},
"size": 0
},
"aggregations": {
"aggTopHits": {
"top_hits": {
"size": 1
}
}
}
},
"missing_field3": {
"missing" : {
"field": "f3"
},
"aggregations": {
"aggTopMissingHit": {
"top_hits": {
"size": 1
}
}
}
}
}

Elasticsearch minBy

Is there a way in elasticsearch to get a field from a document containing the maximum value? (Basically working similarly to maxBy from scala)
For example (mocked):
{
"aggregations": {
"grouped": {
"terms": {
"field": "grouping",
"order": {
"docWithMin": "asc"
}
},
"aggregations": {
"withMax": {
"max": {
"maxByField": "a",
"field": "b"
}
}
}
}
}
}
For which {"grouping":1,"a":2,"b":5},{"grouping":1,"a":1,"b":10}
would return (something like): {"grouped":1,"withMax":5}, where the max comes from the first object due to "a" being higher there.
Assuming you just want the document back for which a is maximum, you can do this:
{
"size": 0,
"aggs": {
"grouped": {
"terms": {
"field": "grouping"
},
"aggs": {
"maxByA": {
"top_hits": {
"sort": [
{"a": {"order": "desc"}}
],
"size": 1
}
}
}
}
}
}

elasticsearch facets OR filter

I have a problem with my elasticsearch DSL, in that when using facet navigation, when I apply my facet filter, the next set of results don't include any further facets, even though I've asked for them.
When I do the initial search, I get the results I want back:
{
"sort": {
"_score": {},
"salesQuantity": {
"order": "asc"
}
},
"query": {
"filtered": {
"query": {
"match": {
"categoryTree": "D01"
}
},
"filter": {
"term": {
"publicwebEnabled": true,
"parentID": 0
}
}
}
},
"facets": {
"delivery_locations": {
"terms": {
"field": "delivery_locations",
"all_terms": true
}
},
"categories": {
"terms": {
"field": "categoryTree",
"all_terms": true
}
},
"collectable": {
"terms": {
"field": "collectable",
"all_terms": true
}
}
},
"from": 0,
"size": 12}
When I then apply a filter like so, the results I get back do not include the facets:
{
"sort": {
"_score": {},
"salesQuantity": {
"order": "asc"
}
},
"query": {
"filtered": {
"query": {
"match": {
"categoryTree": "D01"
}
},
"filter": {
"term": {
"publicwebEnabled": true,
"parentID": 0
},
"or": [
{
"range": {
"Retail_Price": {
"to": "49.99",
"from": "0"
}
}
}
]
}
}
},
"facets": {
"delivery_locations": {
"terms": {
"field": "delivery_locations",
"all_terms": true
}
},
"categories": {
"terms": {
"field": "categoryTree",
"all_terms": true
}
},
"collectable": {
"terms": {
"field": "collectable",
"all_terms": true
}
}
},
"from": 0,
"size": 12}
NOTE, I'm adding the OR filter above - because users may choose multiple price ranges to filter on.
Am I doing something wrong?
I want the new facets returned as altering the prices would obviously alter the facet counts of the other facets...
Add the original term-filter inside the or-filter, or add another boolean filter to wrap your whole filter inside a boolean expression. I dont think you can add the two filters just by comma-separating them like that.

Resources