How can I aggregate over the _score - elasticsearch

I tried to run an aggregation query over the _score field in Elasticsearch, but it returned no results. It seems it is not possible to use the _score field, maybe because it is not a field of the document. How can I aggregate over the _score?
This is my query:
{
"_source": false, "explain": false, "from": 0, "size": 0,
"aggs" : {
"score_ranges" : {
"range" : {
"field" : "_score",
"ranges" : [
{ "to" : 50 },
{ "from" : 50, "to" : 75 },
{ "from" : 75 }
]
}
}
},
"query": {
"function_score": {
"query": {
"match_all": { }
}
}
}
}

"aggs": {
"scores_histogram": {
"histogram": {
"script": "return _score.doubleValue() * 10",
"interval": 3
}
}
}
or, with ranges:
"aggs": {
"score_ranges": {
"range": {
"script": "_score",
"ranges": [
{
"to": 50
},
{
"from": 50,
"to": 75
},
{
"from": 75
}
]
}
}
}
Note that you need to enable dynamic scripting for these script-based aggregations to work.

Related

How to count and do average for the result of aggregation in Elasticsearch?

I want to group by city and store, get the latest record for each combination, and then filter the result by price.
Example data:
{"city_name" : "NY","store_name" : "xxx","price" : 150,"entry_time" : 1649792100000},
{"city_name" : "NY","store_name" : "xxx","price" : 120,"entry_time" : 1649792000000},
{"city_name" : "NY","store_name" : "yyy","price" : 220,"entry_time" : 1649792100000},
{"city_name" : "NY","store_name" : "yyy","price" : 120,"entry_time" : 1649792000000},
{"city_name" : "SF","store_name" : "xxx","price" : 250,"entry_time" : 1649792000000},
{"city_name" : "SF","store_name" : "xxx","price" : 200,"entry_time" : 1649792100000}
The query below finds the latest record for every store and city combination. I then use Java to calculate, for every city, the number of stores with price <= 100, with 100 < price < 200, and with price >= 200, as well as the average price.
Can I achieve this directly in an Elasticsearch query?
{
"query": {
"match_all": {}
},
"aggs": {
"city_name": {
"terms": {
"field": "city_name"
},
"aggs": {
"store_name": {
"terms": {
"field": "store_name"
},
"aggs": {
"price": {
"top_hits": {
"sort": [
{
"entry_time": {
"order": "desc"
}
}
],
"size": 1
}
}
}
}
}
}
}
}
Expected output:
{
"city_name": "NY",
"low_price_count": 0,
"mid_price_count": 1,
"high_price_count": 1,
"average_price": 185
},
{
"city_name": "SF",
"low_price_count": 0,
"mid_price_count": 0,
"high_price_count": 1,
"average_price": 200
}
Instead of using top_hits and computing the counts in your Java application, you could simply use the range aggregation and let ES do it:
{
"query": {
"match_all": {}
},
"aggs": {
"city_name": {
"terms": {
"field": "city_name"
},
"aggs": {
"store_name": {
"terms": {
"field": "store_name"
},
"aggs": {
"entry_time": {
"max": {
"field": "entry_time"
}
},
"price_ranges": {
"range": {
"field": "price",
"ranges": [
{
"to": 100
},
{
"from": 100,
"to": 200
},
{
"from": 200
}
]
}
}
}
}
}
}
}
}

Elastic aggregation on specific values from within one field

I am migrating my db from postgres to elasticsearch. My postgres query looks like this:
select site_id, count(*) from r_2332 where site_id in ('1300','1364') and date >= '2021-01-25' and date <= '2021-01-30'
The expected result is as follows:
site_id count
1300 1234
1364 2345
I am trying to derive the same result from elasticsearch aggs. I have tried the following:
GET /r_2332/_search
{
"query": {
"bool" : {
"should" : [
{"match" : {"site_id": "1300"}},
{"match" : {"site_id": "1364"}}
],"minimum_should_match": 1
}
},
"aggs" : {
"footfall" : {
"range" : {
"field" : "date",
"ranges" : [
{
"from":"2021-01-21",
"to":"2021-01-30"
}
]
}
}
}
}
This gives me the result as follows:
"aggregations":{"footfall":{"buckets":[{"key":"2021-01-21T00:00:00.000Z-2021-01-30T00:00:00.000Z","from":1.6111872E12,"from_as_string":"2021-01-21T00:00:00.000Z","to":1.6119648E12,"to_as_string":"2021-01-30T00:00:00.000Z","doc_count":2679}]}
and this:
GET /r_2332/_search
{
"query": {
"terms": {
"site_id": [ "1300", "1364" ],
"boost": 1.0
}
},
"aggs" : {
"footfall" : {
"range" : {
"field" : "date",
"ranges" : [
{
"from":"2021-01-21",
"to":"2021-01-30"
}
]
}
}
}
}
This provided the same result:
"aggregations":{"footfall":{"buckets":[{"key":"2021-01-21T00:00:00.000Z-2021-01-30T00:00:00.000Z","from":1.6111872E12,"from_as_string":"2021-01-21T00:00:00.000Z","to":1.6119648E12,"to_as_string":"2021-01-30T00:00:00.000Z","doc_count":2679}]}
How do I get the result separately for each site_id?
You can use a combination of the terms and range aggregations to achieve this.
Here is a working example with index data, search query, and search result.
Index Data:
{
"site_id":1365,
"date":"2021-01-24"
}
{
"site_id":1300,
"date":"2021-01-22"
}
{
"site_id":1300,
"date":"2020-01-22"
}
{
"site_id":1364,
"date":"2021-01-24"
}
Search Query:
{
"size": 0,
"aggs": {
"siteId": {
"terms": {
"field": "site_id",
"include": [
1300,
1364
]
},
"aggs": {
"footfall": {
"range": {
"field": "date",
"ranges": [
{
"from": "2021-01-21",
"to": "2021-01-30"
}
]
}
}
}
}
}
}
Search Result:
"aggregations": {
"siteId": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": 1300,
"doc_count": 2,
"footfall": {
"buckets": [
{
"key": "2021-01-21T00:00:00.000Z-2021-01-30T00:00:00.000Z",
"from": 1.6111872E12,
"from_as_string": "2021-01-21T00:00:00.000Z",
"to": 1.6119648E12,
"to_as_string": "2021-01-30T00:00:00.000Z",
"doc_count": 1 // note this
}
]
}
},
{
"key": 1364,
"doc_count": 1,
"footfall": {
"buckets": [
{
"key": "2021-01-21T00:00:00.000Z-2021-01-30T00:00:00.000Z",
"from": 1.6111872E12,
"from_as_string": "2021-01-21T00:00:00.000Z",
"to": 1.6119648E12,
"to_as_string": "2021-01-30T00:00:00.000Z",
"doc_count": 1 // note this
}
]
}
}
]
}
}
Alternatively, this query might perform better:
{
"size": 0,
"query": {
"bool": {
"filter": [
{
"terms": {
"site_id": [
"1300",
"1365"
]
}
},
{
"range": {
"date": {
"gte": "2021-01-21",
"lte": "2021-01-24"
}
}
}
]
}
},
"aggs": {
"group_by": {
"terms": {
"field": "site_id"
}
}
}
}

How to aggregate until a certain value is reached in ElasticSearch?

I would like to aggregate a list of documents (each of them has two fields, timestamp and amount) by the "amount" field until a certain value is reached. For example, I would like to get a list of documents, sorted by timestamp, whose total amount is equal to 100. Is it possible to do this in one query?
Here is my query, which returns the total amount. I would like to add a condition to stop the aggregation when a certain value is reached.
{
"query": {
"bool": {
"filter": [
{
"range": {
"timestamp": {
"gte": 1525168583
}
}
}
]
}
},
"aggs": {
"total_amount": {
"sum": {
"field": "amount"
}
}
},
"sort": [
"timestamp"
],
"size": 10000
}
Thank You
It's perfectly possible using a combination of function_score scripting for mimicking sorting, filter aggs for the range gte query and a healthy amount of scripted_metric aggs to limit the summation up to a certain amount.
Let's first set up a mapping and ingest some docs:
PUT summation
{
"mappings": {
"properties": {
"timestamp": {
"type": "date",
"format": "epoch_second"
}
}
}
}
POST summation/_doc
{
"context": "newest",
"timestamp": 1587049128,
"amount": 20
}
POST summation/_doc
{
"context": "2nd newest",
"timestamp": 1586049128,
"amount": 30
}
POST summation/_doc
{
"context": "3rd newest",
"timestamp": 1585049128,
"amount": 40
}
POST summation/_doc
{
"context": "4th newest",
"timestamp": 1585049128,
"amount": 30
}
Then perform the query:
GET summation/_search
{
"size": 0,
"aggs": {
"filtered_agg": {
"filter": {
"bool": {
"must": [
{
"range": {
"timestamp": {
"gte": 1585049128
}
}
},
{
"function_score": {
"query": {
"match_all": {}
},
"script_score": {
"script": {
"source": "return (params['now'] - doc['timestamp'].date.toMillis())",
"params": {
"now": 1587049676
}
}
}
}
}
]
}
},
"aggs": {
"limited_sum": {
"scripted_metric": {
"init_script": """
state['my_hash'] = new HashMap();
state['my_hash'].put('sum', 0);
state['my_hash'].put('docs', new ArrayList());
""",
"map_script": """
if (state['my_hash']['sum'] <= 100) {
state['my_hash']['sum'] += doc['amount'].value;
state['my_hash']['docs'].add(doc['context.keyword'].value);
}
""",
"combine_script": "return state['my_hash']",
"reduce_script": "return states[0]"
}
}
}
}
}
}
yielding
{
"took" : 0,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 4,
"relation" : "eq"
},
"max_score" : null,
"hits" : [ ]
},
"aggregations" : {
"filtered_agg" : {
"meta" : { },
"doc_count" : 4,
"limited_sum" : {
"value" : {
"docs" : [
"newest",
"2nd newest",
"3rd newest",
"4th newest"
],
"sum" : 120
}
}
}
}
}
I've chosen here to only return the doc.contexts but you can adjust it to retrieve whatever you like -- be it IDs, amounts etc.

Aggregation using elastic search

My search query for fetching the latest 5000 documents from my Elasticsearch index is as follows:
{
"size": 5000,
"from": 0,
"query": {
"range" : {
"hostTimestamp" : {
"gte" : 1499674634382,
"lte" : 1499680034000
}
}
},
"sort": [
{
"hostTimestamp": {
"order": "desc"
}
}
]
}
Now, among the documents fetched by this query, I want to count the number of documents with eventSeverity equal to Alert or Critical. How can this be achieved?
You can achieve that with a terms aggregation on the eventSeverity field:
{
"size": 5000,
"from": 0,
"query": {
"range" : {
"hostTimestamp" : {
"gte" : 1499674634382,
"lte" : 1499680034000
}
}
},
"sort": [
{
"hostTimestamp": {
"order": "desc"
}
}
],
"aggs": { <--- add this part
"severities": {
"terms": {
"field": "eventSeverity"
}
}
}
}

how do I get the latest document grouped by a field?

I have an index with many documents in this format:
{
"userId": 1234,
"locationDate": "2016-07-19T19:24:51+0000",
"location": {
"lat": -47.38163,
"lon": 26.38916
}
}
In this index I have incremental positions for each user, updated every few seconds.
I would like to execute a search that returns the latest position (sorted by locationDate) of each user (grouped by userId).
Is this possible with Elasticsearch? The best I could do was get all the positions from the last 30 seconds, using this:
{"query":{
"filtered" : {
"query" : {
"match_all" : { }
},
"filter" : {
"range" : {
"locationDate" : {
"from" : "2016-07-19T18:54:51+0000",
"to" : null,
"include_lower" : true,
"include_upper" : true
}
}
}
}
}}
After that I sort them out by hand, but I would like to do this directly in Elasticsearch.
IMPORTANT: I am using elasticsearch 1.5.2
Try this (with aggregations):
{
"query": {
"filtered": {
"query": {
"match_all": {}
},
"filter": {
"range": {
"locationDate": {
"from": "2016-07-19T18:54:51+0000",
"to": null,
"include_lower": true,
"include_upper": true
}
}
}
}
},
"aggs": {
"byUser": {
"terms": {
"field": "userId",
"size": 10
},
"aggs": {
"firstOne": {
"top_hits": {
"size": 1,
"sort": [
{
"locationDate": {
"order": "desc"
}
}
]
}
}
}
}
}
}

Resources