How to return only aggregation stats in an ElasticSearch query? - elasticsearch

Is it possible to exclude documents from an aggregation query? I just need to know "count" and "sum" and do not need hits. I did it like this:
{
"query": {
"match_all": {
}
},
"aggs": {
"my_agg": {
"stats": {
"field": "country_id"
}
}
}
}

To focus only on aggregation with a match_all query, you could simply use "size":0 (this specifies you want no query results) with no query:
curl -XPOST "http://localhost:9200/indexname/doctype/_search" -d'
{
"size": 0,
"aggs": {
"my_agg": {
"stats": {
"field": "country_id"
}
}
}
}'

Add to your query ?search_type=count.
For example:
GET /my_index/countries/_search?search_type=count
{
"query": {
"match_all": {
}
},
"aggs": {
"my_agg": {
"stats": {
"field": "country_id"
}
}
}
}

Related

How to define percentage of result items with specific field in Elasticsearch query?

I have a search query that returns all items matching users that have type manager or lead.
{
"from": 0,
"size": 20,
"query": {
"bool": {
"should": [
{
"terms": {
"type": ["manager", "lead"]
}
}
]
}
}
}
Is there a way to define what percentage of the results should be of type "manager"?
In other words, I want the results to have 80% of users with type manager and 20% with type lead.
I want to make a suggestion to use bucket_path aggregation. As I know this aggregation needs to be run in sub-aggs of a histogram aggregation. As you have such field in your mapping so I think this query should work for you:
{
"size": 0,
"aggs": {
"NAME": {
"date_histogram": {
"field": "my_datetime",
"interval": "month"
},
"aggs": {
"role_type": {
"terms": {
"field": "type",
"size": 10
},
"aggs": {
"count": {
"value_count": {
"field": "_id"
}
}
}
},
"role_1_ratio": {
"bucket_script": {
"buckets_path": {
"role_1": "role_type['manager']>count",
"role_2": "role_type['lead']>count"
},
"script": "params.role_1 / (params.role_1+params.role_2)*100"
}
},
"role_2_ratio": {
"bucket_script": {
"buckets_path": {
"role_1": "role_type['manager']>count",
"role_2": "role_type['lead']>count"
},
"script": "params.role_2 / (params.role_1+params.role_2)*100"
}
}
}
}
}
}
Please let me know if it didn't work well for you.

Elasticsearch term aggregation and range with timestamp

I'm trying to count # of logs grouped by user agent.
This is what I have.
GET /myindex/_search
{
"size": 30,
"stored_fields": ["req.headers.user-agent.keyword"],
"aggs": {
"group_by_userAgent": {
"terms": {
"field": "req.headers.user-agent.keyword"
}
}
}
}
I wanted to add "Query last 15 mins" feature. I've tried to add 'range' query and I ended up the following query, which does not work.
GET /myindex/_search
{
"size": 30,
"stored_fields": ["req.headers.user-agent.keyword"],
"aggs": {
"group_by_userAgent": {
"terms": {
"field": "req.headers.user-agent.keyword"
},
"range": {
"timestamp": {
"gt": "now-15m"
}
}
}
}
}
How do I query terms aggregation with range with "now-x15min" syntax?
The range should go inside the query section, not aggs. The time range is good as it is
I think what you're looking for is this, the number of docs in the first 30 user-agent buckets, i.e. the top 30 user agents producing the most logs
GET /myindex/_search
{
"size": 0,
"query": {
"range": {
"#timestamp": {
"gt": "now-15m"
}
}
},
"aggs": {
"group_by_userAgent": {
"terms": {
"field": "req.headers.user-agent.keyword",
"size": 30
}
}
}
}
you can do this in two ways to achieve aggregation results for user-agent.
POST phrase_index/_search
{
"aggs": {
"date_range_filtered_agg": {
"filter": {
"range": {
"timestamp": {
"gte": "now-15m/m"
}
}
},
"aggs": {
"group_by_userAgent": {
"terms": {
"field": "req.headers.user-agent.keyword",
"size": 10
}
}
}
}
},
"size": 30,
"stored_fields": ["req.headers.user-agent.keyword"]
}
POST phrase_index/_search
{
"query": {
"range": {
"timestamp": {
"gte": "now-15m/m"
}
}
},
"aggs": {
"group_by_userAgent": {
"terms": {
"field": "req.headers.user-agent.keyword",
"size": 10
}
}
},
"size": 30,
"stored_fields": ["req.headers.user-agent.keyword"]
}
You need a filter aggregation first to apply the range query, then add a terms sub-aggregation.
See: https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-bucket-filter-aggregation.html

bucket script not working - elasticsearch 2.4.2

I have tried to subtract the aggregations
{
"query": {
"match_all": {}
},
"size": 0,
"aggs": {
"total_query_id": {
"sum": {
"field": "query_id"
}
},
"total_num_results": {
"sum": {
"field": "num_results"
}
},
"minus_value": {
"bucket_script": {
"buckets_path": {
"qid": "total_query_id",
"nrs": "total_num_results"
},
"script": "qid - nrs"
}
}
}
}
it throws the below error
"reason": "Invalid pipeline aggregation named [minus_value] of type [bucket_script]. Only sibling pipeline aggregations are allowed at the top level"
I have moved to back and forth minus_value node to aggs node but it does not solve my problem.
can anyone help me on this?
The idea is that pipeline aggregations must work on a parent bucket aggregation.
It is not the case in your example, so you must have one parent aggregation. Since you have a match_all query, you could try using a global bucket aggregation and then embed your 3 aggregations inside it, like this:
{
"query": {
"match_all": {}
},
"size": 0,
"aggs": {
"all": {
"global": {},
"aggs": {
"total_query_id": {
"sum": {
"field": "query_id"
}
},
"total_num_results": {
"sum": {
"field": "num_results"
}
},
"minus_value": {
"bucket_script": {
"buckets_path": {
"qid": "total_query_id",
"nrs": "total_num_results"
},
"script": "qid - nrs"
}
}
}
}
}
}

elasticsearch filter aggs by doc count

I have a query that counts the number of images per user:
GET images/_search
{
"query": {
"bool": {
"must": [
{
"term": {
"appID.raw": "myApp"
}
}
]
}
},
"size": 0,
"aggs": {
"perDeviceAggregation": {
"terms": {
"field": "deviceID"
}
}
}
}
It basically works fine, but I would like to exclude all aggregation results for users that have less than 200 images. How can I tweak the query above to achieve this?
Thanks.
You can achieve this by using a Minimum Document Count option.
"aggs": {
"perDeviceAggregation": {
"terms": {
"field": "deviceID",
"min_doc_count": 200
}
}
}
Add a filter aggregation to your terms aggregation with the query clause.
Filter Aggregations
You can modify your above query to look like this.
{
"query": {
"bool": {
"must": [
{
"term": {
"appID.raw": "myApp"
}
}
]
}
},
"size": 0,
"aggs": {
"filtered_users_with_images_count": {
"filter": {
"term": {
"count": 200
}
},
"aggs": {
"perDeviceAggregation": {
"terms": {
"field": "deviceID"
}
}
}
}
}
}
You can modify the filter inside filtered_users_with_images_count to match documents with images greater than 200.
Please also consider to post your data mappings along with query to support your questions.

sorting elasticsearch top hits results

I am trying to execute a query in elasticsearch to get reuslt of specific users from certain date range. the results should be grouped by userId and sorted on trackTime field, I am able to use group by using aggregation but i am not able to sort aggregation buckets on tracktime, i write down the following query
GET _search
{
"size": 0,
"query": {
"filtered": {
"query": {
"bool": {
"must": [
{
"range": {
"trackTime": {
"from": "2016-02-08T05:51:02.000Z"
}
}
}
]
}
},
"filter": {
"terms": {
"userId": [
9,
10,
3
]
}
}
}
},
"aggs": {
"by_district": {
"terms": {
"field": "userId"
},
"aggs": {
"tops": {
"top_hits": {
"size": 2
}
}
}
}
}
}
what more should i have to use to sort the top hits result? Thanks in advance...
You can use sort like .
"aggs": {
"by_district": {
"terms": {
"field": "userId"
},
"aggs": {
"tops": {
"top_hits": {
"sort": [
{
"fieldName": {
"order": "desc"
}
}
],
"size": 2
}
}
}
}
}
Hope it helps

Resources