I am new to Elastic Search so please forgive me if the answer is obvious.
I have modified a query to use aggs to show 'distinct' results. However, after adding the aggs the size doesn't seem to work anymore - it always returns 10 results no matter what I set size to.
Would anyone know how I could use both aggs and size together?
My query is:
{
"size": "15",
"from": "0",
"query": {
"bool": {
"filter": [
{
"term": {
"category": "Cars"
}
},
{
"term": {
"location": "Sydney"
}
},
{
"term": {
"status": true
}
}
]
}
},
"sort": [
{
"_score": "desc"
},
{
"brand": "asc"
}
],
"aggs": {
"brand": {
"terms": {
"field": "brand",
"order": {
"price": "asc"
}
},
"aggs": {
"brand": {
"top_hits": {
"size": 1,
"sort": [
{
"price": {
"order": "asc"
}
}
]
}
},
"price": {
"min": {
"field": "price"
}
}
}
}
}
}
The top-level size parameter only sets the number of query hits returned; it does not affect the number of buckets returned by an aggregation (a terms aggregation returns 10 buckets by default).
To control the number of buckets, set the size parameter inside the parent terms aggregation, just as you have already done in the sub-aggregation with "size": 1.
The modified query to get the top 10 buckets is:
{
"size": "15",
"from": "0",
"query": {
"bool": {
"filter": [
{
"term": {
"category": "Cars"
}
},
{
"term": {
"location": "Sydney"
}
},
{
"term": {
"status": true
}
}
]
}
},
"sort": [
{
"_score": "desc"
},
{
"brand": "asc"
}
],
"aggs": {
"brand": {
"terms": {
"field": "brand",
"size": 10,
"order": {
"price": "asc"
}
},
"aggs": {
"brand": {
"top_hits": {
"size": 1,
"sort": [
{
"price": {
"order": "asc"
}
}
]
}
},
"price": {
"min": {
"field": "price"
}
}
}
}
}
}
Hope this helps.
Related
I have tried the query below for pagination on aggregations, but it is not working properly.
I am getting the error "reason": "[40:7] [terms] unknown field [from], parser not found"
{
"size": 0,
"query": {
"bool": {
"must": [
{
"term": {
"answer.keyword": "UNHANDLED"
}
},
{
"term": {
"source.keyword": "QUAL2"
}
}
]
}
},
"aggs": {
"MyBuckets": {
"terms": {
"field": "question.keyword",
"order": {
"_count": "asc"
},
"size": "10"
},
"aggs": {
"MyBuckets": {
"terms": {
"field": "timestamp",
"order": {
"_count": "asc"
},
"size": "3",
"from": 8
}
}
}
}
}
}
Only size is supported in a terms aggregation; you have to remove the "from" parameter from the aggregation query.
You can try using partitions in the aggregation instead.
Try out the below query:
{
"size": 0,
"query": {
"bool": {
"must": [
{
"term": {
"answer.keyword": "UNHANDLED"
}
},
{
"term": {
"source.keyword": "QUAL2"
}
}
]
}
},
"aggs": {
"MyBuckets": {
"terms": {
"field": "question.keyword",
"order": {
"_count": "asc"
},
"size": "10"
},
"aggs": {
"MyBuckets": {
"terms": {
"field": "timestamp",
"order": {
"_count": "asc"
},
"size": "3",
"include": {
"partition": 1,
"num_partitions": 10
}
}
}
}
}
}
}
I am querying a time series data using the aggregation functionality. The data to be queried is of categorical nature.
I use date histogram to first create buckets. From these generated buckets, I wish to extract actual values from the documents found, forming an array.
A possible workaround might be to push the hits object inside the aggs object, but I don't know how to do this either.
The query:
GET elastiflow-*/_search
{
"size": 10000,
"sort": [
{
"#timestamp": {
"order": "desc",
"unmapped_type":"boolean"
}
}
],
"_source": {
"includes": ["time", "data" ]
}
, "query": {
"bool": {
"filter": {
"range": {
"#timestamp": {
"gte": "now-2d/d",
"lte": "now"
}
}
}
}
}
, "aggs": {
"30secbuckets": {
"date_histogram": {
"field": "time",
"fixed_interval": "30s"
}
, "aggs": {
"average": {
"terms": {
"field": "data"
}
}
}
}
}
}
Thanks!
I think you're looking for the top_hits metric aggregation:
{
"size": 0,
"_source": false,
"query": {
"bool": {
"filter": {
"range": {
"#timestamp": {
"gte": "now-2d/d",
"lte": "now"
}
}
}
}
},
"aggs": {
"30secbuckets": {
"date_histogram": {
"field": "time",
"fixed_interval": "30s"
},
"aggs": {
"hits": {
"top_hits": {
"size": 100,
"sort": [
{
"#timestamp": {
"order": "desc",
"unmapped_type": "boolean"
}
}
],
"_source": {
"includes": [
"time",
"data"
]
}
}
},
"average": {
"terms": {
"field": "data"
}
}
}
}
}
}
I am trying to figure out how to perform a complex query in Elasticsearch. Let's say I have the following table of data:
Which I got from the following query
{
"aggs": {
"3": {
"terms": {
"field": "ColumnA",
"order": {
"_key": "desc"
},
"size": 50
},
"aggs": {
"4": {
"terms": {
"field": "ColumnB",
"order": {
"_key": "desc"
},
"size": 50
},
"aggs": {
"5": {
"terms": {
"field": "ColumnC",
"order": {
"_key": "desc"
},
"size": 50
},
"aggs": {
"sum_of_views": {
"sum": {
"field": "views"
}
},
"sum_of_costs": {
"sum": {
"field": "cost"
}
},
"sum_of_clicks": {
"sum": {
"field": "clicks"
}
},
"sum_of_earned": {
"sum": {
"field": "earned"
}
},
"sum_of_adv_earned": {
"sum": {
"field": "adv_earned"
}
}
}
}
}
}
}
}
},
"size": 0,
"_source": {
"excludes": []
},
"stored_fields": [
"*"
],
"script_fields": {},
"docvalue_fields": [
{
"field": "hour",
"format": "date_time"
}
],
"query": {
"bool": {
"must": [],
"filter": [
{
"match_all": {}
},
{
"range": {
"hour": {
"format": "strict_date_optional_time",
"gte": "2019-08-08T06:29:34.723Z",
"lte": "2020-08-08T06:29:34.724Z"
}
}
}
],
"should": [],
"must_not": []
}
}
}
Now for example, if I want to get the records that have the following condition
(sum_of_clicks / sum_of_views) * (sum_of_earned2 / sum_of_earned1) < 0.5
What should I query?
I think the below should help. My understanding is that you want to first group by ColumnA, ColumnB and ColumnC, calculate the sums of the clicks, views, earned1 and earned2 fields, and then apply the custom aggregation logic you are looking for.
I've been able to come up with the below query where I've made use of Bucket Selector Aggregation.
POST <your_index_name>/_search
{
"size": 0,
"aggs": {
"3": {
"terms": {
"field": "ColumnA",
"order": {
"_key": "desc"
},
"size": 50
},
"aggs": {
"4": {
"terms": {
"field": "ColumnB",
"order": {
"_key": "desc"
},
"size": 50
},
"aggs": {
"5": {
"terms": {
"field": "ColumnC",
"order": {
"_key": "desc"
},
"size": 50
},
"aggs": {
"sum_views": {
"sum": {
"field": "views"
}
},
"sum_clicks": {
"sum": {
"field": "clicks"
}
},
"sum_earned1": {
"sum": {
"field": "earned1"
}
},
"sum_earned2": {
"sum": {
"field": "earned2"
}
},
"custom_sum_bucket_filter": {
"bucket_selector": {
"buckets_path": {
"sum_of_views": "sum_views",
"sum_of_clicks": "sum_clicks",
"sum_of_earned1": "sum_earned1",
"sum_of_earned2": "sum_earned2"
},
"script": "(params.sum_of_clicks/params.sum_of_views) * (params.sum_of_earned2/params.sum_of_earned1) < 0.5"
}
}
}
},
"min_bucket_selector": {
"bucket_selector": {
"buckets_path": {
"valid_docs_count": "5._bucket_count"
},
"script": {
"source": "params.valid_docs_count >= 1"
}
}
}
}
},
"min_bucket_selector": {
"bucket_selector": {
"buckets_path": {
"valid_docs_count": "4._bucket_count"
},
"script": {
"source": "params.valid_docs_count >= 1"
}
}
}
}
}
}
}
Note that to get the exact result you are looking for, I've had to add the filter conditions of buckets at 4 and 5.
The aggregations I've made use are
A Bucket Selector to evaluate the condition you've mentioned
Another Bucket Selector so as to not display empty buckets at aggregation level 5
Another Bucket Selector so as to not show empty buckets at aggregation level 4.
In order to test why I've added the additional empty bucket filters, you can just remove them and see what results you observe.
Note that for sake of simplicity I have ignored the query part as well as the cost field. Please feel free to add them and test it.
I'm using the following terms aggregations to get views and clicks of each campaign ( by campaign_id ) :
"aggregations": {
"campaigns": {
"terms": {
"field": "campaign_id",
"size": 10,
"order": {
"_term": "asc"
}
},
"aggregations": {
"actions": {
"terms": {
"field": "action",
"size": 10
}
}
}
}}
This is the response I get:
"aggregations": {
"campaigns": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "someId",
"doc_count": 12,
"actions": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "click",
"doc_count": 3
},
{
"key": "view",
"doc_count": 9
}
]
}
}
]
}
}
EDIT:
Here is an example of a document ( only the relevant parts of it..):
{
"_index": "action",
"_type": "click",
"_id": "AVI2XOTl8otXlszOjypT",
"_score": 1,
"_source": {
"ip": "127.0.0.1",
"timestamp": "2016-01-12T15:03:23.622743524Z",
"action": "click",
"campaign_id": "IypmiroC"
}}
I need to be able to retrieve the conversion rate of each campaign ( clicks / views ) , and I can't do it on the client side since I need to be able to sort by conversion rate.
Any help would be much appreciated.
This will require the use of several aggregations and ES 2.x. First, I get all unique campaign_id values with a terms aggregation. Then I filter by action and get the count of documents with that particular action. Finally, you need to use a pipeline aggregation (introduced in ES 2.0), namely the bucket script aggregation, to take the ratio. This is how it looks.
{
"size": 0,
"aggs": {
"unique_campaign": {
"terms": {
"field": "campaign_id",
"size": 10
},
"aggs": {
"click_bucket": {
"filter": {
"term": {
"action": "click"
}
},
"aggs": {
"click_count": {
"value_count": {
"field": "action"
}
}
}
},
"view_bucket": {
"filter": {
"term": {
"action": "view"
}
},
"aggs": {
"view_count": {
"value_count": {
"field": "action"
}
}
}
},
"conversion_ratio": {
"bucket_script": {
"buckets_path": {
"total_clicks": "click_bucket>click_count",
"total_views": "view_bucket>view_count"
},
"script": "total_clicks/total_views"
}
}
}
}
}
}
Also, you need to have a not_analyzed mapping for action, as "Click" won't match "click".
Hope this helps!!
As of 7.x, sorting by the computed ratio can be achieved by combining the following pipeline aggregations (just a demo for reference):
bucket_script
bucket_sort
{
"size": 0,
"aggs": {
"mallBucket": {
"terms": {
"field": "mallId",
"size": 20,
"min_doc_count": 3,
"shard_size": 10000
},
"aggs": {
"totalOrderCount": {
"value_count": {
"field": "orderSn"
}
},
"filteredCoupon": {
"filter": {
"terms": {
"tags": [
"hello",
"cool"
]
}
},
"aggs": {
"couponCount": {
"value_count": {
"field": "orderSn"
}
}
}
},
"countRatio": {
"bucket_script": {
"buckets_path": {
"orderCount": "totalOrderCount",
"couponCount": "filteredCoupon>couponCount"
},
"script": "params.couponCount/params.orderCount"
}
},
"ratio_bucket_sort": {
"bucket_sort": {
"sort": [
{
"countRatio": {
"order": "desc"
}
}
],
"size": 20
}
}
}
}
}
}
I am doing an Elasticsearch query and having problems with the aggs going missing.
If I do the query below I get the aggs back without issue:
{
"query": {
"filtered": {
"query": {
"query_string": {
"query": "*wet*",
"fields": [
"Name",
"Summary",
"Description",
"Location",
"Features",
"TypeName",
"CategoryName"
]
},
"filter": {
"term": {
"TypeID": "13"
}
}
}
}
},
"aggs": {
"type": {
"terms": {
"field": "TypeID"
}
},
"category": {
"terms": {
"field": "CategoryID"
}
},
"max_price": {
"max": {
"field": "Price"
}
},
"min_price": {
"min": {
"field": "Price"
}
},
"filter-type": {
"term": {
"TypeID": "13"
}
}
},
"from": 0,
"size": 50,
"sort": {
"_score": {
"order": "desc"
}
},
"explain": false
}
However, as soon as I add a filter, the aggs are no longer returned. I can't see what I am doing wrong so any help would be really appreciated.
The one that doesn't return aggs looks like this:
{
"query": {
"filtered": {
"query": {
"query_string": {
"query": "*wet*",
"fields": [
"Name",
"Summary",
"Description",
"Location",
"Features",
"TypeName",
"CategoryName"
]
},
"filter": {
"term": {
"TypeID": "13"
}
}
}
}
},
"aggs": {
"type": {
"terms": {
"field": "TypeID"
}
},
"category": {
"terms": {
"field": "CategoryID"
}
},
"max_price": {
"max": {
"field": "Price"
}
},
"min_price": {
"min": {
"field": "Price"
}
},
"filter-type": {
"term": {
"TypeID": "13"
}
}
},
"from": 0,
"size": 50,
"sort": {
"_score": {
"order": "desc"
}
},
"explain": false
}
This is the correct syntax for a filter aggregation. You have to wrap all your aggregations inside the filter, like this:
{
"size": 0,
"query": {
"filtered": {
"query": {
"query_string": {
"query": "*wet*",
"fields": [
"Name",
"Summary",
"Description",
"Location",
"Features",
"TypeName",
"CategoryName"
]
}
},
"filter": {
"term": {
"TypeID": "13"
}
}
}
},
"aggs": {
"filter-type": {
"filter": {
"term": {
"TypeID": "13"
}
},
"aggs": {
"type": {
"terms": {
"field": "TypeID"
}
},
"category": {
"terms": {
"field": "CategoryID"
}
},
"max_price": {
"max": {
"field": "Price"
}
},
"min_price": {
"min": {
"field": "Price"
}
}
}
}
}
}
Also, in this case the filter in aggs is redundant, as you are already using the same filter in your query.