How to group documents by hour without day in elasticsearch? - elasticsearch

I have an application in which users answer a survey every day, and I want to know at which hours of the day the answers are given. I tried several requests, but I can't group all documents by hour alone; the results are grouped by hour within each day.
I do this :
{
"aggs": {
"byHour": {
"date_histogram": {
"field": "date",
"interval": "hour",
"format" : "H"
}
}
}
}
}
The results are bucketed by hour but also by date, and I want the day to be ignored.
[
{
"key_as_string": "0",
"key": 1533945600000,
"doc_count": 40,
"group_by_state": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": []
}
},
{
"key_as_string": "1",
"key": 1533949200000,
"doc_count": 345,
"group_by_state": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": []
}
},
{
"key_as_string": "23",
"key": 1534028400000,
"doc_count": 15,
"group_by_state": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": []
}
},
{
"key_as_string": "0",
"key": 1534032000000,
"doc_count": 0,
"group_by_state": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": []
}
},
{
"key_as_string": "1",
"key": 1534035600000,
"doc_count": 2,
"group_by_state": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": []
}
},
{
"key_as_string": "2",
"key": 1534039200000,
"doc_count": 3,
"group_by_state": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": []
}
}
]
Mapping of type
{
"myIndex": {
"mappings": {
"answer": {
"properties": {
"date": {
"type": "date"
},
"lang": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"level": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"offset": {
"type": "long"
},
"patientCaretrackId": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"protocolId": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"query": {
"properties": {
"constant_score": {
"properties": {
"filter": {
"properties": {
"bool": {
"properties": {
"must": {
"properties": {
"term": {
"properties": {
"questionId": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"questionnaireId": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
}
}
}
}
}
}
}
}
}
}
}
}
},
"questionId": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"questionnaireId": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"surgeonId": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"value": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
},
"fielddata": true
}
}
}
}
}
}
Example of documents :
[
{
"date": "2018-09-11T00:00:00.000Z",
"lang": "fr",
"level": "red",
"offset": 21,
"patientCaretrackId": "5b894b10a9f7afec73762113",
"protocolId": "ptg-koos-long-v1",
"questionnaireId": "j21",
"surgeonId": "699362de-f040-4799-b1ea-53f5b4a2fe03",
"value": "permanentes",
"questionId": "frequence-douleur-2"
},
{
"date": "2018-09-11T00:00:00.000Z",
"lang": "fr",
"level": "red",
"offset": 21,
"patientCaretrackId": "5b894b10a9f7afec73762113",
"protocolId": "ptg-koos-long-v1",
"questionnaireId": "j21",
"surgeonId": "699362de-f040-4799-b1ea-53f5b4a2fe03",
"value": "permanentes",
"questionId": "frequence-douleur-2"
}
]
Is it possible to do this with an Elasticsearch query?
Thank you,

You can use a terms aggregation with a script:
https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-bucket-terms-aggregation.html#search-aggregations-bucket-terms-aggregation-script
GET /_search
{
"aggs" : {
"hours" : {
"terms" : {
"script" : {
"source": "doc['date'].getHour()",
"lang": "painless"
}
}
}
}
}
(Just to give you an idea, not sure at all about the script itself ..)

I found a solution, thanks:
{
"size": 0,
"aggs": {
"byHour": {
"date_histogram": {
"field": "date",
"interval": "hour",
"format" : "H",
"keyed": true,
"time_zone": "+02:00"
}
}
}
}
The response :
{
"took": 9,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"skipped": 0,
"failed": 0
},
"hits": {
"total": 2796,
"max_score": 0,
"hits": []
},
"aggregations": {
"byHour": {
"buckets": {
"17": {
"key_as_string": "17",
"key": 1536159600000,
"doc_count": 2006
},
"18": {
"key_as_string": "18",
"key": 1536163200000,
"doc_count": 790
}
}
}
}
}

Related

elastic search nested sub aggregations

We are using elastic search which holds records as documents with following definition
{
"loadtender": {
"aliases": {},
"mappings": {
"_doc": {
"_meta": {
"version": 20
},
"properties": {
"carrierId": {
"type": "long"
},
"destinationData": {
"type": "keyword"
},
"destinationZip": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 50
}
}
},
"effStartTime": {
"type": "date"
},
"endTime": {
"type": "date"
},
"id": {
"type": "long"
},
"mustRespondByTime": {
"type": "date"
},
"orgdiv": {
"type": "keyword"
},
"originData": {
"type": "keyword"
},
"originZip": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 50
}
}
},
"purchaseOrderNum": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 255
}
}
},
"startTime": {
"type": "date"
},
"tenderStatus": {
"type": "keyword"
},
"tenderedTime": {
"type": "date"
}
}
}
},
"settings": {
"index": {
"creation_date": "1655105542470",
"number_of_shards": "5",
"number_of_replicas": "1",
"uuid": "ohcXgA8EQ5iJj0X6_4BqXA",
"version": {
"created": "6080499"
},
"provided_name": "loadtender"
}
}
}
}
I am trying to search records to return me following filtered results
Input Parameter : startDate (yesterday), originData.originCity and originData.destinationCity
Output Required:
Three buckets for 0-30 days, 30-60 days and 60-90 days
buckets of distinct originData.city and destinationData.city combinations under each of the above
Under each of the above, buckets of data for each unique carrierId and the corresponding record list / count
Basically I was trying to achieve something like the below
{
"aggregations": {
"aggr": {
"buckets": [
{
"key": "0-30 days",
"doc_count": 10,
"aggr": {
"buckets": [
{
"key": "(originCity)Menasha, WI, US|Hanover, MD, US (DestinationCity)",
"aggr": {
"buckets": [
{
"key": "10183-carrierId",
"count": 10
}
]
}
}
]
}
},
{
"key": "30-60 days",
"doc_count": 11,
"aggr": {
"buckets": [
{
"key": "Dallas, TX, US|Houston, TX, US",
"aggr": {
"buckets": [
{
"key": "10183-carrierId",
"count": 10
},
{
"key": "10022-carrierId",
"count": 1
}
]
}
}
]
}
}
]
}
}
}
I've tried the following but I think I am not finding a way to filter it further using the sub aggregators.
{
"_source":["id", "effStartTime", "carrierId", "originData", "destinationData"],
"size": 100,
"query": {
"bool": {
"must": [
{
"bool": {
"must": [
{
"range": {
"startTime": {
"from": "2021-08-27T23:59:59.000Z",
"to": "2022-09-01T00:00:00.000Z",
"include_lower": true,
"include_upper": true,
"boost": 1
}
}
}
],
"adjust_pure_negative": true,
"boost": 1
}
}
],
"must_not": [
{
"term": {
"tenderStatus": {
"value": "REMOVED",
"boost": 1
}
}
}
],
"filter" : {
"exists" : {
"field" : "carrierId"
}
},
"adjust_pure_negative": true,
"boost": 1
}
},
"aggregations": {
"aggr": {
"terms": {
"script": "doc['originData'].values[0] + '|' + doc['destinationData'].values[0]"
}
}
}
}
I am beginning to wonder whether this is even possible, or whether I should switch to issuing multiple queries instead.
I was able to achieve the same using the following sub-aggregations:
"aggregations": {
"aggr":{
"date_range": {
"field": "startTime",
"format": "MM-yyyy",
"ranges": [
{"to": "now-1M/M", "from": "now"}, --> now to 30 days back
{"to": "now-1M/M", "from": "now-2M/M"}, from 30 days back to 60 days back
{"to": "now-2M/M", "from": "now-3M/M"}, from 60 days back to 90 days back
{"to": "now-3M/M", "from": "now-12M/M"}
]
},
"aggregations": {
"aggr":{
"terms": {
"script": "doc['originData'].values[0] + '|' + doc['destinationData'].values[0]" --> concatenated origin and destination address as a key
},
"aggregations": {
"aggr": {
"terms": {
"field": "carrierId" --> nested carrier count
}
}
}
}
}
}
}
Following is the response template that I receive.
"aggregations": {
"aggr": {
"buckets": [
{
"key": "09-2021-06-2022",
"from": 1630454400000,
"from_as_string": "09-2021",
"to": 1654041600000,
"to_as_string": "06-2022",
"doc_count": 1,
"aggr": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "Dallas, TX, US|Houston, TX, US",
"doc_count": 14,
"aggr": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": 10022,
"doc_count": 14
}
]
}
}
]
}
}
]
}
}
Thank you to all of you for your efforts and time. Do let me know if you discover any better way.

Elasticsearch filtered aggregations, returned bucketed keys are not split up specifically

I have many file assets stored across multiple folders. What I'm trying to do is run a text string query on this set of filenames, return the matched file parameters, and how often it appears in each folder. However with the attached query, I am not getting the full filename parameters for each filtered result:
Here's the query:
"aggs": {
"filenames": {
"filter": {
"term": {"filename": "foo"}
},
"aggs": {
"files_count": {
"terms": {
"field": "filename",
"size": 100
},
"aggs": {
"folder_count": {
"terms": {
"field": "folder"
}
}
}
}
}
}
},
"size": 0
}
And the results look something like this:
"aggregations": {
"filenames": {
"doc_count": 1218,
"files_count": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "foo",
"doc_count": 1218,
"folder_count": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 1139,
"buckets": [
{
"key": "1575569706838",
"doc_count": 8
},
{
"key": "1575656106314",
"doc_count": 8
},
{
"key": "1575742506771",
"doc_count": 8
},
{
"key": "1575828907233",
"doc_count": 8
},
{
"key": "1575915306570",
"doc_count": 8
},
{
"key": "1576001707455",
"doc_count": 8
},
{
"key": "1576088108154",
"doc_count": 8
},
{
"key": "1576174506235",
"doc_count": 8
},
{
"key": "1576347307560",
"doc_count": 8
},
{
"key": "1576260907130",
"doc_count": 7
}
]
}
},
...
Here is a sample of my index data:
{
"screens": {
"mappings": {
"properties": {
"date": {
"type": "date"
},
"extension": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"filename": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
},
"fielddata": true
},
"folder": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
},
"fielddata": true
},
"format": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"path": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
}
}
}
}
}
The returned key (queryString) is only a partial or different snippet of the filename field. What do I need to include in order to get the fully matched filename in this query? Ideally, instead of a single key for queryString, I'd like the results separated by unique filenames rather than having everything matched together. Do I need another level of aggs for filenames between the filtered results and folders? How would I do that?
Thanks in advance.
The filename field is most probably of type text, hence analyzed and indexed into tokens, which is why your bucket keys are the way they are.
You need to run the terms aggregation on the filename.keyword sub-field, like this:
{
"aggs": {
"filenames": {
"filter": {
"term": {
"filename.keyword": "queryString" <---- change the field name here
}
},
"aggs": {
"files_count": {
"terms": {
"field": "filename.keyword", <---- change the field name here
"size": 100
},
"aggs": {
"folder_count": {
"terms": {
"field": "folder.keyword" <---- change the field name here
}
}
}
}
}
}
},
"size": 0
}

how do I implement a single-word auto complete using Elasticsearch 6

I would like to implement a single-word autocomplete using Elasticsearch 6. I have seen a fair number of posts on how to do this using earlier versions; however, it seems that autocomplete has changed significantly in the latest version.
I am using the standard mapping for autocomplete:
PUT advertising_tins
{
"settings": {
"analysis": {
"analyzer": {
"completion_analyzer": {
"type": "custom",
"filter": [
"lowercase",
"completion_filter"
],
"tokenizer": "keyword"
}
},
"filter": {
"completion_filter": {
"type": "edge_ngram",
"min_gram": 1,
"max_gram": 24
}
}
}
},
"mappings": {
"item": {
"properties": {
"date": {
"type": "long"
},
"id": {
"type": "text"
},
"title": {
"type": "text"
},
"suggest": {
"type": "text",
"fields": {
"raw": {
"type": "keyword"
},
"completion": {
"type": "text",
"analyzer": "completion_analyzer",
"search_analyzer": "standard"
}
}
}
}
}
}
}
I am indexing like this:
POST advertising_tins/item/_bulk
{"index":{}}
{"date": 20180217, "title": "Vintage Spice Cardboard Tin of Mace Dainty Brand St. Paul, MN 1 oz.","id": "305232814","suggest": [ "spice","cardboard","tin","mace","dainty","brand","st","paul","mn","oz"]}
And querying like this:
POST advertising_tins/_search?pretty
{
"size": 0,
"query": {
"term": {
"suggest.completion": "car"
}
},
"aggs": {
"suggestions": {
"terms": {
"field": "suggest.raw"
}
}
}
}
However my results return all terms in the suggest field instead of just single term "cardboard".
{
"took": 4,
"hits": {
"total": 1,
"max_score": 0,
"hits": []
},
"aggregations": {
"suggestions": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "brand",
"doc_count": 1
},
{
"key": "cardboard",
"doc_count": 1
},
{
"key": "dainty",
"doc_count": 1
},
{
"key": "mace",
"doc_count": 1
},
{
"key": "mn",
"doc_count": 1
},
{
"key": "oz",
"doc_count": 1
},
{
"key": "paul",
"doc_count": 1
},
{
"key": "spice",
"doc_count": 1
},
{
"key": "st",
"doc_count": 1
},
{
"key": "tin",
"doc_count": 1
}
]
}
}
}
Any idea how I can fix this and get just a single term match?
You are almost there. It can be achieved with the default Completion Suggester, you only need to change the type of your completion field to "completion":
"mappings": {
"item": {
"properties": {
"date": {
"type": "long"
},
"id": {
"type": "text"
},
"title": {
"type": "text"
},
"suggest": {
"type": "text",
"fields": {
"raw": {
"type": "keyword"
},
"completion": {
"type" : "completion", <--- here
"analyzer": "completion_analyzer",
"search_analyzer": "standard"
}
}
}
}
}
}
And add a "suggest" part into your query:
POST advertising_tins/_search
{
"size": 0,
"query": {
"term": {
"suggest.completion": "car"
}
},
"suggest" : { <--- Here goes the suggest query
"my-suggestion" : {
"text" : "car",
"completion" : {
"field" : "suggest.completion"
}
}
},
"aggs": {
"suggestions": {
"terms": {
"field": "suggest.raw"
}
}
}
}
The response will look like this:
{
// ...
"hits": //... ,
"aggregations": // ...,
"suggest": {
"my-suggestion": [
{
"text": "car",
"offset": 0,
"length": 3,
"options": [
{
"text": "cardboard", <--- here is the suggestion
"_index": "advertising_tins",
"_type": "item",
"_id": "GLeUqGEBVrFe7u7pR5uA",
"_score": 1,
"_source": {
"date": 20180217,
"title": "Vintage Spice Cardboard Tin of Mace Dainty Brand St. Paul, MN 1 oz.",
"id": "305232814",
"suggest": [
"spice",
"cardboard",
"tin",
"mace",
"dainty",
"brand",
"st",
"paul",
"mn",
"oz"
]
}
}
]
}
]
}
}
The response also includes the _source of the suggested document, so you might not even need to use "query" and "aggs" parts.
Hope that helps!

Elasticsearch: Ordering with respect to Nested Sum Aggregation

The use case is to create aggregations to find top selling products in a region sorted by the counts of products sold. The data is stored in an index in elasticsearch.
I want to sort the output of my 'group_by_name' aggregation by the value of the 'sum_quantity' aggregation, which is the innermost nested aggregation, two levels below 'group_by_name', with the intermediate aggregation 'group_by_sku' in between. The default output is sorted by doc_count. I want the aggregation to be sorted by the 'sum_quantity' aggregation value instead.
I have an index with the following mapping:
{"settings": {
"index": {
"number_of_shards": 2,
"number_of_replicas": 0
},
"analysis":{
"analyzer":{
"autocomplete":{
"type":"custom",
"tokenizer":"standard",
"filter":[ "standard", "lowercase", "ngram" ]
}
},
"filter":{
"ngram":{
"type":"ngram",
"min_gram":3,
"max_gram":25
}
}
} },"mappings": {
"farmer_products_map":{
"properties": {
"state": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword"
}
}
},
"district": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword"
}
}
},
"taluka": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword"
}
}
},
"village": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword"
}
}
},
"createdOn": {
"type": "date",
"format": "epoch_millis"
},
"category": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword"
}
}
},
"brand": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword"
}
}
},
"productName": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword"
},
"autocomplete":{
"analyzer": "autocomplete",
"type": "string"
}
}
},
"crop": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword"
}
}
},
"sku": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword"
}
}
},
"quantity": {
"type": "integer"
},
"farmerId": {
"type": "integer"
},
"orderId": {
"type": "integer"
}
}} }}
The following are the sample documents:
[{
"_index": "farmer_products_index_adv",
"_type": "farmer_products_map",
"_id": "AVtCttf0IP9v8cUTtoiz",
"_score": 1,
"_source": {
"orderId": 469173,
"category": "Hardware",
"farmerId": 509583,
"district": "",
"brand": "Honda",
"taluka": "",
"crop": "",
"productName": "Honda BRUSH CUTTER UMK 435-T U2NT",
"state": "",
"sku": "AGS-HW-471",
"village": "",
"quantity": 1
},{
"_index": "farmer_products_index_adv",
"_type": "farmer_products_map",
"_id": "AVtCttf0IP9v8cUTtoi1",
"_score": 1,
"_source": {
"orderId": 469177,
"category": "Crop Nutrition",
"farmerId": 13732,
"district": "Banaskantha",
"brand": "Unassigned Brand",
"taluka": "Kankrej",
"crop": "",
"productName": "Free Power Gel - Plant Nutrient (500 Ml)",
"state": "Gujarat",
"sku": "AGS-CN-006",
"village": "Nanota",
"quantity": 1
}}]
I wish to perform the following query aggregation:
{
"query": {
"bool": {
"must": [{
"match": {
"state": {
"query": "Maharashtra",
"fuzziness": 3,
"prefix_length": 2
}
}
}, {
"match": {
"district": {
"query": "Wardha",
"fuzziness": 3,
"prefix_length": 2
}
}
}, {
"match": {
"taluka": {
"query": "Wardha",
"fuzziness": 3,
"prefix_length": 2
}
}
}]
}
},
"size": 0,
"aggs": {
"group_by_state": {
"terms": {
"field": "state.keyword"
},
"aggs": {
"group_by_district": {
"terms": {
"field": "district.keyword"
},
"aggs": {
"group_by_taluka": {
"terms": {
"field": "taluka.keyword"
},
"aggs": {
"group_by_name": {
"terms": {
"field": "productName.keyword"
},
"aggs": {
"group_by_sku": {
"terms": {
"field": "sku.keyword"
},
"aggs": {
"sum_quantity": {
"sum": {
"field": "quantity"
}
}
}
}
}
}
}
}
}
}
}
}
}}
The current output of the aggregation is:
[{
"key": "Free MH HDPE Tarpaulin Tape Black 3mtr roll",
"doc_count": 13,
"group_by_sku": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [{
"key": "AGS-HW-410",
"doc_count": 13,
"sum_quantity": {
"value": 13
}
}]}}, {
"key": "Tarpaulin Sheet 11*15 (Tadpatri) 250 GSM",
"doc_count": 10,
"group_by_sku": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [{
"key": "AGS-HW-326",
"doc_count": 10,
"sum_quantity": {
"value": 10
}
}]
}}, {
"key": "Free Humic power Advanced powder 95% (250 Gms)",
"doc_count": 6,
"group_by_sku": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [{
"key": "AGS-CN-036",
"doc_count": 6,
"sum_quantity": {
"value": 18
}
}]
}}]
I want the output to be sorted by the sum_quantity value:
[{
"key": "Free Humic power Advanced powder 95% (250 Gms)",
"doc_count": 6,
"group_by_sku": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [{
"key": "AGS-CN-036",
"doc_count": 6,
"sum_quantity": {
"value": 18
}
}]}}, {
"key": "Free MH HDPE Tarpaulin Tape Black 3mtr roll",
"doc_count": 13,
"group_by_sku": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [{
"key": "AGS-HW-410",
"doc_count": 13,
"sum_quantity": {
"value": 13
}
}]
}}, {
"key": "Tarpaulin Sheet 11*15 (Tadpatri) 250 GSM",
"doc_count": 10,
"group_by_sku": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [{
"key": "AGS-HW-326",
"doc_count": 10,
"sum_quantity": {
"value": 10
}
}]
}}]
How can I achieve this? I tried approaches suggested in other stackoverflow questions using "reverse_nested" but was unable to arrive at a solution.

how to compare two aggregations in elastic search

I have a stream of transaction data, which I'm grouping by 10m interval, counting the number of transactions in one aggregation and computing a moving average in another. I would like to return results only for the buckets where the count is greater than the moving average.
This query returns just fine.
GET /_search
{
"aggs": {
"my_date_histo":{
"date_histogram":{
"field":"created_at",
"interval":"10m"
},
"aggs":{
"the_count":{
"value_count" : {"field" : "user_id"}
},
"the_movavg":{
"moving_avg":{
"buckets_path": "the_count" ,
"window": 5,
"model": "simple"
}
}
}
}
}
}
But when I try the following, it throws an error:
GET /_search
{
"aggs": {
"my_date_histo":{
"date_histogram":{
"field":"created_at",
"interval":"10m"
},
"aggs":{
"the_count":{
"value_count" : {"field" : "user_id"}
},
"the_movavg":{
"moving_avg":{
"buckets_path": "the_count" ,
"window": 5,
"model": "simple"
}
},
"final_filter": {
"bucket_selector": {
"buckets_path": {
"TheCount": "the_count",
"TheMovAvg": "the_movavg"
},
"script": "params.TheCount > params.TheMovAvg"
}
}
}
}
}
}
EDIT :
Mapping
{
"transaction-live": {
"mappings": {
"logs": {
"properties": {
"#timestamp": {
"type": "date"
},
"#version": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"correspondent_id": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"created_at": {
"type": "date"
},
"discount": {
"type": "float"
},
"endpoint": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"event_type": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"fees": {
"type": "float"
},
"from_country_code": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"from_currency_code": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"fx_sent_receive": {
"type": "float"
},
"receive_amount": {
"type": "float"
},
"response_code": {
"type": "long"
},
"send_amount": {
"type": "float"
},
"source": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"source_version": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"startedtransaction_id": {
"type": "long"
},
"to_country_code": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"user_agent": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"user_id": {
"type": "long"
}
}
}
}
}
}
ERROR:
{
"error": {
"root_cause": [],
"type": "reduce_search_phase_exception",
"reason": "[reduce] ",
"phase": "fetch",
"grouped": true,
"failed_shards": [],
"caused_by": {
"type": "script_exception",
"reason": "runtime error",
"caused_by": {
"type": "null_pointer_exception",
"reason": null
},
"script_stack": [
"params.TheCount > params.TheMovAvg",
" ^---- HERE"
],
"script": "params.TheCount > params.TheMovAvg",
"lang": "painless"
}
},
"status": 503
}
I played around with your query a bit and found the issue.
Following is the working query you can use
{
"size": 0,
"aggs": {
"my_date_histo": {
"date_histogram": {
"field": "created_at",
"interval": "10m"
},
"aggs": {
"the_count": {
"value_count": {
"field": "user_id"
}
},
"the_movavg": {
"moving_avg": {
"buckets_path": "the_count",
"window": 5,
"model": "simple"
}
},
"final_filter": {
"bucket_selector": {
"buckets_path": {
"TheCount": "the_count",
"TheMovAvg": "the_movavg"
},
"script": "params.TheCount > (params.TheMovAvg == null ? 0 : params.TheMovAvg)"
}
}
}
}
}
}
Now, to understand the issue, take a look at the following aggregation result without the bucket_selector aggregation.
{
"took": 10,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"failed": 0
},
"hits": {
"total": 42,
"max_score": 0,
"hits": []
},
"aggregations": {
"my_date_histo": {
"buckets": [
{
"key_as_string": "2017-03-06T15:30:00.000Z",
"key": 1488814200000,
"doc_count": 14,
"the_count": {
"value": 14
}
},
{
"key_as_string": "2017-03-06T15:40:00.000Z",
"key": 1488814800000,
"doc_count": 0,
"the_count": {
"value": 0
}
},
{
"key_as_string": "2017-03-06T15:50:00.000Z",
"key": 1488815400000,
"doc_count": 14,
"the_count": {
"value": 14
},
"the_movavg": {
"value": 7
}
},
{
"key_as_string": "2017-03-06T16:00:00.000Z",
"key": 1488816000000,
"doc_count": 3,
"the_count": {
"value": 3
},
"the_movavg": {
"value": 14
}
},
{
"key_as_string": "2017-03-06T16:10:00.000Z",
"key": 1488816600000,
"doc_count": 8,
"the_count": {
"value": 7
},
"the_movavg": {
"value": 8.5
}
},
{
"key_as_string": "2017-03-06T16:20:00.000Z",
"key": 1488817200000,
"doc_count": 3,
"the_count": {
"value": 3
},
"the_movavg": {
"value": 6.375
}
}
]
}
}
}
If you look at the result above, the first two buckets have no the_movavg value: the moving_avg aggregation does not produce a value for them with that window setting. So when your bucket_selector script compared the two values, it threw a null pointer exception at runtime, because the comparison operator cannot be applied to a null value.
Hope this helps you.
Thanks

Resources