Percentage for each bucket - elasticsearch

I'm trying to get the percentage for each bucket in Elasticsearch 7.1 with this query:
{
"size":0,
"aggs":{
"group_by_status":{
"terms":{
"field":"status.keyword"
},
"aggs":{
"percentage":{
"sum":{
"script":"100/total"
}
}
}
},
"total":{
"sum_bucket":{
"buckets_path":"group_by_status>_count"
}
}
}
}
This one doesn't work because you can't use the total count of status in the script, and I get an error that the variable total is not defined. I want to know if there's a way to get this result, with the percentage of each bucket:
{
"aggregations":{
"group_by_status":{
"doc_count_error_upper_bound":0,
"sum_other_doc_count":0,
"buckets":[
{
"key":"Abierto",
"doc_count":2,
"percentage":{
"value":40.0
}
},
{
"key":"Cerrado",
"doc_count":2,
"percentage":{
"value":40.0
}
},
{
"key":"Pausado",
"doc_count":1,
"percentage":{
"value":20.0
}
}
]
},
"total":{
"value":5.0
}
}
}

Related

Nested aggregation with term agg

I have a document with 2 nested paths - path.to.node and different.path.
I want to be able to get a date histogram based on the path.to.node.date field but then group the buckets based on different.path.to.name.
Is that possible?
I tried something like this but it doesn't seem to work...
{
"size":0,
"query":{...},
"aggregations":{
"path.to.node.date":{
"nested":{
"path":"path.to.node"
},
"aggregations":{
"path.to.node.date":{
"filter":{
"range":{...}
}
},
"aggregations":{
"different.path.name":{
"nested":{
"path":"different.path"
},
"terms":{
"field":"different.path.name"
...
},
"aggregations":{
"path.to.node.date":{
"date_histogram":{
"field":"path.to.node.date",
"interval":"1M",
"offset":0,
"order":{"_key":"asc"},
"keyed":false,"min_doc_count":0}
}
}
}
}
}
}
}
}
}

Why does search performance differ between from/size and search_after?

There are hundreds of millions of documents in my index. When I search, I find that search_after is much slower than from/size. With from/size the search is quick and returns within several ms, but with search_after it takes 20 seconds. My search results are sorted by time and key (a keyword copy of _id). Why? What's the difference?
search cmd:
{
"query":{
"bool":{
"filter":[
{
"query_string":{
"query":"*"
}
},
{
"range":{
"__time__":{
"gte":1324958207,
"lte":1724958207
}
}
}
]
}
},
"size":10,
"sort":[
{
"__time__":{
"order":"desc"
}
},
{
"__key__":{
"order":"desc"
}
}
],
"search_after":[
1630594662000,
"6130e666-2-67e9e3-f5-1"
],
"profile":true
}
profile:
{"searches":[
{
"query":[
{
"type":"BoostQuery",
"description":"(ConstantScore(DocValuesFieldExistsQuery [field=__time__]))^0.0",
"time_in_nanos":45722536283,
"breakdown":{
"set_min_competitive_score_count":0,
"match_count":0,
"shallow_advance_count":0,
"set_min_competitive_score":0,
"next_doc":45722241414,
"match":0,
"next_doc_count":410919487,
"score_count":0,
"compute_max_score_count":0,
"compute_max_score":0,
"advance":19517,
"advance_count":39,
"score":0,
"build_scorer_count":78,
"create_weight":14271,
"shallow_advance":0,
"create_weight_count":1,
"build_scorer":261081
},
"children":[
{
"type":"DocValuesFieldExistsQuery",
"description":"DocValuesFieldExistsQuery [field=__time__]",
"time_in_nanos":16571715415,
"breakdown":{
"set_min_competitive_score_count":0,
"match_count":0,
"shallow_advance_count":0,
"set_min_competitive_score":0,
"next_doc":16571493898,
"match":0,
"next_doc_count":410919487,
"score_count":0,
"compute_max_score_count":0,
"compute_max_score":0,
"advance":15074,
"advance_count":39,
"score":0,
"build_scorer_count":78,
"create_weight":517,
"shallow_advance":0,
"create_weight_count":1,
"build_scorer":205926
}
}
]
}
],
"rewrite_time":116538,
"collector":[
{
"name":"PagingFieldCollector",
"reason":"search_top_hits",
"time_in_nanos":30851166561
}
]
}
],
"aggregations":[
]
}
Because search_after uses a scroll-like approach to filtering, which is supposed to sort all the data before filtering, whereas from/size only retrieves elements as a stream. Note that from/size will become slower and use more memory the deeper you go with from.

Using a filtering bucket to filter a moving average result

I need to know whether I can filter based on my moving average results (I just want the ones that are = 100). I tried to use a filtering bucket but always receive the error
"Unknown key for a START_OBJECT in [hostname_bucket_filter]."
I do not know what else to do. This is the query:
"aggs":{
"aggs_host":{
"terms":{
"field":"dimensions.hostname.value",
"size":10,
"order":{"_key":"desc"},
"min_doc_count":1},
"aggs":{
"aggs_fs":{
"terms":{
"field":"dimensions.mount.value",
"size":10,
"order":{"_key":"desc"},
"min_doc_count":1
},
"aggs":{
"agg_date":{
"date_histogram":{
"interval":"1h",
"field":"timestamp",
"min_doc_count":0
},
"aggs":{
"agg_ave":{
"avg":{
"field":"value.double"
}
},
"aggs_ma":{
"moving_avg":{
"buckets_path":"agg_ave",
"window":6,
"model":"holt_winters",
"settings" : {
"type": "mult",
"alpha": 0.6,
"beta": 0.8,
"gamma": 0.6,
"period" : 3
},
"predict":7,
"minimize":false
}
}
}
}
}
}
}
}
},
"hostname_bucket_filter" : {
"bucket_selector" : {
"bucket_path" : {
"the_hosts_filt": "aggs_host>aggs_fs>agg_date>agg_ave.aggs_ma"
},
"script" : "params.the_hosts_filt >= 100"
}
}

Has Child join field issue

Good day:
I've set up a Parent/Child relationship model between Facility and FacilityType. Currently I'm trying to query the Facility and at the same time load the children by using the HasChild query, but I'm getting an error. This is the query:
{
"aggs":{
"Capacity":{
"children":{
"type":"facilitytype"
},
"aggs":{
"Capacity":{
"histogram":{
"field":"capacity",
"interval":10.0,
"missing":0.0
}
}
}
},
"Distance":{
"histogram":{
"field":"businessLocation",
"interval":10.0,
"order":{
"_count":"desc"
}
}
}
},
"query":{
"bool":{
"should":[
{
"bool":{
"must":[
{
"geo_distance":{
"boost":1.1,
"distance":"200.0m",
"distance_type":"arc",
"businessLocation":{
"lat":38.958878299999988,
"lon":-77.365260499999991
}
}
},
{
"has_child":{
"_name":"FacilityType",
"type":"doc",
"query":{
"match_all":{
}
}
}
}
]
}
},
{
"geo_distance":{
"boost":1.1,
"distance":"200.0m",
"distance_type":"arc",
"serviceAreas":{
"lat":38.958878299999988,
"lon":-77.365260499999991
}
}
}
]
}
}
}
I'm getting this error when I execute the query:
[has_child] join field [joinField] doesn't hold [doc] as a child

Mixed filters, using OR as well as AND, in ElasticSearch

In your opinion what would be the best way to do the following?
I want to filter an ElasticSearch query by several ranges that are grouped in an OR filter, and then by one final range that needs to be included as an AND filter. The explanation is a bit crappy but hopefully the pseudo-code below will help...
Basically I tried structuring the following query:
{
"query":{
"multi_match":{
"query":"blue",
"fields":[
"name"
]
}
},
"sort":{
"_score":{
"order":"desc",
"missing":"_last"
}
},
"from":"0",
"size":"24",
"facets":{
"rating":{
"range":{
"field":"rating",
"ranges":[
{
"from":1
},
{
"from":2
},
{
"from":3
},
{
"from":4
}
]
}
},
"price":{
"range":{
"field":"price",
"ranges":[
{
"to":10
},
{
"from":10,
"to":100
},
{
"from":100,
"to":1000
}
{
"from":1000
}
]
}
}
},
"filter":{
"or":[
{
"range":{
"price":{
"from":"10",
"to":"100"
}
}
},
{
"range":{
"price":{
"from":"100",
"to":"1000"
}
}
}
],
"and":{
"numeric_range":{
"rating":{
"gte":"4"
}
}
}
}
}
This failed with the error that there was "No parser for element [numeric_range]". So I tried replacing:
"and":{
"numeric_range":{
"rating":{
"gte":"4"
}
}
}
with:
"numeric_range":{
"rating":{
"gte":"4"
}
}
The query now returns results but it's returning results with prices in the ranges 10-100, 100-1000 and ANY results with a rating greater than 4 (even if their price is outside of the defined range).
Any clues on how I could do this query? Do I need to be using a bool filter?
Ah ha, figured it out, with the help of Boaz Leskes over on the ElasticSearch mailing list!
It should be structured like this:
filter: {
bool: {
must: [
{
"numeric_range":{
"rating":{
"gte":"4"
}
}
}
],
should: [
{
"range":{
"price":{
"from":"10",
"to":"100"
}
}
},
{
"range":{
"price":{
"from":"100",
"to":"1000"
}
}
}
]
}
}

Resources