Elasticsearch query aggregation only terms with a specific pattern - elasticsearch

I have made this query in elasticsearch:
{
"size": 0,
"query": {
"query_string": {
"query": "*",
"analyze_wildcard": true
}
},
"_source": {
"excludes": []
},
"aggs": {
"2": {
"terms": {
"field": "tokens.keyword",
"size": 50,
"order": {
"_count": "desc"
}
}
}
}
}
It returns the 50 most frequent tokens in my documents.
I want the 50 most frequent tokens that start with the specific character "$".
How can I change my query to get this with the best performance?

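This should work! The wildcard query restricts the aggregation to documents that contain at least one token starting with "$". If a document can hold several tokens, also add "include": "\\$.*" to the terms aggregation so that only the matching tokens are returned as buckets.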
{
"size": 0,
"query": {
"wildcard": {
"tokens.keyword": "$*"
}
},
"_source": {
"excludes": []
},
"aggs": {
"2": {
"terms": {
"field": "tokens.keyword",
"size": 50,
"order": {
"_count": "desc"
}
}
}
}
}

Related

Elastic (v6.4) aggregation: is it possible to return buckets for documents that are requested but aren't found?

The purpose of this query is to aggregate how many documents of the specified document type each division of the specified leading division has.
{
"query": {
"bool": {
"must": [
{
"term": {
"division.leadingDivisionId": "554"
}
},
{
"terms": {
"documentType.id": [
"72"
]
}
},
{
"exists": {
"field": "registrationNumber"
}
}
]
}
},
"aggs": {
"body": {
"terms": {
"field": "division.leadingDivisionId",
"size": 1500,
"order": {
"_count": "desc"
},
"min_doc_count": 1
},
"aggs": {
"body": {
"terms": {
"field": "division.id",
"size": 1500,
"order": {
"_count": "desc"
},
"min_doc_count": 1
},
"aggs": {
"body": {
"terms": {
"field": "documentType.id",
"size": 1500,
"order": {
"_term": "asc"
},
"min_doc_count": 0
}
}
}
}
}
}
}
}
The issue with this query is that when no documents of the specified type exist in the division, a bucket like this isn't returned:
{
"Key": "72",
"doc_count": 0,
"key_as_string": null,
"Body": null
}
It is only returned when at least some documents exist in the division but do not match the criteria, for example because they have no registration number.
Is it possible to get the bucket even when no such documents exist? The goal is to get the same number and order of buckets for each division.

Elastic search query Pagination help on Aggregations

I have tried the query below to paginate aggregations, but it is not working properly.
I am getting the error "reason": "[40:7] [terms] unknown field [from], parser not found"
{
"size": 0,
"query": {
"bool": {
"must": [
{
"term": {
"answer.keyword": "UNHANDLED"
}
},
{
"term": {
"source.keyword": "QUAL2"
}
}
]
}
},
"aggs": {
"MyBuckets": {
"terms": {
"field": "question.keyword",
"order": {
"_count": "asc"
},
"size": "10"
},
"aggs": {
"MyBuckets": {
"terms": {
"field": "timestamp",
"order": {
"_count": "asc"
},
"size": "3",
"from": 8
}
}
}
}
}
}
Only size is supported; you have to remove the from parameter from the aggregation query.
You can try using partitions in the aggregation instead.
Try the query below:
{
"size": 0,
"query": {
"bool": {
"must": [
{
"term": {
"answer.keyword": "UNHANDLED"
}
},
{
"term": {
"source.keyword": "QUAL2"
}
}
]
}
},
"aggs": {
"MyBuckets": {
"terms": {
"field": "question.keyword",
"order": {
"_count": "asc"
},
"size": "10"
},
"aggs": {
"MyBuckets": {
"terms": {
"field": "timestamp",
"order": {
"_count": "asc"
},
"size": "3",
"include": {
"partition": 1,
"num_partitions": 10
}
}
}
}
}
}
}

How to perform complex query on aggregated fields in ElasticSearch

I am trying to figure out how to perform a complex query in Elasticsearch. Let's say I have the following table of data:
Which I got from the following query:
{
"aggs": {
"3": {
"terms": {
"field": "ColumnA",
"order": {
"_key": "desc"
},
"size": 50
},
"aggs": {
"4": {
"terms": {
"field": "ColumnB",
"order": {
"_key": "desc"
},
"size": 50
},
"aggs": {
"5": {
"terms": {
"field": "ColumnC",
"order": {
"_key": "desc"
},
"size": 50
},
"aggs": {
"sum_of_views": {
"sum": {
"field": "views"
}
},
"sum_of_costs": {
"sum": {
"field": "cost"
}
},
"sum_of_clicks": {
"sum": {
"field": "clicks"
}
},
"sum_of_earned": {
"sum": {
"field": "earned"
}
},
"sum_of_adv_earned": {
"sum": {
"field": "adv_earned"
}
}
}
}
}
}
}
}
},
"size": 0,
"_source": {
"excludes": []
},
"stored_fields": [
"*"
],
"script_fields": {},
"docvalue_fields": [
{
"field": "hour",
"format": "date_time"
}
],
"query": {
"bool": {
"must": [],
"filter": [
{
"match_all": {}
},
{
"range": {
"hour": {
"format": "strict_date_optional_time",
"gte": "2019-08-08T06:29:34.723Z",
"lte": "2020-08-08T06:29:34.724Z"
}
}
}
],
"should": [],
"must_not": []
}
}
}
Now for example, if I want to get the records that have the following condition
(sum_of_clicks / sum_of_views) * (sum_of_earned2 / sum_of_earned1) < 0.5
What should I query?
I think the query below should help. My understanding is that you want to first group by ColumnA, ColumnB and ColumnC, calculate the sums of the clicks, views, earned1 and earned2 fields, and then apply the custom aggregation logic you are looking for.
I came up with the query below, which makes use of the Bucket Selector Aggregation.
POST <your_index_name>/_search
{
"size": 0,
"aggs": {
"3": {
"terms": {
"field": "ColumnA",
"order": {
"_key": "desc"
},
"size": 50
},
"aggs": {
"4": {
"terms": {
"field": "ColumnB",
"order": {
"_key": "desc"
},
"size": 50
},
"aggs": {
"5": {
"terms": {
"field": "ColumnC",
"order": {
"_key": "desc"
},
"size": 50
},
"aggs": {
"sum_views": {
"sum": {
"field": "views"
}
},
"sum_clicks": {
"sum": {
"field": "clicks"
}
},
"sum_earned1": {
"sum": {
"field": "earned1"
}
},
"sum_earned2": {
"sum": {
"field": "earned2"
}
},
"custom_sum_bucket_filter": {
"bucket_selector": {
"buckets_path": {
"sum_of_views": "sum_views",
"sum_of_clicks": "sum_clicks",
"sum_of_earned1": "sum_earned1",
"sum_of_earned2": "sum_earned2"
},
"script": "(params.sum_of_views/params.sum_of_clicks) * (params.sum_of_earned1/params.sum_of_earned2) < 0.5"
}
}
}
},
"min_bucket_selector": {
"bucket_selector": {
"buckets_path": {
"valid_docs_count": "5._bucket_count"
},
"script": {
"source": "params.valid_docs_count >= 1"
}
}
}
}
},
"min_bucket_selector": {
"bucket_selector": {
"buckets_path": {
"valid_docs_count": "4._bucket_count"
},
"script": {
"source": "params.valid_docs_count >= 1"
}
}
}
}
}
}
}
Note that to get the exact result you are looking for, I had to add bucket filter conditions at levels 4 and 5.
The aggregations I've made use of are:
A Bucket Selector to evaluate the condition you've mentioned
A second Bucket Selector so as to not display empty buckets at aggregation 5
A third Bucket Selector so as to not show empty buckets at aggregation level 4.
To see why I've added the additional empty-bucket filters, you can remove them and compare the results.
Note that for the sake of simplicity I have ignored the query part as well as the cost field. Please feel free to add them and test it. Also note that the script above divides views by clicks and earned1 by earned2, whereas the condition in the question is (sum_of_clicks / sum_of_views) * (sum_of_earned2 / sum_of_earned1); swap the operands in the script to match it exactly.

Combine two elastic queries into 1. How?

I have two queries, each of which returns results when run as a GET request.
The 1st query is -
{
"query": {
"bool": {
"must": [
{
"query_string": {
"query": "*",
"analyze_wildcard": true
}
},
{
"range": {
"database-status.meta.current-time": {
"lte": "now-91d/d"
}
}
}
],
"must_not": []
}
},
"size": 0,
"_source": {
"excludes": []
},
"aggs": {
"2": {
"date_histogram": {
"field": "database-status.meta.current-time",
"interval": "1h",
"time_zone": "CST6CDT",
"min_doc_count": 1
},
"aggs": {
"3": {
"terms": {
"field": "database-status.name.keyword",
"size": 500,
"order": {
"1": "desc"
}
},
"aggs": {
"1": {
"sum": {
"field": "database-status.status-properties.rate-properties.cache-properties.compressed-tree-cache-hit-rate.value",
"script": "_value/60"
}
}
}
}
}
}
}
}
and the 2nd query is -
{
"query": {
"bool": {
"must": [
{
"query_string": {
"query": "*",
"analyze_wildcard": true
}
},
{
"range": {
"database-status.meta.current-time": {
"lte": "now-91d/d"
}
}
}
],
"must_not": []
}
},
"size": 0,
"_source": {
"excludes": []
},
"aggs": {
"2": {
"date_histogram": {
"field": "database-status.meta.current-time",
"interval": "1h",
"time_zone": "CST6CDT",
"min_doc_count": 1
},
"aggs": {
"3": {
"terms": {
"field": "database-status.name.keyword",
"size": 500,
"order": {
"1": "desc"
}
},
"aggs": {
"1": {
"sum": {
"field": "database-status.status-properties.rate-properties.cache-properties.compressed-tree-cache-miss-rate.value",
"script": "_value/60"
}
}
}
}
}
}
}
}
How do I combine the two queries into one query and get both results in the same result set? Based on this, I'll try to replicate the method with other queries and even combine 3 or more queries into one.
There are two options to do that:
1. Use multi search (msearch), which allows you to send one request to ES containing both queries. The msearch response contains each query's response separately, and you can then choose how to combine the answers.
2. Combine the queries in a single bool query:
So let's say you have:
Q1->bool->must->inner-q-1
and Q2->bool->must->inner-q-2
Then you can combine them with should:
Q3->bool->should->[inner-q-1, inner-q-2], with minimum_should_match set to 1 (very important!)
I made use of nested aggregations.
Here is the combined query -
{
"query": {
"bool": {
"must": [
{
"query_string": {
"query": "*",
"analyze_wildcard": true
}
},
{
"range": {
"server-status.meta.current-time": {
"lte": "now-91d/d"
}
}
}
],
"must_not": []
}
},
"size": 0,
"_source": {
"excludes": []
},
"aggs": {
"time-interval": {
"date_histogram": {
"field": "server-status.meta.current-time",
"interval": "1h",
"time_zone": "CST6CDT",
"min_doc_count": 1
},
"aggs": {
"http-server": {
"terms": {
"field": "server-status.type.keyword",
"include": "http-server",
"size": 500,
"order": {
"1": "desc"
}
},
"aggs": {
"1": {
"sum": {
"field": "server-status.status-properties.expanded-tree-cache-hit-rate.value",
"script": "_value/60"
}
},
"2": {
"sum": {
"field": "server-status.status-properties.expanded-tree-cache-miss-rate.value",
"script": "_value/60"
}
},
"3": {
"terms": {
"field": "server-status.name.keyword",
"size": 500,
"order": {
"1": "desc"
}
},
"aggs": {
"1": {
"sum": {
"field": "server-status.status-properties.expanded-tree-cache-hit-rate.value",
"script": "_value/60"
}
},
"2": {
"sum": {
"field": "server-status.status-properties.expanded-tree-cache-miss-rate.value",
"script": "_value/60"
}
}
}
}
}
}
}
}
}
}

Aggregation query in Kibana

I am using Kibana 5.3. I have created an Elasticsearch bucket aggregation query.
Can I execute this query from Kibana to create a chart?
Edit:
I mean, is there a way to just insert it somewhere, like a JSON input or an external script file?
My query:
{"query": {
"bool": {
"must": [
{
"query_string": {
"query": "XXX",
}
},
],
"must_not": []
}
},
"size": 0,
"_source": {
"excludes": []
},
"aggs": {
"2": {
"terms": {
"field": "YYY",
"size": 5,
"order": {
"_count": "desc"
}
},
"aggs": {
"3": {
"terms": {
"field": "ZZZ",
"size": 5,
"order": {
"_count": "desc"
}
},
"aggs": {
"4": {
"terms": {
"field": "_type",
"size": 5,
"order": {
"_count": "desc"
}
}
}
}
}
}
}
}
}

Resources