Elastic search find sum of two fields in single query - elasticsearch

I have a requirement of find sum of two fields in a single query. I have managed to find the sum of one field, but facing difficulty to add two aggression in a single query.
My json look like the following way
{
"_index": "outboxprov1",
"_type": "message",
"_id": "JXpDpNefSkKO-Hij3T9m4w",
"_score": 1,
"_source": {
"team_id": "1fa86701af05a863f59dd0f4b6546b32",
"created_user": "1a9d05586a8dc3f29b4c8147997391f9",
"created_ip": "192.168.2.245",
"folder": 1,
"post_count": 5,
"sent": 3,
"failed": 2,
"status": 6,
"message_date": "2014-08-20T14:30Z",
"created_date": "2014-06-27T04:34:30.885Z"
}
}
My search query
{
"query": {
"filtered": {
"query": {
"match": {
"team_id": {
"query": "1fa86701af05a863f59dd0f4b6546b32"
}
}
},
"filter": {
"and": [
{
"term": {
"status": "6"
}
}
]
}
}
},
"aggs": {
"intraday_return": {
"sum": {
"field": "sent"
}
}
},
"aggs": {
"intraday_return": {
"sum": {
"field": "failed"
}
}
}
}
How to put two aggression in one query? Please help me to solve this issue. Thank you

You can compute the sum using script
Example:
{
"size": 0,
"aggregations": {
"age_ranges": {
"range": {
"script": "DateTime.now().year - doc[\"birthdate\"].date.year",
"ranges": [
{
"from": 22,
"to": 25
}
]
}
}
}
}
your query should contain
"script" : "doc['sent'].value+doc['failed'].value"

There can be multiple sub aggregates
{
"query": {
"filtered": {
"query": {
"match": {
"team_id": {
"query": "1fa86701af05a863f59dd0f4b6546b32"
}
}
},
"filter": {
"and": [
{
"term": {
"status": "6"
}
}
]
}
}
},
"aggs": {
"intraday_return_sent": {
"sum": {
"field": "sent"
}
},
"intraday_return_failed": {
"sum": {
"field": "failed"
}
}
}
}

Related

ElasticSearch: Aggregations for the count of documents that has empty nested array

Question- I want a count of documents where the nested array MATCHES is empty like "MATCHES": [ ].
My document structure looks like this(shows two records for simplicity) -
{
"hits": [
{
"_type": "_doc",
"_id": "ef0a2c44179a513476b080cc2a585d95",
"_source": {
"DIVISION_NUMBER": 44,
"MATCHES": [
{
"MATCH_STATUS": "APPROVED",
"UPDATED_ON": 1599171303000
}
]
}
},
{
"_type": "_doc",
"_id": "ef0a2c44179a513476b080cc2a585d95",
"_source": {
"DIVISION_NUMBER": 44,
"MATCHES": [ ]
}
}
]
}
Solution tried- I tried following different ways (workaround) of aggregation (empty-match-agg1,empty-match-agg2 ...) but none of these gave correct results. Please help!
"aggs": {
"sku": {
"nested": {
"path": "MATCHES"
},
"aggs": {
"empty-match-agg1": {
"missing": {
"field": "MATCHES"
}
},
"empty-match-agg2": {
"terms": {
"field": "MATCHES",
"missing": "N/A"
}
},
"empty-match-agg3": {
"sum": {
"script": {
"lang": "painless",
"source": "params['_source'].MATCHES"
}
}
},
"empty-match-agg4": {
"filter": {
"bool": {
"must_not": {
"nested": {
"query": {
"match_all": {}
},
"path": "MATCHES"
}
}
}
}
},
"empty-match-agg5": {
"terms": {
"field": "MATCHES"
}
}
}
}
}
Missing aggregation does not support nested field for now. There is open issue as of now.
To get count of empty matches, you can use a filter aggregation with the nested query wrapped into the must_not clause of the bool query.
{
"aggs": {
"missing_matches_agg": {
"filter": {
"bool": {
"must_not": {
"nested": {
"query": {
"match_all": {}
},
"path": "MATCHES"
}
}
}
}
}
}
}

ElasticSearch query with prefix for aggregation

I am trying to add a prefix condition for my ES query in a "must" clause.
My current query looks something like this:
body = {
"query": {
"bool": {
"must":
{ "term": { "article_lang": 0 }}
,
"filter": {
"range": {
"created_time": {
"gte": "now-3h"
}
}
}
}
},
"aggs": {
"articles": {
"terms": {
"field": "article_id.keyword",
"order": {
"score": "desc"
},
"size": 1000
},
"aggs": {
"score": {
"sum": {
"field": "score"
}
}
}
}
}
}
I need to add a mandatory condition to my query to filter articles whose id starts with "article-".
So, far I have tried this:
{
"query": {
"bool": {
"should": [
{ "term": { "article_lang": 0 }},
{ "prefix": { "article_id": {"value": "article-"} }}
],
"filter": {
"range": {
"created_time": {
"gte": "now-3h"
}
}
}
}
},
"aggs": {
"articles": {
"terms": {
"field": "article_id.keyword",
"order": {
"score": "desc"
},
"size": 1000
},
"aggs": {
"score": {
"sum": {
"field": "score"
}
}
}
}
}
}
I am fairly new to ES and from the documentations online, I know that "should" is to be used for "OR" conditions and "must" for "AND". This is returning me some data but as per the condition it will be consisting of either article_lang=0 or articles starting with article-. When I use "must", it doesn't return anything.
I am certain that there are articles with id starting with this prefix because currently, we are iterating through this result to filter out such articles. What am I missing here?
In your prefix query, you need to use the article_id.keyword field, not article_id. Also, you should prefer filter over must since you're simply doing yes/no matching (aka filters)
{
"query": {
"bool": {
"filter": [ <-- change this
{
"term": {
"article_lang": 0
}
},
{
"prefix": {
"article_id.keyword": { <-- and this
"value": "article-"
}
}
}
],
"filter": {
"range": {
"created_time": {
"gte": "now-3h"
}
}
}
}
},
"aggs": {
"articles": {
"terms": {
"field": "article_id.keyword",
"order": {
"score": "desc"
},
"size": 1000
},
"aggs": {
"score": {
"sum": {
"field": "score"
}
}
}
}
}
}

Elasticsearch distinct records in order with pagination

How do I get records after aggregation on a terms field in order with pagination. So far I have this:
{
"query": {
"bool": {
"filter": [
{
"terms": {
"user_id.keyword": [
"user#domain.com"
]
}
},
{
"range": {
"creation_time": {
"gte": "2019-02-04T19:00:00.000Z",
"lte": "2019-05-04T19:00:00.000Z"
}
}
}
],
"should": [
{
"wildcard": {
"operation": "*sol*"
}
},
{
"wildcard": {
"object_id": "*sol*"
}
},
{
"wildcard": {
"user_id": "*sol*"
}
},
{
"wildcard": {
"user_type": "*sol*"
}
},
{
"wildcard": {
"client_ip": "*sol*"
}
},
{
"wildcard": {
"country": "*sol*"
}
},
{
"wildcard": {
"workload": "*sol*"
}
}
]
}
},
"aggs": {
"user_ids": {
"terms": {
"field": "country.keyword",
"include": ".*United.*"
}
}
},
"from": 0,
"size": 10,
"sort": [
{
"creation_time": {
"order": "desc"
}
}
]
}
I looked into this and some people say its possible by using composite aggregations or by using partitions. But I am not sure how I can actually achieve this.
I also looked into bucket_sort but I cant seem to get it to work:
"my_bucket_sort": {
"bucket_sort": {
"sort": [
{
"user_ids": {
"order": "desc"
}
}
],
"size": 3
}
}
I am a noob at this. Kindly help me out. Thanks.
As the field is country, and presumably doesn't have a high cardinality, you could set size to be a sufficiently high number to return all countries in a single request
"aggs": {
"user_ids": {
"terms": {
"field": "country.keyword",
"include": ".*United.*",
"size": 10000
}
}
}
Or alternatively, for a high cardinality field, you could filter the aggregation first, and then use partitioning to page through the values
{
"size": 0,
"aggs": {
"user_ids": {
"filter": {
"wildcard" : { "country" : ".*United.*" }
},
"aggs": {
"countries": {
"terms": {
"field": "country.keyword",
"include": {
"partition": 0,
"num_partitions": 20
},
"size": 10000
}
}
}
}
}
}
where you would increase the value of partition with each query you send up to 19
See the elastic documentation for further details

Filtered aggregation query error

I am trying to run a filtered aggregation like below but getting error.
"Unknown key for a START_OBJECT in [associations]: [disabledDate]. Can anyone review the query and suggest any changes required.
STEPS in the query:
1. Query all documents with versionDate less than or equal to the given
date.
2. Aggregate on Id.
3. Run a subaggregation top hits query with missing disabledDate filter.
4. apply post filter for missing disabledDate.
{
"query": {
"bool": {
"must": [
{
"range": {
"versionDate": {
"from": null,
"to": "2016-05-25T20:53:22.742Z",
"include_lower": false,
"include_upper": true
}
}
},
{
"terms": {
"domainId": [
"yy"
]
}
},
{
"terms": {
"termId": [
"rr"
]
}
}
]
}
},
"aggregations": {
"associations": {
"terms": {
"field": "id",
"size": 0,
"execution_hint": "global_ordinals_low_cardinality",
"order": {
"_term": "asc"
},
"disabledDate": {
"filters": {
"missing": {
"field": "disbaledDate"
}
},
"aggregations": {
"top": {
"top_hits": {
"size": 1,
"_source": {
"includes": [],
"excludes": []
},
"sort": [
{
"versionDate": {
"order": "desc"
}
}
]
}
}
}
}
}
}
},
"post_filter": {
"missing": {
"field": "disabledDate"
}
}
}

Elasticsearch facets filters

I've created a facet using elasticsearch but I want to filter it just for specific words.
{
...
"facets": {
"my_facets": {
"terms": {
"field": "description",
"size": 1000
}
}
}
}
And the result contains all the words from description .
{
"my_facet": {
"_type": "terms",
"missing": 0,
"total": 180,
"other": 0,
"terms": [
{
"term": "și",
"count": 1
},
{
"term": "światłowska",
"count": 1
},
{
"term": "łódź",
"count": 1
}
]
}
}
I want my facets to contain an analyze just for specific words not for entire words finded in description .
I've already tried to use a query match inside my facet but it makes an overall analyze
like follows
{
"query_Facet_test": {
"query": {
"match": {
"description": "word1 word2"
}
}
}
}
and the result I get :
{
"query_Facet_test": {
"_type": "query",
"count": 1
}
}
You can use a bool query like this to get query facets
{
"query": {
"bool": {
"must": [
{
"match": {
"description": "word1"
}
},
{
"match": {
"description": "word2"
}
}
]
}
},
"facets": {
"my_facets": {
"terms": {
"field": "description",
"size": 1000
}
}
}
}

Resources