Filtered aggregation query error - elasticsearch

I am trying to run a filtered aggregation like the one below, but I am getting this error:
"Unknown key for a START_OBJECT in [associations]: [disabledDate]". Can anyone review the query and suggest any changes required?
Steps in the query:
1. Query all documents with versionDate less than or equal to the given date.
2. Aggregate on id.
3. Run a top_hits sub-aggregation with a missing disabledDate filter.
4. Apply a post filter for missing disabledDate.
{
"query": {
"bool": {
"must": [
{
"range": {
"versionDate": {
"from": null,
"to": "2016-05-25T20:53:22.742Z",
"include_lower": false,
"include_upper": true
}
}
},
{
"terms": {
"domainId": [
"yy"
]
}
},
{
"terms": {
"termId": [
"rr"
]
}
}
]
}
},
"aggregations": {
"associations": {
"terms": {
"field": "id",
"size": 0,
"execution_hint": "global_ordinals_low_cardinality",
"order": {
"_term": "asc"
},
"disabledDate": {
"filters": {
"missing": {
"field": "disbaledDate"
}
},
"aggregations": {
"top": {
"top_hits": {
"size": 1,
"_source": {
"includes": [],
"excludes": []
},
"sort": [
{
"versionDate": {
"order": "desc"
}
}
]
}
}
}
}
}
}
},
"post_filter": {
"missing": {
"field": "disabledDate"
}
}
}

Related

Is it possible to fetch count of total number of docs that contain a qualifying aggregation condition in elasticsearch?

I use ES v7.3 and, as per my requirements, I am aggregating some fields to fetch the required docs in the response. There is a further requirement to also fetch the total count of all docs that contain the nested field which satisfies the aggregation condition described below, but I have not found a way to do that.
The aggregation query that I currently use to fetch the documents is:
"aggs": {
"users": {
"composite": {
"sources": [
{
"users": {
"terms": {
"field": "co_profileId.keyword"
}
}
}
],
"size": 5000
},
"aggs": {
"sessions": {
"nested": {
"path": "co_score"
},
"aggs": {
"last_4_days": {
"filter": {
"range": {
"co_score.sessionTime": {
"gte": "2021-01-10T00:00:31.399Z",
"lte": "2021-01-14T01:37:31.399Z"
}
}
},
"aggs": {
"score_count": {
"sum": {
"field": "co_score.value"
}
}
}
}
}
},
"page_view_count_filter": {
"bucket_selector": {
"buckets_path": {
"sessionCount": "sessions > last_4_days > score_count"
},
"script": "params.sessionCount > 100"
}
},
"filtered_users": {
"top_hits": {
"size": 1,
"_source": {
"includes": [
"co_profileId",
"co_type",
"co_score"
]
}
}
}
}
}
}
Sample doc:
{
"co_profileId": "14654325",
"co_type": "identify",
"co_updatedAt": "2021-01-11T11:37:33.499Z",
"co_score": [
{
"value": 3,
"sessionTime": "2021-01-09T01:37:31.399Z"
},
{
"value": 3,
"sessionTime": "2021-01-10T10:47:33.419Z"
},
{
"value": 6,
"sessionTime": "2021-01-11T11:37:33.499Z"
}
]
}

Elasticsearch distinct records in order with pagination

How do I get records, in order and with pagination, after aggregating on a terms field? So far I have this:
{
"query": {
"bool": {
"filter": [
{
"terms": {
"user_id.keyword": [
"user#domain.com"
]
}
},
{
"range": {
"creation_time": {
"gte": "2019-02-04T19:00:00.000Z",
"lte": "2019-05-04T19:00:00.000Z"
}
}
}
],
"should": [
{
"wildcard": {
"operation": "*sol*"
}
},
{
"wildcard": {
"object_id": "*sol*"
}
},
{
"wildcard": {
"user_id": "*sol*"
}
},
{
"wildcard": {
"user_type": "*sol*"
}
},
{
"wildcard": {
"client_ip": "*sol*"
}
},
{
"wildcard": {
"country": "*sol*"
}
},
{
"wildcard": {
"workload": "*sol*"
}
}
]
}
},
"aggs": {
"user_ids": {
"terms": {
"field": "country.keyword",
"include": ".*United.*"
}
}
},
"from": 0,
"size": 10,
"sort": [
{
"creation_time": {
"order": "desc"
}
}
]
}
I looked into this, and some people say it's possible by using composite aggregations or by using partitions, but I am not sure how I can actually achieve this.
I also looked into bucket_sort, but I can't seem to get it to work:
"my_bucket_sort": {
"bucket_sort": {
"sort": [
{
"user_ids": {
"order": "desc"
}
}
],
"size": 3
}
}
I am a noob at this. Kindly help me out. Thanks.
As the field is country, and presumably doesn't have a high cardinality, you could set size to be a sufficiently high number to return all countries in a single request
"aggs": {
"user_ids": {
"terms": {
"field": "country.keyword",
"include": ".*United.*",
"size": 10000
}
}
}
Or alternatively, for a high cardinality field, you could filter the aggregation first, and then use partitioning to page through the values
{
"size": 0,
"aggs": {
"user_ids": {
"filter": {
"wildcard" : { "country" : ".*United.*" }
},
"aggs": {
"countries": {
"terms": {
"field": "country.keyword",
"include": {
"partition": 0,
"num_partitions": 20
},
"size": 10000
}
}
}
}
}
}
where you would increase the value of partition with each query you send, from 0 up to 19 (num_partitions - 1).
See the Elasticsearch documentation for further details.

Improving performance of Elasticsearch exists query

I have the following query, which finds records that do not contain any of the following fields: timestamp_login, timestamp_logout, timestamp_signup and groups by user_city.
{
"query": {
"bool": {
"must": [],
"must_not": [
{
"exists": {
"field": "timestamp_login"
}
},
{
"exists": {
"field": "timestamp_logout"
}
},
{
"exists": {
"field": "timestamp_signup"
}
}
]
}
},
"aggs": {
"group_by_item": {
"terms": {
"script": "doc['user_city.keyword'].value?.toLowerCase()",
"size": 10,
"order": {
"_count": "desc"
}
}
},
"distinct_terms": {
"cardinality": {
"script": "doc['user_city.keyword'].value?.toLowerCase()"
}
}
},
"size": 0
}
However, the query often times out. Is there a more efficient way to pull records where a list of fields are missing? Also, I'm running ES 5.6.
Thanks for your help!

Elastic search find sum of two fields in single query

I have a requirement to find the sum of two fields in a single query. I have managed to find the sum of one field, but I am having difficulty adding two aggregations in a single query.
My JSON document looks like the following:
{
"_index": "outboxprov1",
"_type": "message",
"_id": "JXpDpNefSkKO-Hij3T9m4w",
"_score": 1,
"_source": {
"team_id": "1fa86701af05a863f59dd0f4b6546b32",
"created_user": "1a9d05586a8dc3f29b4c8147997391f9",
"created_ip": "192.168.2.245",
"folder": 1,
"post_count": 5,
"sent": 3,
"failed": 2,
"status": 6,
"message_date": "2014-08-20T14:30Z",
"created_date": "2014-06-27T04:34:30.885Z"
}
}
My search query
{
"query": {
"filtered": {
"query": {
"match": {
"team_id": {
"query": "1fa86701af05a863f59dd0f4b6546b32"
}
}
},
"filter": {
"and": [
{
"term": {
"status": "6"
}
}
]
}
}
},
"aggs": {
"intraday_return": {
"sum": {
"field": "sent"
}
}
},
"aggs": {
"intraday_return": {
"sum": {
"field": "failed"
}
}
}
}
How can I put two aggregations in one query? Please help me solve this issue. Thank you.
You can compute the sum using a script.
Example:
{
"size": 0,
"aggregations": {
"age_ranges": {
"range": {
"script": "DateTime.now().year - doc[\"birthdate\"].date.year",
"ranges": [
{
"from": 22,
"to": 25
}
]
}
}
}
}
Your query should contain:
"script" : "doc['sent'].value+doc['failed'].value"
A single "aggs" object can contain multiple named aggregations, so give each sum its own name:
{
"query": {
"filtered": {
"query": {
"match": {
"team_id": {
"query": "1fa86701af05a863f59dd0f4b6546b32"
}
}
},
"filter": {
"and": [
{
"term": {
"status": "6"
}
}
]
}
}
},
"aggs": {
"intraday_return_sent": {
"sum": {
"field": "sent"
}
},
"intraday_return_failed": {
"sum": {
"field": "failed"
}
}
}
}

elasticsearch facets OR filter

I have a problem with my Elasticsearch DSL: when using facet navigation and applying my facet filter, the next set of results doesn't include any further facets, even though I've asked for them.
When I do the initial search, I get the results I want back:
{
"sort": {
"_score": {},
"salesQuantity": {
"order": "asc"
}
},
"query": {
"filtered": {
"query": {
"match": {
"categoryTree": "D01"
}
},
"filter": {
"term": {
"publicwebEnabled": true,
"parentID": 0
}
}
}
},
"facets": {
"delivery_locations": {
"terms": {
"field": "delivery_locations",
"all_terms": true
}
},
"categories": {
"terms": {
"field": "categoryTree",
"all_terms": true
}
},
"collectable": {
"terms": {
"field": "collectable",
"all_terms": true
}
}
},
"from": 0,
"size": 12}
When I then apply a filter like so, the results I get back do not include the facets:
{
"sort": {
"_score": {},
"salesQuantity": {
"order": "asc"
}
},
"query": {
"filtered": {
"query": {
"match": {
"categoryTree": "D01"
}
},
"filter": {
"term": {
"publicwebEnabled": true,
"parentID": 0
},
"or": [
{
"range": {
"Retail_Price": {
"to": "49.99",
"from": "0"
}
}
}
]
}
}
},
"facets": {
"delivery_locations": {
"terms": {
"field": "delivery_locations",
"all_terms": true
}
},
"categories": {
"terms": {
"field": "categoryTree",
"all_terms": true
}
},
"collectable": {
"terms": {
"field": "collectable",
"all_terms": true
}
}
},
"from": 0,
"size": 12}
NOTE: I'm adding the OR filter above because users may choose multiple price ranges to filter on.
Am I doing something wrong?
I want the new facets returned, as altering the prices would obviously alter the counts of the other facets.
Add the original term filter inside the or filter, or wrap your whole filter inside a bool filter. I don't think you can combine the two filters just by comma-separating them like that.

Resources