Sorting aggregation resultset in Elasticsearch and filtering - elasticsearch

Data sample in Elasticsearch index:
"_source": {
"Type": "SELL",
"Id": 31,
"status": "YES",
"base": "FIAT",
"orderDate": "2019-02-01T05:00:00.000Z"
}
I need to:
1. Filter the records based on 'base' = ? and 'Type' = ?, then
2. get the latest record (the top of the stack) for each Id among these filtered records, and then
3. from those results, keep only the records with 'status' = 'YES'.
The Elasticsearch query I wrote:
{
"size":0,
"query":{
"bool":{
"must":[
{ "match":{ "base":"FIAT" } },
{ "match":{ "Type":"SELL" } }
]
}
},
"aggs":{
"sources":{
"terms":{ "field":"Id" },
"aggs":{
"latest":{
"top_hits":{
"size":1,
"_source":{
"includes":[
"Id",
"orderDate",
"status"
]
},
"sort":{ "orderDate":"desc" }
}
}
}
}
}
}

Did you try using composite aggregations?
Composite Aggregations in ElasticSearch

Related

Can I alter the score of results based on a query within an Elasticsearch Aggregation?

I'm using an Elasticsearch filter aggregation with a nested top_hits aggregation to retrieve top matching documents based on different filters, but I can't seem to change the scores of results in each bucket via boosting or a nested function_score query. Is this just not possible? I haven't found any explicit documentation saying it won't work, and the query executes just fine; however, the resulting scores aren't impacted.
Example query (note the huge boost in the first aggregation):
GET _search
{
"size":0,
"query":{
"bool":{
"should":[
{
"multi_match":{
"type":"phrase",
"query":"TV",
"fields":[
"categories^4"
]
}
}
]
}
},
"aggs":{
"1":{
"filter":{
"bool":{
"must":[
{
"multi_match":{
"type":"phrase",
"query":"Music",
"fields":[
"categories^10"
]
}
}
]
}
},
"aggs":{
"1_hits":{
"top_hits":{
"size":10,
"sort":[
{
"_score":{
"order":"desc"
}
}
]
}
}
}
},
"2":{
"filter":{
"bool":{
"must":[
{
"multi_match":{
"type":"phrase",
"query":"Music",
"fields":[
"categories"
]
}
}
]
}
},
"aggs":{
"2_hits":{
"top_hits":{
"size":10,
"sort":[
{
"_score":{
"order":"desc"
}
}
]
}
}
}
}
}
}

Query based on Fields existing in different Indices in Elasticsearch

I've got the following query
{
"from":0,
"size":50000,
"_source":[
"T121",
"timestamp"
],
"sort":{
"timestamp":{
"order":"asc"
}
},
"query":{
"bool":{
"must":{
"range":{
"timestamp":{
"gte":"2017-01-17 11:44:41.347",
"lte":"2017-02-18 11:44:47.878"
}
}
},
"must":{
"exists":{
"field":"T121"
}
}
}
}
}
http://172.22.23.169:9200/index1,index2,Index3/_search?pretty
With this URL I want to query over a number of indices in Elasticsearch and only return those documents where a specific field exists.
Is it possible to put a list of fields in the "exists" clause, so that a document is returned if "field1" OR "field2" OR "field3" exists in it and is skipped otherwise, or do I have to script such a case?
To search across all indices, use: http://172.22.23.169:9200/_search?pretty
To search across selected indices, add the following clause to the "bool" query:
"must": {
"terms": {
"_index": [
"index1",
"index2"
]
}
}
To OR multiple "exists" conditions, use a "should" clause containing several "exists" queries and set "minimum_should_match" to control how many of them a document must satisfy.
{
"from":0,
"size":50000,
"_source":[
"T121",
"timestamp"
],
"sort":{
"timestamp":{
"order":"asc"
}
},
"query":{
"bool":{
"must":{
"range":{
"timestamp":{
"gte":"2017-01-17 11:44:41.347",
"lte":"2017-02-18 11:44:47.878"
}
}
},
"should":[
{
"exists":{
"field":"field1"
}
},
{
"exists":{
"field":"field2"
}
},
{
"exists":{
"field":"field3"
}
}
],
"minimum_should_match":1
}
}
}

Combine two function_score queries in dis_max

I would like to make a query with two subqueries, each of which has its own scoring based on function_score with a script. For example, this subquery:
{
"query":{
"function_score":{
"query":{
"bool":{
"filter":[
{
"term":{
"rooms_count":3
}
},
{
"term":{
"addresses":"d76255c8-3173-4db5-a39b-badd3ebdf851"
}
},
{
"exists":{
"field":"zhk_id"
}
}
]
}
},
"script_score":{
"script":"1 * doc['price'].value/100000"
},
"boost_mode":"replace"
}
}
}
works fine, and its score is based on price (about 190 points). But if I try to combine two subqueries in a dis_max query, function_score does not work and I get scores of about 1 point.
The explanation for each subquery on its own looks like this:
"value": 100.9416, "description": "script score function, computed with script:"[script: 1 * doc['price'].value/100000, type: inline, lang: null, params: {}]" and parameters: {}",
and for the dis_max query like this:
"value": 1, "description": "ConstantScore(function score (#rooms_count: #addresses:d76255c8-3173-4db5-a39b-badd3ebdf851 #ConstantScore(fieldnames:zhk_id),function=script[script: 1 * doc['price'].value/100000, type: inline, lang: null, params: {}])), product of:",
Can anybody tell me how to combine function_score queries properly?
My full dis_max query is on pastebin.
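Thanks to Daniel Mitterdorfer from https://discuss.elastic.co/t/combine-two-function-score-queries-in-dis-max/70666.
The explain output quoted in the question shows the function score wrapped in a ConstantScore, which is why every hit scored 1. In the corrected query, each function_score query is listed directly in the "queries" array of dis_max, so its script score is kept.
The correct query is: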
{
"query":{
"dis_max":{
"queries":[
{
"function_score":{
"query":{
"bool":{
"filter":[
{
"term":{
"rooms_count":3
}
},
{
"term":{
"addresses":"d76255c8-3173-4db5-a39b-badd3ebdf851"
}
},
{
"missing":{
"field":"zhk_id"
}
}
]
}
},
"script_score":{
"script":"1 * doc['price'].value/100000"
},
"boost_mode":"replace"
}
},
{
"function_score":{
"query":{
"bool":{
"filter":[
{
"term":{
"rooms_count":3
}
},
{
"term":{
"addresses":"d76255c8-3173-4db5-a39b-badd3ebdf851"
}
},
{
"exists":{
"field":"zhk_id"
}
}
]
}
},
"script_score":{
"script":"1 * doc['price'].value/100000"
},
"boost_mode":"replace"
}
}
]
}
}
}

How to select fields after aggregation in Elastic Search 2.3

I have the following schema for an index:
PUT
"mappings": {
"event": {
"properties": {
"#timestamp": { "type": "date", "doc_values": true},
"partner_id": { "type": "integer", "doc_values": true},
"event_id": { "type": "integer", "doc_values": true},
"count": { "type": "integer", "doc_values": true, "index": "no" },
"device_id": { "type": "string", "index":"not_analyzed","doc_values":true },
"product_id": { "type": "integer", "doc_values": true}
}
}
}
I need a result equivalent to the following query:
SELECT product_id, device_id, sum(count) FROM index WHERE partner_id=5 AND timestamp<=end_date AND timestamp>=start_date GROUP BY device_id,product_id having sum(count)>1;
I am able to achieve the result with the following Elasticsearch query:
GET
{
"store": true,
"size":0,
"aggs":{
"matching_events":{
"filter":{
"bool":{
"must":[
{
"term":{
"partner_id":5
}
},
{
"range":{
"#timestamp":{
"from":1470904000,
"to":1470904999
}
}
}
]
}
},
"aggs":{
"group_by_productid": {
"terms":{
"field":"product_id"
},
"aggs":{
"group_by_device_id":{
"terms":{
"field":"device_id"
},
"aggs":{
"total_count":{
"sum":{
"field":"count"
}
},
"sales_bucket_filter":{
"bucket_selector":{
"buckets_path":{
"totalCount":"total_count"
},
"script": {"inline": "totalCount > 1"}
}
}
}
}}
}
}
}
}
}
However, for the cases where the count is <= 1, the query returns empty buckets keyed by product_id. Out of 40 million groups, only 100k will satisfy the condition, so I get back a huge result set, the majority of which is useless. How can I select only particular fields after the aggregation? I tried this, but it does not work: `"fields": ["aggregations.matching_events.group_by_productid.group_by_device_id.buckets.key"]`
Edit:
I have the following set of data:
device id Partner Id Count
db63te2bd38672921ffw27t82 367 3
db63te2bd38672921ffw27t82 272 1
I got this output:
{
"took":6,
"timed_out":false,
"_shards":{
"total":5,
"successful":5,
"failed":0
},
"hits":{
"total":7,
"max_score":0.0,
"hits":[
]
},
"aggregations":{
"matching_events":{
"doc_count":5,
"group_by_productid":{
"doc_count_error_upper_bound":0,
"sum_other_doc_count":0,
"buckets":[
{
"key":367,
"doc_count":3,
"group_by_device_id":{
"doc_count_error_upper_bound":0,
"sum_other_doc_count":0,
"buckets":[
{
"key":"db63te2bd38672921ffw27t82",
"doc_count":3,
"total_count":{
"value":3.0
}
}
]
}
},
{
"key":272,
"doc_count":1,
"group_by_device_id":{
"doc_count_error_upper_bound":0,
"sum_other_doc_count":0,
"buckets":[
]
}
}
]
}
}
}
}
As you can see, the bucket with key 272 is empty, which makes sense, but shouldn't this bucket be removed from the result set altogether?
I've just found out that there is a fairly recent issue and PR that adds a _bucket_count path to the buckets_path option, so that an aggregation can potentially filter the parent bucket based on the number of buckets another aggregation has. In other words, if the _bucket_count is 0 for a parent bucket_selector, the bucket should be removed.
This is the github issue: https://github.com/elastic/elasticsearch/issues/19553

Filtering nested aggregations in ElasticSearch

Given the following mapping from my index (Items):
{
"title": {
"type":"string"
},
"tag_groups": {
"type":"nested",
"include_in_parent":true,
"properties": {
"name":{
"type":"string",
"index":"not_analyzed"
},
"terms": {
"type":"string",
"index":"not_analyzed"
}
}
}
}
And the following sample of data that each document in the index follows:
{
"title":"Christian Louboutin Magenta Leather Lady Peep",
"tag_groups": [
{
"name": "Color",
"terms": ["pink"]
},
{
"name":"Material/Fabric",
"terms":["leather"]
},
{
"name":"Season",
"terms":["summer", "spring"]
},
{
"name":"Occasion",
"terms":["cocktail", "night out", "wedding: for the guests", "date night"]
}
]
}
IMPORTANT: These tag_groups are variable from product to product and category to category. So pulling them out of the nested property would be tough since it would create index properties that don't apply to all documents in the index.
Here is my query that is producing the correct aggregated results across each tag_groups.name and corresponding set of values. Counts are accurate too.
{
"size":"40",
"query": {
"filtered": {
"query": {"match_all": {}}
}
},
"aggs":{
"tagGroupAgg": {
"nested": {
"path":"tag_groups"
},
"aggs":{
"tagGroupNameAgg":{
"terms":{
"field":"tag_groups.name"
},
"aggs":{
"tagGroupTermsAgg":{
"terms": {
"field":"tag_groups.terms"
}
}
}
}
}
}
}
}
NOW FOR THE QUESTION...
In order for the aggregation counts on the left to reflect accurately, when I apply a TermsFilter to the aggregation (tag_groups.Color = ['pink']), how do I make sure that aggregation filter isn't applied to the tag_groups.Color result?
Currently, when I apply that filter, I lose all of my tag_groups.Colors (except for pink), which prevents the user from searching for other colors...
I'm hitting a wall on this one. Any help would be much appreciated!
{
"size":"40",
"query":{
"filtered":{
"query":{
"match_all":{
}
}
}
},
"aggs":{
"tagGroupAgg":{
"nested":{
"path":"tag_groups"
},
"aggs":{
"tagGroupNameAgg":{
"terms":{
"field":"tag_groups.name"
},
"aggs":{
"tagGroupTermsAgg":{
"terms":{
"field":"tag_groups.terms"
},
"aggs":{
"tagGroupTermsReverseAgg":{
"reverse_nested":{
},
"aggs":{
"testingReverseFilter":{
"filter":{
"bool":{
"must":[
{
"terms":{
"tag_groups.name":[
"Color"
]
}
},
{
"terms":{
"brand_name.raw":[
"Chanel"
]
}
}
]
}
},
"aggs":{
"tagGroupTermsAgg2":{
"terms":{
"field":"tag_groups.terms"
}
}
}
}
}
}
}
}
}
}
}
}
}
}

Resources