Aggregate, sort and paginate on nested documents - elasticsearch

I'm managing a product index, with product sales and other KPIs under a nested field.
Trying to sort based on nested aggregation, and paginate - with no success.
Below is a simplified version of my mapping, for the sake of the example -
{
"product_type":
{
"type": "keyword"
},
"family":
{
"type": "keyword"
},
"rootdomain":
{
"type": "keyword"
},
"kpis":
{
"type": "nested",
"properties":
{
"sales_1d":
{
"type": "float"
},
"timestamp":
{
"type": "date",
"format": "strict_date_optional_time_nanos"
},
"views_1d":
{
"type": "float"
}
}
}
}
My aggregation is similar to the one below-
{
"aggs": {
"group_by_family": {
"aggs": {
"nested_aggregation": {
"aggs": {
"range_filtered": {
"aggs": {
"sales_1d": {
"sum": {
"field": "kpis.sales_1d"
}
},
"views_1d": {
"sum": {
"field": "kpis.views_1d"
}
},
"reverse_nesting": {
"aggs": {
"docs": {
"top_hits": {
"size": 1,
"sort": [
{
"_id": {
"order": "asc"
}
}
],
"_source": {
"includes": [
"_id",
"family",
"rootdomain",
"product_type"
]
}
}
}
},
"reverse_nested": {}
}
},
"filter": {
"range": {
"kpis.timestamp": {
"format": "basic_date_time_no_millis",
"gte": "20220721T000000Z",
"lte": "20220918T235959Z"
}
}
}
}
},
"nested": {
"path": "kpis"
}
}
},
"terms": {
"field": "family",
"size": 10
}
}
},
"query": {
//some query to filter by product-type and rootdomain
},
"size": 0
}
I'm aware that I can add an order clause to the terms aggregation to order the aggregated results.
My target though is to paginate the aggregated results - meaning I want to retrieve and order
1-10 best-selling products, and later retrieve 11-20 best-selling products and so on.
I've tried using bucket sort under range_filtered but I'm getting an error -
class org.elasticsearch.search.aggregations.bucket.filter.InternalFilter cannot be cast to class org.elasticsearch.search.aggregations.InternalMultiBucketAggregation
I'm not sure how to proceed from here. Is this possible? If not, is there any workaround?
Thanks.

Related

Compose nested aggregations

I'm sorry for any English mistakes.
I hope that someone can help me.
Suppose that I have the following mapping for my index:
PUT test-index
{
"mappings": {
"properties": {
"nestedOBJField": {
"type": "nested",
"index": true
},
"keywordField": {
"type": "keyword",
"index": true
}
}
}
}
Is it possible to use the composite aggregation with nested fields?
It would be very handy if I could do something like this:
GET /test-index/_search
{
"size": 0,
"aggs": {
"TestAgg": {
"composite": {
"size": 10000,
"sources": [
{
"keyWordFieldAgg": {
"terms": {
"field": "keyWordField"
}
},
{
"nestedFieldAgg": {
"terms": {
"field": "nestedOBJField.attribute"
}
}
}
]
}
}
}
}
But this approach returns a number of errors.
I would appreciate it a lot if someone could help.
The property nestedOBJField is of data type "nested", while the property keywordField is of type keyword and sits at the same level as nestedOBJField.
To use nested fields in an aggregation, you need to use a nested aggregation, but then all sources in the composite aggregation must be inside that nested path. An open Elasticsearch issue discusses this limitation in more detail.
You can use the following workarounds.
Move keywordField inside the nested object in your documents.
{
"mappings": {
"properties": {
"nestedOBJField": {
"type": "nested",
"properties":{
"keywordField": {
"type": "keyword"
}
}
}
}
}
}
Sample Document
{
"nestedOBJField":[
{
"attribute":"1",
"age":1,
"keywordField":"xyz"
},
{
"attribute":"2",
"age":2,
"keywordField":"xyz"
}
]
}
Query
"aggs": {
"TestAgg": {
"nested": {
"path": "nestedOBJField"
},
"aggs": {
"name": {
"composite": {
"size": 10000,
"sources": [
{
"nestedFieldAgg": {
"terms": {
"field": "nestedOBJField.attribute.keyword"
}
}
},
{
"a":{
"terms": {
"field": "nestedOBJField.keywordField.keyword"
}
}
}
]
}
}
}
}
}
Moving your field inside the nested property means data duplication, and any change to its value must be applied to all nested documents.
Use a terms aggregation instead - note that pagination will be an issue in this case.
{
"size": 0,
"aggs": {
"TestAgg": {
"nested": {
"path": "nestedOBJField"
},
"aggs": {
"name": {
"terms": {
"field": "nestedOBJField.attribute.keyword",
"size": 10
},
"aggs": {
"back_to_parent": {
"reverse_nested": {},
"aggs": {
"keywords": {
"terms": {
"field": "keywordField.keyword",
"size": 10
}
}
}
}
}
}
}
}
}
}

How to diversify the result of top-hits aggregation?

Let's start with a concrete example. I have a document with these fields:
{
"template": {
"mappings": {
"template": {
"properties": {
"tid": {
"type": "long"
},
"folder_id": {
"type": "long"
},
"status": {
"type": "integer"
},
"major_num": {
"type": "integer"
}
}
}
}
}
}
I want to aggregate the query result by the field folder_id and, for each folder_id group, retrieve the _source details of the top-N documents. So I wrote a query DSL like this:
GET /template/template/_search
{
"size": 0,
"query": {
"bool": {
"filter": [
{
"term": {
"status": 1
}
}
]
}
},
"aggs": {
"folder": {
"terms": {
"field": "folder_id",
"size": 10
},
"aggs": {
"top_hit":{
"top_hits": {
"size": 5,
"_source": ["major_num"]
}
}
}
}
}
}
However, there is now a requirement that the top-hits documents for each folder_id must be diversified on the field major_num. That is, for each folder_id, the documents retrieved by the top_hits sub-aggregation under the terms aggregation must be unique on the field major_num: for each major_num value, at most 1 document may be returned in the top_hits result.
The top_hits aggregation cannot accept sub-aggregations, so how should I solve this problem?
Why not simply add another terms aggregation on the major_num field?
GET /template/template/_search
{
"size": 0,
"query": {
"bool": {
"filter": [
{
"term": {
"status": 1
}
}
]
}
},
"aggs": {
"folder": {
"terms": {
"field": "folder_id",
"size": 10
},
"aggs": {
"majornum": {
"terms": {
"field": "major_num",
"size": 10
},
"aggs": {
"top_hit": {
"top_hits": {
"size": 1
}
}
}
}
}
}
}
}

Unable to create nested date aggregation query

I am trying to create an ElasticSearch aggregation query which can generate sum or average of value in all my ingested documents.
The documents are of the format -
{
"weather":"cold",
"date_1":"2017/07/05",
"feedback":[
{
"date_2":"2017/08/07",
"value":28,
"comment":"not cold"
},{
"date_2":"2017/08/09",
"value":48,
"comment":"a bit chilly"
},{
"date_2":"2017/09/07",
"value":18,
"comment":"very cold"
}, ...
]
}
I am able to create a sum aggregation of all "feedback.value" using "date_1" by using the following request -
GET _search
{
"query": {
"query_string": {
"query": "cold"
}
},
"size": 0,
"aggs": {
"temperature": {
"date_histogram":{
"field" : "date_1",
"interval" : "month"
},
"aggs":{
"temperature_agg":{
"terms": {
"field": "feedback.value"
}
}
}
}
}
}
However, I need to run the same aggregation across all documents, but bucketed by "feedback.date_2" instead of "date_1". I am not sure whether Elasticsearch can resolve such an aggregation or how to approach it. Any guidance would be helpful.
[EDIT]
Mapping file (I only define the nested items; ES identifies the other fields on its own)
{
"mappings": {
"catalog_item": {
"properties": {
"feedback":{
"type":"nested",
"properties":{
"date_2":{
"type": "date",
"format":"YYYY-MM-DD"
},
"value": {
"type": "float"
},
"comment": {
"type": "text"
}
}
}
}
}
}
}
You would need to make use of nested documents and sum aggregation.
Here's a working example:
Sample Mapping:
PUT test
{
"mappings": {
"doc": {
"properties": {
"feedback": {
"type": "nested"
}
}
}
}
}
Add Sample document:
PUT test/doc/1
{
"date_1": "2017/08/07",
"feedback": [
{
"date_2": "2017/08/07",
"value": 28,
"comment": "not cold"
},
{
"date_2": "2017/08/09",
"value": 48,
"comment": "a bit chilly"
},
{
"date_2": "2017/09/07",
"value": 18,
"comment": "very cold"
}
]
}
Calculate both the sum and average based on date_2.
GET test/_search
{
"size": 0,
"aggs": {
"temperature_aggregation": {
"nested": {
"path": "feedback"
},
"aggs": {
"temperature": {
"date_histogram": {
"field": "feedback.date_2",
"interval": "month"
},
"aggs": {
"sum": {
"sum": {
"field": "feedback.value"
}
},
"avg": {
"avg": {
"field": "feedback.value"
}
}
}
}
}
}
}
}

Elasticsearch aggregation doesn't work with nested-type fields

I can't make elasticsearch aggregation+filter to work with nested fields. The data schema (relevant part) is like this:
"mappings": {
"rb": {
"properties": {
"project": {
"type": "nested",
"properties": {
"age": {
"type": "long"
},
"name": {
"type": "string",
"index": "not_analyzed"
}
}
}
}
}
}
Essentially "rb" object contains a nested field called "project" which contains two more fields - "name" and "age". Query I'm running:
"aggs": {
"root": {
"aggs": {
"group": {
"aggs": {
"filtered": {
"aggs": {
"order": {
"percentiles": {
"field": "project.age",
"percents": ["50"]
}
}
},
"filter": {
"range": {
"last_updated": {
"gte": "2015-01-01",
"lt": "2015-07-01"
}
}
}
}
},
"terms": {
"field": "project.name",
"min_doc_count": 5,
"order": {
"filtered>order.50": "asc"
},
"shard_size": 10,
"size": 10
}
}
},
"nested": {
"path": "project"
}
}
}
This query is supposed to produce top 10 projects (project.name field) which match the date filter, sorted by their median age, ignoring projects with less than 5 mentions in the database. Median should be calculated only for projects matching the filter (date range).
Despite having more than a hundred thousand objects in the database, this query produces an empty list. No errors, just an empty response. I've tried it on both ES 1.6 and ES 2.0-beta.
I've reorganized your aggregation query a bit and could get some results showing up. The main point is that, since you are aggregating on a nested type, I took the filter aggregation on the last_updated field out of the nested context and moved it up the hierarchy to be the first aggregation (last_updated does not appear to be a field of the nested project objects, so a filter on it placed inside the nested aggregation presumably matches nothing). Then comes the nested aggregation on the project field, and finally the terms and percentiles aggregations.
That seems to work out pretty well. Please try.
{
"size": 0,
"aggs": {
"filtered": {
"filter": {
"range": {
"last_updated": {
"gte": "2015-01-01",
"lt": "2015-07-01"
}
}
},
"aggs": {
"root": {
"nested": {
"path": "project"
},
"aggs": {
"group": {
"terms": {
"field": "project.name",
"min_doc_count": 5,
"shard_size": 10,
"order": {
"order.50": "asc"
},
"size": 10
},
"aggs": {
"order": {
"percentiles": {
"field": "project.age",
"percents": [
"50"
]
}
}
}
}
}
}
}
}
}
}

Aggregates in Nest (Elastic) with filter having both nested and parent objects

I have a catalog of products that I want to calculate aggregates on. The trouble comes when trying to do nested aggregations with a filter that has both nested and parent fields in it. It either gives wrong counts or 0 hits. Here is a sample of my product object mapping:
"Products": {
"properties": {
"ProductID": {
"type": "long"
},
"ProductType": {
"type": "long"
},
"ProductName": {
"type": "string",
"fields": {
"raw": {
"type": "string",
"index": "not_analyzed"
}
}
},
"Prices": {
"type": "nested",
"properties": {
"CurrencyType": {
"type": "integer"
},
"Cost": {
"type": "double"
}
}
}
}
}
Here is an example of the sql query that I am trying to replicate in elastic:
SELECT PRODPR.Cost AS PRODPR_Cost
,COUNT(PROD.ProdcutID) AS PROD_ProductID_Count
FROM Products PROD WITH (NOLOCK)
LEFT OUTER JOIN Prices PRODPR WITH (NOLOCK) ON (PRODPR.objectid = PROD.objectid)
WHERE PRODPR.CurrencyType = 4
AND PROD.ProductType IN (
11273
,11293
,11294
)
GROUP BY PRODPR.Cost
Elastic Search queries I came up with:
First One (following query returns correct counts with just CurrencyType as filter but when I add ProductType filter, it gives me wrong counts)
GET /IndexName/Products/_search
{
"aggs": {
"price_agg": {
"filter": {
"bool": {
**"must": [
{
"nested": {
"path": "Prices",
"filter": {
"term": {
"Prices.CurrencyType": "8"
}
}
}
},
{
"terms": {
"ProductType": [
"11273",
"11293",
"11294"
]
}
}
]**
}
},
"aggs": {
"price_nested_agg": {
"nested": {
"path": "Prices"
},
"aggs": {
"59316518_group_agg": {
"terms": {
"field": "Prices.Cost",
"size": 0
},
"aggs": {
"product_count": {
"reverse_nested": { },
"aggs": {
"ProductID_count_agg": {
"value_count": {
"field": "ProductID"
}
}
}
}
}
}
}
}
}
}
},
"size": 0
}
Second One (following query returns correct counts with just CurrencyType as filter but when I add ProductType filter, it gives me 0 hits):
GET /IndexName/Prodcuts/_search
{
"aggs": {
"price_agg": {
"nested": {
"path": "Prices"
},
"aggs": {
"currency_filter": {
"filter": {
"bool": {
"must": [
{
"term": {
"Prices.CurrrencyType": "4"
}
},
{
"terms": {
"ProductType": [
"11273",
"11293"
]
}
}
]
}
},
"aggs": {
"59316518_group_agg": {
"terms": {
"field": "Prices.Cost",
"size": 0
},
"aggs": {
"product_count": {
"reverse_nested": {},
"aggs": {
"ProductID_count_agg": {
"value_count": {
"field": "ProductID"
}
}
}
}
}
}
}
}
}
}
},
"size": 0
}
I have tried some more queries but the above two are the closest I came up with. Has anyone come across this use case? What am I doing wrong? Any help is appreciated. Thanks!

Resources