Compose nested aggregations - elasticsearch

I'm sorry for any mistakes in my English.
I hope that someone can help me.
Suppose that I have the following mapping for my index:
PUT test-index
{
"mappings": {
"properties": {
"nestedOBJField": {
"type": "nested",
"index": true
},
"keywordField": {
"type": "keyword",
"index": true
}
}
}
}
Is it possible to use the composite aggregation with nested fields?
It would be very handy if I could do something like this:
GET /test-index/_search
{
"size": 0,
"aggs": {
"TestAgg": {
"composite": {
"size": 10000,
"sources": [
{
"keyWordFieldAgg": {
"terms": {
"field": "keyWordField"
}
},
{
"nestedFieldAgg": {
"terms": {
"field": "nestedOBJField.attribute"
}
}
}
]
}
}
}
}
But this approach returns a number of errors.
I would really appreciate it if someone could help.

The property nestedOBJField is of data type "nested", while the property keyWordField is of type keyword and sits at the same level as nestedOBJField.
To use nested fields in an aggregation, you need to use a nested aggregation, but then all sources in the composite aggregation must be of type nested. This open issue explains more about it.
You can use the following workarounds.
Move keyWordField inside the nested object in your documents.
{
"mappings": {
"properties": {
"nestedOBJField": {
"type": "nested",
"properties":{
"keywordField": {
"type": "keyword"
}
}
}
}
}
}
Sample Document
{
"nestedOBJField":[
{
"attribute":"1",
"age":1,
"keywordField":"xyz"
},
{
"attribute":"2",
"age":2,
"keywordField":"xyz"
}
]
}
Query
"aggs": {
"TestAgg": {
"nested": {
"path": "nestedOBJField"
},
"aggs": {
"name": {
"composite": {
"size": 10000,
"sources": [
{
"nestedFieldAgg": {
"terms": {
"field": "nestedOBJField.attribute.keyword"
}
}
},
{
"a":{
"terms": {
"field": "nestedOBJField.keywordField.keyword"
}
}
}
]
}
}
}
}
}
Moving your field inside the nested property means data duplication and having to update the data in all nested documents.
Use a terms aggregation instead; note that pagination will be an issue in this case.
{
"size": 0,
"aggs": {
"TestAgg": {
"nested": {
"path": "nestedOBJField"
},
"aggs": {
"name": {
"terms": {
"field": "nestedOBJField.attribute.keyword",
"size": 10
},
"aggs": {
"back_to_parent": {
"reverse_nested": {},
"aggs": {
"keywords": {
"terms": {
"field": "keywordField.keyword",
"size": 10
}
}
}
}
}
}
}
}
}
}

Related

Aggregate, sort and paginate on nested documents

I'm managing a product index, with product sales and other KPIs under a nested field.
Trying to sort based on nested aggregation, and paginate - with no success.
Below is a simplified version of my mapping, for the sake of the example -
{
"product_type":
{
"type": "keyword"
},
"family":
{
"type": "keyword"
},
"rootdomain":
{
"type": "keyword"
},
"kpis":
{
"type": "nested",
"properties":
{
"sales_1d":
{
"type": "float"
},
"timestamp":
{
"type": "date",
"format": "strict_date_optional_time_nanos"
},
"views_1d":
{
"type": "float"
}
}
}
}
My aggregation is similar to the one below-
{
"aggs": {
"group_by_family": {
"aggs": {
"nested_aggregation": {
"aggs": {
"range_filtered": {
"aggs": {
"sales_1d": {
"sum": {
"field": "kpis.sales_1d"
}
},
"views_1d": {
"sum": {
"field": "kpis.views_1d"
}
},
"reverse_nesting": {
"aggs": {
"docs": {
"top_hits": {
"size": 1,
"sort": [
{
"_id": {
"order": "asc"
}
}
],
"_source": {
"includes": [
"_id",
"family",
"rootdomain",
"product_type"
]
}
}
}
},
"reverse_nested": {}
}
},
"filter": {
"range": {
"kpis.timestamp": {
"format": "basic_date_time_no_millis",
"gte": "20220721T000000Z",
"lte": "20220918T235959Z"
}
}
}
}
},
"nested": {
"path": "kpis"
}
}
},
"terms": {
"field": "family",
"size": 10
}
}
},
"query": {
//some query to filter by product-type and rootdomain
},
"size": 0
}
I'm aware that I can add an order clause to term aggregation to order the aggregated results.
My target though is to paginate the aggregated results - meaning I want to retrieve and order
1-10 best-selling products, and later retrieve 11-20 best-selling products and so on.
I've tried using bucket sort under range_filtered but I'm getting an error -
class org.elasticsearch.search.aggregations.bucket.filter.InternalFilter cannot be cast to class org.elasticsearch.search.aggregations.InternalMultiBucketAggregation
I'm not sure how to proceed from here, is this possible? if not, is there any workaround?
Thanks.

How to diversify the result of top-hits aggregation?

Let's start with a concrete example. I have a document with these fields:
{
"template": {
"mappings": {
"template": {
"properties": {
"tid": {
"type": "long"
},
"folder_id": {
"type": "long"
},
"status": {
"type": "integer"
},
"major_num": {
"type": "integer"
}
}
}
}
}
}
I want to aggregate the query result by the field folder_id and, for each group divided by folder_id, retrieve the _source details of the top-N documents. So I wrote the query DSL like this:
GET /template/template/_search
{
"size": 0,
"query": {
"bool": {
"filter": [
{
"term": {
"status": 1
}
}
]
}
},
"aggs": {
"folder": {
"terms": {
"field": "folder_id",
"size": 10
},
"aggs": {
"top_hit":{
"top_hits": {
"size": 5,
"_source": ["major_num"]
}
}
}
}
}
}
However, there is now a requirement that the top-hits documents for each folder_id must be diversified on the field major_num. For each folder_id, the top-hits documents retrieved by the top_hits sub-aggregation under the terms aggregation must be unique on the field major_num, and for each major_num value at most 1 document should be returned in the top_hits sub-aggregation result.
The top_hits aggregation cannot accept sub-aggregations, so how should I solve this problem?
Why not simply add another terms aggregation on the major_num field?
GET /template/template/_search
{
"size": 0,
"query": {
"bool": {
"filter": [
{
"term": {
"status": 1
}
}
]
}
},
"aggs": {
"folder": {
"terms": {
"field": "folder_id",
"size": 10
},
"aggs": {
"majornum": {
"terms": {
"field": "major_num",
"size": 10
},
"aggs": {
"top_hit": {
"top_hits": {
"size": 1
}
}
}
}
}
}
}
}

Right way access parent field in Elasticsearch nested aggs script

Elasticsearch Version: 5.6.3
I have a mapping like this:
PUT /my_stock
{
"mappings": {
"stock": {
"properties": {
"industry": {
"type": "nested",
"properties": {
"name": {
"type": "keyword"
},
"rate": {
"type": "double"
}
}
},
"changeRatio": {
"type": "double"
}
}
}
}
}
Data:
POST /_bulk
{"index":{"_index":"my_stock","_type":"stock","_id":null}}
{"industry":[{"name":"Technology","rate":0.6},{"name":"Health", "rate":0.2}],"changeRatio":0.1}
{"index":{"_index":"my_stock","_type":"stock","_id":null}}
{"industry":[{"name":"Health", "rate":0.3},{"name":"Education", "rate":0.2}],"changeRatio":0.2}
{"index":{"_index":"my_stock","_type":"stock","_id":null}}
{"industry":[{"name":"Health","rate":0.5},{"name":"Education","rate":0.2}],"changeRatio":-0.3}
{"index":{"_index":"my_stock","_type":"stock","_id":null}}
{"industry":[{"name":"Technology","rate":0.3},{"name":"Education","rate":0.3}],"changeRatio":0.4}
{"index":{"_index":"my_stock","_type":"stock","_id":null}}
{"industry":[{"name":"Education","rate":0.3},{"name":"Technology","rate":0.1}],"changeRatio":-0.5}
I then want to build an aggs query like this:
GET my_stock/stock/_search
{
"size": 0,
"aggs": {
"industry": {
"nested": {
"path": "industry"
},
"aggs": {
"groups": {
"terms": {
"field": "industry.name",
"order": {
"rate": "desc"
}
},
"aggs": {
"rate": {
"sum": {
"script": {
"source": "doc['changeRatio'].value * doc['industry.rate'].value"
}
}
}
}
}
}
}
}
}
But "doc['changeRatio'].value" does not return the right value; it always returns 0.
Another query, like this:
GET my_stock/stock/_search
{
"size": 0,
"aggs": {
"industry": {
"nested": {
"path": "industry"
},
"aggs": {
"groups": {
"terms": {
"field": "industry.name",
"order":{
"reverse>rate":"desc"
}
},
"aggs": {
"reverse": {
"reverse_nested": {},
"aggs": {
"rate": {
"sum": {
"script": {
"source": "doc['changeRatio'].value * doc['industry.rate'].value"
}
}
}
}
}
}
}
}
}
}
}
"doc['changeRatio'].value" is right, but "doc['industry.rate'].value" get 0
Refer to this question: Elasticsearch 5.4: Use normal and nested fields in same Painless script query?
1. { params['_source']['changeRatio'] } or { params['_source']['industry.rate'] } do not work in this version
2. "copy_to" is stored as a multi-value field, which also does not work
How can I write a correct script to compute "changeRatio * industry.rate"?

Aggregates in Nest (Elastic) with filter having both nested and parent objects

I have a catalog of products that I want to calculate aggregates on. The trouble comes with trying to do nested aggregations with filter that has both nested and parent fields in it. Either it gives wrong counts or 0 hits. Here is a sample of my product object mapping:
"Products": {
"properties": {
"ProductID": {
"type": "long"
},
"ProductType": {
"type": "long"
},
"ProductName": {
"type": "string",
"fields": {
"raw": {
"type": "string",
"index": "not_analyzed"
}
}
},
"Prices": {
"type": "nested",
"properties": {
"CurrencyType": {
"type": "integer"
},
"Cost": {
"type": "double"
}
}
}
}
}
Here is an example of the sql query that I am trying to replicate in elastic:
SELECT PRODPR.Cost AS PRODPR_Cost
,COUNT(PROD.ProdcutID) AS PROD_ProductID_Count
FROM Products PROD WITH (NOLOCK)
LEFT OUTER JOIN Prices PRODPR WITH (NOLOCK) ON (PRODPR.objectid = PROD.objectid)
WHERE PRODPR.CurrencyType = 4
AND PROD.ProductType IN (
11273
,11293
,11294
)
GROUP BY PRODPR.Cost
Elastic Search queries I came up with:
First One (following query returns correct counts with just CurrencyType as filter but when I add ProductType filter, it gives me wrong counts)
GET /IndexName/Products/_search
{
"aggs": {
"price_agg": {
"filter": {
"bool": {
**"must": [
{
"nested": {
"path": "Prices",
"filter": {
"term": {
"Prices.CurrencyType": "8"
}
}
}
},
{
"terms": {
"ProductType": [
"11273",
"11293",
"11294"
]
}
}
]**
}
},
"aggs": {
"price_nested_agg": {
"nested": {
"path": "Prices"
},
"aggs": {
"59316518_group_agg": {
"terms": {
"field": "Prices.Cost",
"size": 0
},
"aggs": {
"product_count": {
"reverse_nested": { },
"aggs": {
"ProductID_count_agg": {
"value_count": {
"field": "ProductID"
}
}
}
}
}
}
}
}
}
}
},
"size": 0
}
Second One (following query returns correct counts with just CurrencyType as filter but when I add ProductType filter, it gives me 0 hits):
GET /IndexName/Prodcuts/_search
{
"aggs": {
"price_agg": {
"nested": {
"path": "Prices"
},
"aggs": {
"currency_filter": {
"filter": {
"bool": {
"must": [
{
"term": {
"Prices.CurrrencyType": "4"
}
},
{
"terms": {
"ProductType": [
"11273",
"11293"
]
}
}
]
}
},
"aggs": {
"59316518_group_agg": {
"terms": {
"field": "Prices.Cost",
"size": 0
},
"aggs": {
"product_count": {
"reverse_nested": {},
"aggs": {
"ProductID_count_agg": {
"value_count": {
"field": "ProductID"
}
}
}
}
}
}
}
}
}
}
},
"size": 0
}
I have tried some more queries but the above two are the closest I came up with. Has anyone come across this use case? What am I doing wrong? Any help is appreciated. Thanks!

Elasticsearch - List whole object in terms aggregation

I need to make a query that lists the whole objects in terms aggregation. The mapping is like this:
{
"travelers": {
"properties": {
"traveler": "string",
"cars": {
"type":"nested",
"properties": {
"type": {
"type":"string"
},
"color": {
"type":"string"
}
}
}
}
}
}
And the query I can make is like this:
{
"aggregations": {
"people": {
"terms": {
"field":"traveler"
}
},
"aggregations": {
"cars": {
"nested": {
"path":"cars"
},
"aggregations": {
"types": {
"terms": {
"field":"cars.type"
}
}
}
}
}
}
}
But this query only returns the types of the cars. I can modify for it to return the types and the colors, but I can't, that way, tell which color is related to which type of car. How can I do that?
Nested aggregation simply makes sure that each nested object is treated as a document and aggregation happens on nested documents level and not on actual document level.
Hence you need to do one more level of aggregation with color to get what you are looking for.
{
"aggregations": {
"people": {
"terms": {
"field": "traveler"
}
},
"aggregations": {
"cars": {
"nested": {
"path": "cars"
},
"aggregations": {
"types": {
"terms": {
"field": "cars.type"
},
"aggs": {
"colors": {
"terms": {
"field": "cars.color"
}
}
}
}
}
}
}
}
}

Resources