Right way to access a parent field in an Elasticsearch nested aggs script - elasticsearch

Elasticsearch Version: 5.6.3
I have a mapping like this:
PUT /my_stock
{
"mappings": {
"stock": {
"properties": {
"industry": {
"type": "nested",
"properties": {
"name": {
"type": "keyword"
},
"rate": {
"type": "double"
}
}
},
"changeRatio": {
"type": "double"
}
}
}
}
}
Data:
POST /_bulk
{"index":{"_index":"my_stock","_type":"stock","_id":null}}
{"industry":[{"name":"Technology","rate":0.6},{"name":"Health", "rate":0.2}],"changeRatio":0.1}
{"index":{"_index":"my_stock","_type":"stock","_id":null}}
{"industry":[{"name":"Health", "rate":0.3},{"name":"Education", "rate":0.2}],"changeRatio":0.2}
{"index":{"_index":"my_stock","_type":"stock","_id":null}}
{"industry":[{"name":"Health","rate":0.5},{"name":"Education","rate":0.2}],"changeRatio":-0.3}
{"index":{"_index":"my_stock","_type":"stock","_id":null}}
{"industry":[{"name":"Technology","rate":0.3},{"name":"Education","rate":0.3}],"changeRatio":0.4}
{"index":{"_index":"my_stock","_type":"stock","_id":null}}
{"industry":[{"name":"Education","rate":0.3},{"name":"Technology","rate":0.1}],"changeRatio":-0.5}
I then want to build an aggs query like this:
GET my_stock/stock/_search
{
"size": 0,
"aggs": {
"industry": {
"nested": {
"path": "industry"
},
"aggs": {
"groups": {
"terms": {
"field": "industry.name",
"order": {
"rate": "desc"
}
},
"aggs": {
"rate": {
"sum": {
"script": {
"source": "doc['changeRatio'].value * doc['industry.rate'].value"
}
}
}
}
}
}
}
}
}
But "doc['changeRatio'].value" doesn't get the right value; it always returns 0.
Another query, like this:
GET my_stock/stock/_search
{
"size": 0,
"aggs": {
"industry": {
"nested": {
"path": "industry"
},
"aggs": {
"groups": {
"terms": {
"field": "industry.name",
"order":{
"reverse>rate":"desc"
}
},
"aggs": {
"reverse": {
"reverse_nested": {},
"aggs": {
"rate": {
"sum": {
"script": {
"source": "doc['changeRatio'].value * doc['industry.rate'].value"
}
}
}
}
}
}
}
}
}
}
}
"doc['changeRatio'].value" is right, but "doc['industry.rate'].value" get 0
Refer to this question: Elasticsearch 5.4: Use normal and nested fields in same Painless script query?
1. { params['_source']['changeRatio'] } or { params['_source']['industry.rate'] } does not work in this version.
2. Using "copy_to", the value is stored as a multi-valued field, which also does not work.
How can I write a correct script that computes "changeRatio * industry.rate"?

Related

Compose nested aggregations

I'm sorry for any English mistakes.
I hope that someone can help me.
Suppose that I have the following mapping for my index:
PUT test-index
{
"mappings": {
"properties": {
"nestedOBJField": {
"type": "nested",
"index": true
},
"keywordField": {
"type": "keyword",
"index": true
}
}
}
}
Is it possible to use the composite aggregation with nested fields?
It would be very handy if I could do something like this:
GET /test-index/_search
{
"size": 0,
"aggs": {
"TestAgg": {
"composite": {
"size": 10000,
"sources": [
{
"keyWordFieldAgg": {
"terms": {
"field": "keyWordField"
}
},
{
"nestedFieldAgg": {
"terms": {
"field": "nestedOBJField.attribute"
}
}
}
]
}
}
}
}
But this approach returns several errors.
I would appreciate it a lot if someone could help.
Property nestedOBJField is of data type "nested", and property keyWordField is of type keyword and sits at the same level as nestedOBJField.
To use nested fields in an aggregation, you need to use a nested aggregation, but then all sources in the composite aggregation must be of type nested. This open issue can tell more about it.
You can use the following workarounds.
Move keyWordField inside the nested object in your documents.
{
"mappings": {
"properties": {
"nestedOBJField": {
"type": "nested",
"properties":{
"keywordField": {
"type": "keyword"
}
}
}
}
}
}
Sample Document
{
"nestedOBJField":[
{
"attribute":"1",
"age":1,
"keywordField":"xyz"
},
{
"attribute":"2",
"age":2,
"keywordField":"xyz"
}
]
}
Query
"aggs": {
"TestAgg": {
"nested": {
"path": "nestedOBJField"
},
"aggs": {
"name": {
"composite": {
"size": 10000,
"sources": [
{
"nestedFieldAgg": {
"terms": {
"field": "nestedOBJField.attribute.keyword"
}
}
},
{
"a":{
"terms": {
"field": "nestedOBJField.keywordField.keyword"
}
}
}
]
}
}
}
}
}
Moving your field inside the nested property will mean data duplication and having to update the data in all nested documents.
Using a terms aggregation - pagination will be an issue in this case
{
"size": 0,
"aggs": {
"TestAgg": {
"nested": {
"path": "nestedOBJField"
},
"aggs": {
"name": {
"terms": {
"field": "nestedOBJField.attribute.keyword",
"size": 10
},
"aggs": {
"back_to_parent": {
"reverse_nested": {},
"aggs": {
"keywords": {
"terms": {
"field": "keywordField.keyword",
"size": 10
}
}
}
}
}
}
}
}
}
}

Unable to create nested date aggregation query

I am trying to create an ElasticSearch aggregation query which can generate sum or average of value in all my ingested documents.
The documents are of the format -
{
"weather":"cold",
"date_1":"2017/07/05",
"feedback":[
{
"date_2":"2017/08/07",
"value":28,
"comment":"not cold"
},{
"date_2":"2017/08/09",
"value":48,
"comment":"a bit chilly"
},{
"date_2":"2017/09/07",
"value":18,
"comment":"very cold"
}, ...
]
}
I am able to create a sum aggregation of all "feedback.value" using "date_1" by using the following request -
GET _search
{
"query": {
"query_string": {
"query": "cold"
}
},
"size": 0,
"aggs": {
"temperature": {
"date_histogram":{
"field" : "date_1",
"interval" : "month"
},
"aggs":{
"temperature_agg":{
"terms": {
"field": "feedback.value"
}
}
}
}
}
}
However, I need to run the same aggregation across all documents, bucketed by "feedback.date_2" instead. I am not sure if ElasticSearch can resolve such an aggregation or how to approach it. Any guidance would be helpful.
[EDIT]
Mapping file (I only define the nested items; ES identifies the other fields on its own)
{
"mappings": {
"catalog_item": {
"properties": {
"feedback":{
"type":"nested",
"properties":{
"date_2":{
"type": "date",
"format":"YYYY-MM-DD"
},
"value": {
"type": "float"
},
"comment": {
"type": "text"
}
}
}
}
}
}
}
You would need to make use of nested documents and sum aggregation.
Here's a working example:
Sample Mapping:
PUT test
{
"mappings": {
"doc": {
"properties": {
"feedback": {
"type": "nested"
}
}
}
}
}
Add Sample document:
PUT test/doc/1
{
"date_1": "2017/08/07",
"feedback": [
{
"date_2": "2017/08/07",
"value": 28,
"comment": "not cold"
},
{
"date_2": "2017/08/09",
"value": 48,
"comment": "a bit chilly"
},
{
"date_2": "2017/09/07",
"value": 18,
"comment": "very cold"
}
]
}
Calculate both the sum and average based on date_2.
GET test/_search
{
"size": 0,
"aggs": {
"temperature_aggregation": {
"nested": {
"path": "feedback"
},
"aggs": {
"temperature": {
"date_histogram": {
"field": "feedback.date_2",
"interval": "month"
},
"aggs": {
"sum": {
"sum": {
"field": "feedback.value"
}
},
"avg": {
"avg": {
"field": "feedback.value"
}
}
}
}
}
}
}
}

Elasticsearch - Applying multi level filter on nested aggregation bucket?

I'm trying to get distinct nested objects by applying multiple filters.
Basically in Elasticsearch I have cities as top level document and inside I have nested citizens documents, which have another nested pets documents.
I am trying to get all citizens that have certain conditions applied on all of these 3 levels (cities, citizens and pets):
Give me all distinct citizens
that have age:"40",
that have pets "name":"Casper",
from cities with office_type="secondary"
I know that to filter 1st level I can use query condition, and then if I need to filter the nested citizens I can add a filter in the aggregation level.
I am using this article as an example: https://iridakos.com/tutorials/2018/10/22/elasticsearch-bucket-aggregations.html
Query working so far:
GET city_offices/_search
{
"size" : 10,
"query": {
"term" : { "office_type" : "secondary" }
},
"aggs": {
"citizens": {
"nested": {
"path": "citizens"
},
"aggs": {
"inner_agg": {
"filter": {
"term": { "citizens.age": "40" }
} ,
"aggs": {
"occupations": {
"terms": {
"field": "citizens.occupation"
}
}
}
}
}
}
}
}
BUT: How can I add the "pets" nested filter condition?
Mapping:
PUT city_offices
{
"settings": {
"number_of_shards": 1
},
"mappings": {
"doc": {
"properties": {
"city": {
"type": "keyword"
},
"office_type": {
"type": "keyword"
},
"citizens": {
"type": "nested",
"properties": {
"occupation": {
"type": "keyword"
},
"age": {
"type": "integer"
},
"pets": {
"type": "nested",
"properties": {
"kind": {
"type": "keyword"
},
"name": {
"type": "keyword"
},
"age": {
"type": "integer"
}
}
}
}
}
}
}
}
}
Index data:
PUT /city_offices/doc/1
{
"city":"Athens",
"office_type":"secondary",
"citizens":[
{
"occupation":"Statistician",
"age":30,
"pets":[
{
"kind":"Cat",
"name":"Phoebe",
"age":14
}
]
},
{
"occupation":"Librarian",
"age":30,
"pets":[
{
"kind":"Rabbit",
"name":"Nino",
"age":13
}
]
},
{
"occupation":"Librarian",
"age":40,
"pets":[
{
"kind":"Rabbit",
"name":"Nino",
"age":13
}
]
},
{
"occupation":"Statistician",
"age":40,
"pets":[
{
"kind":"Rabbit",
"name":"Casper",
"age":2
},
{
"kind":"Rabbit",
"name":"Nino",
"age":13
},
{
"kind":"Dog",
"name":"Nino",
"age":15
}
]
}
]
}
So I found a solution for this.
Basically I apply top level filters in the query section and then apply rest of conditions in the aggregations.
First I apply citizens level filter aggregation, then I go inside nested pets and apply the filter and then I need to get back up to citizens level (using reverse_nested: citizens) and then set the term that will generate the final bucket.
Query looks like this:
GET city_offices/_search
{
"size" : 10,
"query": {
"term" : { "office_type" : "secondary" }
},
"aggs": {
"citizens": {
"nested": {
"path": "citizens"
},
"aggs": {
"inner": {
"filter": {
"term": { "citizens.age": "40" }
} ,
"aggs": {
"occupations": {
"nested": {
"path": "citizens.pets"
},
"aggs": {
"inner_pets": {
"filter": {
"term": { "citizens.pets.name": "Casper" }
} ,
"aggs": {
"lll": {
"reverse_nested": {
"path": "citizens"
},
"aggs": {
"xxx": {
"terms": {
"field": "citizens.occupation",
"size": 10
}
}
}
}
}
}
}
}
}
}
}
}
}
}
The response bucket looks like this:
"xxx": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "Librarian",
"doc_count": 1
},
{
"key": "Statistician",
"doc_count": 1
}
]
}
Any other suggestions?

Sum and count aggregations over Elasticsearch fields

I am new to Elasticsearch and I am looking to perform certain aggregations over the fields of an Elasticsearch 5.x index. I have an index that contains documents with the fields langs (which has a nested structure) and docLang. These are dynamically mapped fields. The following are example documents:
DOC 1:
{
"_index":"A",
"_type":"document",
"_id":"1",
"_source":{
"text":"This is a test sentence.",
"langs":{
"X":{
"en":1,
"es":2,
"zh":3
},
"Y":{
"en":4,
"es":5,
"zh":6
}
},
"docLang": "en"
}
}
DOC 2:
{
"_index":"A",
"_type":"document",
"_id":"2",
"_source":{
"text":"This is a test sentence.",
"langs":{
"X":{
"en":1,
"es":2
},
"Y":{
"en":3,
"es":4
}
},
"docLang": "es"
}
}
DOC 3:
{
"_index":"A",
"_type":"document",
"_id":"2",
"_source":{
"text":"This is a test sentence.",
"langs":{
"X":{
"en":1
},
"Y":{
"en":2
}
},
"docLang": "en"
}
}
I want to perform sum aggregation over the langs field in a way that for each key (X/Y) and for each language, I can get the sum across all documents in an index. Also, I want to produce the counts of documents for each type of language from docLang field.
e.g.: For above 3 documents, sum aggregation over langs field would look like below:
"langs":{
"X":{
"en":3,
"es":4,
"zh":3
},
"Y":{
"en":9,
"es":9,
"zh":6
}
}
And the docLang count would look like below:
"docLang":{
"en" : 2,
"es" : 1
}
Also because of some production env restrictions, I cannot use scripts in Elasticsearch. So, I was wondering if it is possible to use just field aggregation type for above fields?
{
"size": 0,
"aggs": {
"X": {
"nested": {
"path": "langs.X"
},
"aggs": {
"X_sum_en": {
"sum": {
"field": "langs.X.en"
}
},
"X_sum_es": {
"sum": {
"field": "langs.X.es"
}
},
"X_sum_zh": {
"sum": {
"field": "langs.X.zh"
}
}
}
},
"Y": {
"nested": {
"path": "langs.Y"
},
"aggs": {
"Y_sum_en": {
"sum": {
"field": "langs.Y.en"
}
},
"Y_sum_es": {
"sum": {
"field": "langs.Y.es"
}
},
"Y_sum_zh": {
"sum": {
"field": "langs.Y.zh"
}
}
}
},
"sum_docLang": {
"terms": {
"field": "docLang.keyword",
"size": 10
}
}
}
}
You didn't mention it, but I think it's important: I mapped X and Y as nested fields:
"langs": {
"properties": {
"X": {
"type": "nested",
"properties": {
"en": {
"type": "long"
},
"es": {
"type": "long"
},
"zh": {
"type": "long"
}
}
},
"Y": {
"type": "nested",
"properties": {
"en": {
"type": "long"
},
"es": {
"type": "long"
},
"zh": {
"type": "long"
}
}
}
}
}
But if your fields are not nested at all (and here I mean the actual nested field type in Elasticsearch), a simple aggregation like this one should be enough:
{
"size": 0,
"aggs": {
"X_sum_en": {
"sum": {
"field": "langs.X.en"
}
},
"X_sum_es": {
"sum": {
"field": "langs.X.es"
}
},
"X_sum_zh": {
"sum": {
"field": "langs.X.zh"
}
},
"Y_sum_en": {
"sum": {
"field": "langs.Y.en"
}
},
"Y_sum_es": {
"sum": {
"field": "langs.Y.es"
}
},
"Y_sum_zh": {
"sum": {
"field": "langs.Y.zh"
}
},
"sum_docLang": {
"terms": {
"field": "docLang.keyword",
"size": 10
}
}
}
}

Aggregates in Nest (Elastic) with filter having both nested and parent objects

I have a catalog of products that I want to calculate aggregates on. The trouble comes with trying to do nested aggregations with filter that has both nested and parent fields in it. Either it gives wrong counts or 0 hits. Here is a sample of my product object mapping:
"Products": {
"properties": {
"ProductID": {
"type": "long"
},
"ProductType": {
"type": "long"
},
"ProductName": {
"type": "string",
"fields": {
"raw": {
"type": "string",
"index": "not_analyzed"
}
}
},
"Prices": {
"type": "nested",
"properties": {
"CurrencyType": {
"type": "integer"
},
"Cost": {
"type": "double"
}
}
}
}
}
Here is an example of the sql query that I am trying to replicate in elastic:
SELECT PRODPR.Cost AS PRODPR_Cost
,COUNT(PROD.ProdcutID) AS PROD_ProductID_Count
FROM Products PROD WITH (NOLOCK)
LEFT OUTER JOIN Prices PRODPR WITH (NOLOCK) ON (PRODPR.objectid = PROD.objectid)
WHERE PRODPR.CurrencyType = 4
AND PROD.ProductType IN (
11273
,11293
,11294
)
GROUP BY PRODPR.Cost
Elastic Search queries I came up with:
First One (following query returns correct counts with just CurrencyType as filter but when I add ProductType filter, it gives me wrong counts)
GET /IndexName/Products/_search
{
"aggs": {
"price_agg": {
"filter": {
"bool": {
"must": [
{
"nested": {
"path": "Prices",
"filter": {
"term": {
"Prices.CurrencyType": "8"
}
}
}
},
{
"terms": {
"ProductType": [
"11273",
"11293",
"11294"
]
}
}
]
}
},
"aggs": {
"price_nested_agg": {
"nested": {
"path": "Prices"
},
"aggs": {
"59316518_group_agg": {
"terms": {
"field": "Prices.Cost",
"size": 0
},
"aggs": {
"product_count": {
"reverse_nested": { },
"aggs": {
"ProductID_count_agg": {
"value_count": {
"field": "ProductID"
}
}
}
}
}
}
}
}
}
}
},
"size": 0
}
Second One (following query returns correct counts with just CurrencyType as filter but when I add ProductType filter, it gives me 0 hits):
GET /IndexName/Prodcuts/_search
{
"aggs": {
"price_agg": {
"nested": {
"path": "Prices"
},
"aggs": {
"currency_filter": {
"filter": {
"bool": {
"must": [
{
"term": {
"Prices.CurrrencyType": "4"
}
},
{
"terms": {
"ProductType": [
"11273",
"11293"
]
}
}
]
}
},
"aggs": {
"59316518_group_agg": {
"terms": {
"field": "Prices.Cost",
"size": 0
},
"aggs": {
"product_count": {
"reverse_nested": {},
"aggs": {
"ProductID_count_agg": {
"value_count": {
"field": "ProductID"
}
}
}
}
}
}
}
}
}
}
},
"size": 0
}
I have tried some more queries but the above two are the closest I came up with. Has anyone come across this use case? What am I doing wrong? Any help is appreciated. Thanks!

Resources