elasticsearch: using nested agg after reverse_nested shows higher count than expected - elasticsearch

Using Elasticsearch 2.2.0, I am doing this:
Grouping by a nested field: nested_path.nested_field
Using a reverse_nested agg so I can apply this filter: non_nested_field == "yay"
Using a nested agg so I can then get a count of the nested field I am grouping by: nested_path.nested_field
Problem: By using the reverse_nested agg I am getting a higher doc_count than I would expect.
Here is the mapping and docs I am indexing:
PUT /my_index
{
"mappings": {
"my_type": {
"properties": {
"nested_path": {
"type": "nested",
"properties": {
"nested_field": {
"type": "string"
}
}
},
"non_nested_field": {
"type": "string"
}
}
}
}
}
POST /my_index/my_type/1
{
"non_nested_field": "whoray",
"nested_path": [
{
"nested_field": "yes"
},
{
"nested_field": "yes"
},
{
"nested_field": "no"
}
]
}
POST /my_index/my_type/2
{
"non_nested_field": "yay",
"nested_path": [
{
"nested_field": "maybe"
},
{
"nested_field": "no"
}
]
}
Request body:
POST my_index/my_type/_search
{
"aggs": {
"nested_option": {
"nested": {
"path": "nested_path"
},
"aggs": {
"group_list": {
"terms": {
"field": "nested_path.nested_field",
"size": 100
},
"aggs": {
"level_1": {
"reverse_nested": {},
"aggs": {
"level_2": {
"filter": {
"term": {
"non_nested_field": "yay"
}
},
"aggs": {
"level_3": {
"nested": {
"path": "nested_path"
},
"aggs": {
"stat": {
"value_count": {
"field": "nested_path.nested_field"
}
}
}
}
}
}
}
}
}
}
}
}
},
"size": 0
}
Part of the response I get is this:
{
"aggregations": {
"nested_option": {
"doc_count": 5,
"group_list": {
"buckets": [
{
"key": "no",
"doc_count": 2,
"level_1": {
"doc_count": 2,
"level_2": {
"doc_count": 1,
"level_3": {
"doc_count": 2,
"stat": {
"value": 2
}
}
}
}
}
//....
]
}
}
}
}
In the first element of the buckets array in the response, level_1.level_2.doc_count is 1, and this is correct, because only one of the two indexed docs has nested_path.nested_field == "no" and non_nested_field == "yay". But level_1.level_2.level_3.doc_count in the response is 2, when I expected it to be 1. This seems like a bug to me.

Related

Elasticsearch - Applying multi level filter on nested aggregation bucket?

I'm trying to get distinct nested objects by applying multiple filters.
Basically, in Elasticsearch I have cities as top-level documents, and inside each I have nested citizens documents, which in turn have nested pets documents.
I am trying to get all citizens that have certain conditions applied on all of these 3 levels (cities, citizens and pets):
Give me all distinct citizens
that have age:"40",
that have pets "name":"Casper",
from cities with office_type="secondary"
I know that to filter 1st level I can use query condition, and then if I need to filter the nested citizens I can add a filter in the aggregation level.
I am using this article as an example: https://iridakos.com/tutorials/2018/10/22/elasticsearch-bucket-aggregations.html
Query working so far:
GET city_offices/_search
{
"size" : 10,
"query": {
"term" : { "office_type" : "secondary" }
},
"aggs": {
"citizens": {
"nested": {
"path": "citizens"
},
"aggs": {
"inner_agg": {
"filter": {
"term": { "citizens.age": "40" }
} ,
"aggs": {
"occupations": {
"terms": {
"field": "citizens.occupation"
}
}
}
}
}
}
}
}
BUT: How can I add the "pets" nested filter condition?
Mapping:
PUT city_offices
{
"settings": {
"number_of_shards": 1
},
"mappings": {
"doc": {
"properties": {
"city": {
"type": "keyword"
},
"office_type": {
"type": "keyword"
},
"citizens": {
"type": "nested",
"properties": {
"occupation": {
"type": "keyword"
},
"age": {
"type": "integer"
},
"pets": {
"type": "nested",
"properties": {
"kind": {
"type": "keyword"
},
"name": {
"type": "keyword"
},
"age": {
"type": "integer"
}
}
}
}
}
}
}
}
}
Index data:
PUT /city_offices/doc/1
{
"city":"Athens",
"office_type":"secondary",
"citizens":[
{
"occupation":"Statistician",
"age":30,
"pets":[
{
"kind":"Cat",
"name":"Phoebe",
"age":14
}
]
},
{
"occupation":"Librarian",
"age":30,
"pets":[
{
"kind":"Rabbit",
"name":"Nino",
"age":13
}
]
},
{
"occupation":"Librarian",
"age":40,
"pets":[
{
"kind":"Rabbit",
"name":"Nino",
"age":13
}
]
},
{
"occupation":"Statistician",
"age":40,
"pets":[
{
"kind":"Rabbit",
"name":"Casper",
"age":2
},
{
"kind":"Rabbit",
"name":"Nino",
"age":13
},
{
"kind":"Dog",
"name":"Nino",
"age":15
}
]
}
]
}
So I found a solution for this.
Basically, I apply the top-level filters in the query section and then apply the rest of the conditions in the aggregations.
First I apply the citizens-level filter aggregation, then I go inside the nested pets and apply the pets filter. After that I need to get back up to the citizens level (using reverse_nested with path: citizens) and then set the terms aggregation that will generate the final buckets.
Query looks like this:
GET city_offices/_search
{
"size" : 10,
"query": {
"term" : { "office_type" : "secondary" }
},
"aggs": {
"citizens": {
"nested": {
"path": "citizens"
},
"aggs": {
"inner": {
"filter": {
"term": { "citizens.age": "40" }
} ,
"aggs": {
"occupations": {
"nested": {
"path": "citizens.pets"
},
"aggs": {
"inner_pets": {
"filter": {
"term": { "citizens.pets.name": "Casper" }
} ,
"aggs": {
"lll": {
"reverse_nested": {
"path": "citizens"
},
"aggs": {
"xxx": {
"terms": {
"field": "citizens.occupation",
"size": 10
}
}
}
}
}
}
}
}
}
}
}
}
}
}
The response bucket looks like this:
"xxx": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "Librarian",
"doc_count": 1
},
{
"key": "Statistician",
"doc_count": 1
}
]
}
Any other suggestions?

Right way access parent field in Elasticsearch nested aggs script

Elasticsearch Version: 5.6.3
I have a mapping like this:
PUT /my_stock
{
"mappings": {
"stock": {
"properties": {
"industry": {
"type": "nested",
"properties": {
"name": {
"type": "keyword"
},
"rate": {
"type": "double"
}
}
},
"changeRatio": {
"type": "double"
}
}
}
}
}
Data:
POST /_bulk
{"index":{"_index":"my_stock","_type":"stock","_id":null}}
{"industry":[{"name":"Technology","rate":0.6},{"name":"Health", "rate":0.2}],"changeRatio":0.1}
{"index":{"_index":"my_stock","_type":"stock","_id":null}}
{"industry":[{"name":"Health", "rate":0.3},{"name":"Education", "rate":0.2}],"changeRatio":0.2}
{"index":{"_index":"my_stock","_type":"stock","_id":null}}
{"industry":[{"name":"Health","rate":0.5},{"name":"Education","rate":0.2}],"changeRatio":-0.3}
{"index":{"_index":"my_stock","_type":"stock","_id":null}}
{"industry":[{"name":"Technology","rate":0.3},{"name":"Education","rate":0.3}],"changeRatio":0.4}
{"index":{"_index":"my_stock","_type":"stock","_id":null}}
{"industry":[{"name":"Education","rate":0.3},{"name":"Technology","rate":0.1}],"changeRatio":-0.5}
I then want to build an aggs query like this:
GET my_stock/stock/_search
{
"size": 0,
"aggs": {
"industry": {
"nested": {
"path": "industry"
},
"aggs": {
"groups": {
"terms": {
"field": "industry.name",
"order": {
"rate": "desc"
}
},
"aggs": {
"rate": {
"sum": {
"script": {
"source": "doc['changeRatio'].value * doc['industry.rate'].value"
}
}
}
}
}
}
}
}
}
But "doc['changeRatio'].value" does not get the right value; it always returns 0.
Another query, like this:
GET my_stock/stock/_search
{
"size": 0,
"aggs": {
"industry": {
"nested": {
"path": "industry"
},
"aggs": {
"groups": {
"terms": {
"field": "industry.name",
"order":{
"reverse>rate":"desc"
}
},
"aggs": {
"reverse": {
"reverse_nested": {},
"aggs": {
"rate": {
"sum": {
"script": {
"source": "doc['changeRatio'].value * doc['industry.rate'].value"
}
}
}
}
}
}
}
}
}
}
}
Here "doc['changeRatio'].value" is right, but "doc['industry.rate'].value" returns 0.
I referred to this question: Elasticsearch 5.4: Use normal and nested fields in same Painless script query?
1. { params['_source']['changeRatio'] } and { params['_source']['industry.rate'] } do not work in this version.
2. Using "copy_to" stores the value as a multi-value field, which also does not work.
How can I write a script that correctly computes "changeRatio * industry.rate"?

Elasticsearch: Is it possible to run a not-analysed aggregation query on an analysed field?

I have documents that store brand names in analysed form, for example: {"name":"Sam-sung"} {"name":"Motion:Systems"}. There are cases where I want to aggregate these brands within a timestamp range.
My query is as follows:
{
"size": 0,
"aggs": {
"filtered_aggs": {
"filter": {
"range": {
"#timestamp":{
"gte":"2016-07-18T14:23:41.459Z",
"lte":"2016-07-18T14:53:10.017Z"
}
}
},
"aggs": {
"execute_time": {
"terms": {
"field": "brands",
"size": 0
}
}
}
}
}
}
But the returned results are:
{
...
"aggregations": {
"states": {
"buckets": [
{
"key": "Sam",
"doc_count": 5
},
{
"key": "sung",
"doc_count": 5
},
{
"key": "Motion",
"doc_count": 1
},
{
"key": "Systems",
"doc_count": 1
}
]
}
}
}
But I want the results to be:
{
...
"aggregations": {
"states": {
"buckets": [
{
"key": "Sam-sung",
"doc_count": 5
},
{
"key": "Motion:Systems",
"doc_count": 1
}
]
}
}
}
Is there any way I can run a not-analysed query on an analysed field in Elasticsearch?
You need to add a not_analyzed sub-field to your brands field and then aggregate on that sub-field.
PUT /index/_mapping/type
{
"properties": {
"brands": {
"type": "string",
"fields": {
"raw": {
"type": "string",
"index": "not_analyzed"
}
}
}
}
}
Then you need to fully reindex your data in order to populate the new sub-field brands.raw.
Finally, you can change your query to this:
POST index/_search
{
"size": 0,
"aggs": {
"filtered_aggs": {
"filter": {
"range": {
"#timestamp":{
"gte":"2016-07-18T14:23:41.459Z",
"lte":"2016-07-18T14:53:10.017Z"
}
}
},
"aggs": {
"execute_time": {
"terms": {
"field": "brands.raw",
"size": 0
}
}
}
}
}
}

Ratio with elasticsearch

I have a list of customers with this structure:
{
"name" : "Toya Romano",
"hungry" : false,
"date" : 1420090500020
}
I would like to get the ratio of people who are hungry. How can I do it with an ElasticSearch query? I am running ES 2.3.
Rather a hacky approach because of this issue, but this should work:
{
"size": 0,
"aggs": {
"whatever": {
"filters": {
"filters": [{}]
},
"aggs": {
"all_people": {
"filter": {}
},
"hungry_count": {
"filter": {
"term": {
"hungry": true
}
}
},
"hungry_ratio": {
"bucket_script": {
"buckets_path": {
"total_hungry": "hungry_count._count",
"all": "all_people._count"
},
"script": "total_hungry/all"
}
}
}
}
}
}
With the result like this:
"buckets": [
{
"doc_count": 5,
"all_people": {
"doc_count": 5
},
"hungry_count": {
"doc_count": 3
},
"hungry_ratio": {
"value": 0.6
}
}
]

Aggregates in Nest (Elastic) with filter having both nested and parent objects

I have a catalog of products that I want to calculate aggregates on. The trouble comes when trying to do nested aggregations with a filter that has both nested and parent fields in it: either it gives wrong counts or it returns 0 hits. Here is a sample of my product object mapping:
"Products": {
"properties": {
"ProductID": {
"type": "long"
},
"ProductType": {
"type": "long"
},
"ProductName": {
"type": "string",
"fields": {
"raw": {
"type": "string",
"index": "not_analyzed"
}
}
},
"Prices": {
"type": "nested",
"properties": {
"CurrencyType": {
"type": "integer"
},
"Cost": {
"type": "double"
}
}
}
}
}
Here is an example of the sql query that I am trying to replicate in elastic:
SELECT PRODPR.Cost AS PRODPR_Cost
,COUNT(PROD.ProdcutID) AS PROD_ProductID_Count
FROM Products PROD WITH (NOLOCK)
LEFT OUTER JOIN Prices PRODPR WITH (NOLOCK) ON (PRODPR.objectid = PROD.objectid)
WHERE PRODPR.CurrencyType = 4
AND PROD.ProductType IN (
11273
,11293
,11294
)
GROUP BY PRODPR.Cost
Elastic Search queries I came up with:
First One (following query returns correct counts with just CurrencyType as filter but when I add ProductType filter, it gives me wrong counts)
GET /IndexName/Products/_search
{
"aggs": {
"price_agg": {
"filter": {
"bool": {
**"must": [
{
"nested": {
"path": "Prices",
"filter": {
"term": {
"Prices.CurrencyType": "8"
}
}
}
},
{
"terms": {
"ProductType": [
"11273",
"11293",
"11294"
]
}
}
]**
}
},
"aggs": {
"price_nested_agg": {
"nested": {
"path": "Prices"
},
"aggs": {
"59316518_group_agg": {
"terms": {
"field": "Prices.Cost",
"size": 0
},
"aggs": {
"product_count": {
"reverse_nested": { },
"aggs": {
"ProductID_count_agg": {
"value_count": {
"field": "ProductID"
}
}
}
}
}
}
}
}
}
}
},
"size": 0
}
Second One (following query returns correct counts with just CurrencyType as filter but when I add ProductType filter, it gives me 0 hits):
GET /IndexName/Prodcuts/_search
{
"aggs": {
"price_agg": {
"nested": {
"path": "Prices"
},
"aggs": {
"currency_filter": {
"filter": {
"bool": {
"must": [
{
"term": {
"Prices.CurrrencyType": "4"
}
},
{
"terms": {
"ProductType": [
"11273",
"11293"
]
}
}
]
}
},
"aggs": {
"59316518_group_agg": {
"terms": {
"field": "Prices.Cost",
"size": 0
},
"aggs": {
"product_count": {
"reverse_nested": {},
"aggs": {
"ProductID_count_agg": {
"value_count": {
"field": "ProductID"
}
}
}
}
}
}
}
}
}
}
},
"size": 0
}
I have tried some more queries but the above two are the closest I came up with. Has anyone come across this use case? What am I doing wrong? Any help is appreciated. Thanks!

Resources