Nested aggregation in nested field? - elasticsearch

I am new to elasticsearch and don't know a lot about aggregations but I have this ES6 mapping:
{
"mappings": {
"test": {
"properties": {
"id": {
"type": "integer"
}
"countries": {
"type": "nested",
"properties": {
"global_id": {
"type": "keyword"
},
"name": {
"type": "text",
"fields": {
"raw": {
"type": "keyword"
}
}
}
}
},
"areas": {
"type": "nested",
"properties": {
"global_id": {
"type": "keyword"
},
"name": {
"type": "text",
"fields": {
"raw": {
"type": "keyword"
}
}
},
"parent_global_id": {
"type": "keyword"
}
}
}
}
}
}
}
How can I get all documents grouped by areas which is then grouped by countries. Also the document has to be returned in full, not just the nested document. Is this even possible ?

1) Aggregation _search query:
first agg by area, with the path as this is nested. Then reverse to the root document and nested agg to country.
{
"size": 0,
"aggs": {
"agg_areas": {
"nested": {
"path": "areas"
},
"aggs": {
"areas_name": {
"terms": {
"field": "areas.name"
},
"aggs": {
"agg_reverse": {
"reverse_nested": {},
"aggs": {
"agg_countries": {
"nested": {
"path": "countries"
},
"aggs": {
"countries_name": {
"terms": {
"field": "countries.name"
}
}
}
}
}
}
}
}
}
}
}
}
2) retrieve documents:
add a tophits inside your aggregation:
https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-metrics-top-hits-aggregation.html
top_hits is slow so you will have to read documentation and adjust size and sort to your context.
...
"terms": {
"field": "areas.name"
},
"aggregations": {
"hits": {
"top_hits": { "size": 100}
}
},
...

Related

Aggregation based off of nested document field with filters on nested document and parent

I have the following mapping:
{
"accountId": {
"type": "long"
},
"storeProductId": {
"type": "long"
},
"storeSchemaId": {
"type": "long"
},
"yoyoValues": {
"type": "nested",
"properties": {
"yoyoNameId": {
"type": "long"
},
"dataType": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"id": {
"type": "long"
},
"languageId": {
"type": "long"
},
"value_Number": {
"type": "float"
},
"value_Raw": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
}
}
}
}
and I'm trying to get the max and min values for value_number for all nested documents with yoyoNameId of 3 that also has a parent document with an accountId of 1285 and storeSchemaId of 241.
Everytime I've tried, I've been unable to properly filter the nested documents so it ends up being the min and max values for all nested documents with the correct parent document values.
I've tried several different queries but my most recent one is as follows:
{
"size": 0,
"aggs": {
"filter-layer": {
"filters": {
"filters": [
{
"term": {
"accountId": 1285
}
},
{
"term": {
"yoyoSchemaId": 241
}
},
{
"nested": {
"path": "yoyoValues",
"query": {
"bool": {
"filter": [
{
"term": {
"yoyoValues.yoyoNameId": 3
}
}
]
}
}
}
}
]
},
"aggs": {
"yoyoValues": {
"nested": {
"path": "yoyoValues"
},
"inner": {
"filter": {
"term": {
"yoyoValues.yoyoNameId": 3
}
},
"aggs": {
"min_value": {
"min": {
"field": "yoyoValues.value_Number"
}
},
"max_value": {
"max": {
"field": "yoyoValues.value_Number"
}
}
}
}
}
}
}
}
}
Can someone please help me correct this query? I'm limited to elastic v7.13.

Elasticsearch nested query with aggregation using nested term doesn't return any bucket

I have an ES index with this mapping:
{
"_doc": {
"dynamic": "false",
"properties": {
"original": {
"properties":{
"id": {
"type": "keyword"
},
"purchaseStatus": {
"type": "keyword"
},
"marketCode": {
"type": "keyword"
},
"salesProfiles": {
"type": "nested",
"properties": {
"marketCode": {
"type": "keyword"
},
"purchaseStatus": {
"type": "keyword"
}
}
}
}
},
"recommended": {
"properties":{
"id": {
"type": "keyword"
},
"purchaseStatus": {
"type": "keyword"
},
"marketCode": {
"type": "keyword"
},
"salesProfiles": {
"type": "nested",
"properties": {
"marketCode": {
"type": "keyword"
},
"purchaseStatus": {
"type": "keyword"
}
}
}
}
},
"distance": {
"type": "double"
},
"rank": {
"type": "double"
},
"source": {
"properties": {
"application": {
"type": "keyword"
},
"platform": {
"type": "keyword"
}
}
},
"timestamp": {
"properties": {
"createdAt": {
"type": "date"
},
"updatedAt": {
"type": "date"
}
}
}
}
},
"_default_": {
"dynamic": "false"
}
}
and I need to obtain the recommended docs with salesProfiles.marketCode equal to original.marketCode but my query doesn't return any buckets:
GET index/_search
{
"aggs": {
"similarities": {
"filter": {
"bool": {
"must": [
{
"term": {
"original.storefrontId": "12345"
}
},
{
"nested": {
"path": "recommended.salesProfiles",
"query": {
"bool": {
"must": [
{
"match": {
"recommended.salesProfiles.purchaseStatus": "PAID"
}
}
]
}
}
}
}
]
}
},
"aggs": {
"markets": {
"nested": {
"path": "recommended.salesProfiles"
},
"aggs": {
"recommendedMarket": {
"terms": {
"field": "recommended.salesProfiles.marketCode",
"size": 100
}
}
}
}
}
}
},
"explain": false
}
Any suggestion would be really appreciated. Thanks in advance!
Its hard to debug this without any example docs, but I think this might work
{
"size": 0,
"query": {
"bool": {
"must": [
{
"term": {
"original.storefrontId": "12345"
}
},
{
"nested": {
"path": "recommended.salesProfiles",
"query": {
"bool": {
"must": [
{
"match": {
"recommended.salesProfiles.purchaseStatus": "PAID"
}
}
]
}
}
}
}
]
}
},
"aggs": {
"Profiles": {
"nested": {
"path": "recommended.salesProfiles"
},
"aggs": {
"by_term": {
"terms": {
"field": "recommended.salesProfiles.marketCode",
"size": 100
}
}
}
}
}
}
I don't think you can use "nested" under the filter agg without being under a nested aggregation, so I believe that's why you didn't get any docs.
I basically moved all the filtering to the query and just aggregated the terms later

In Kibana how can you sum nested fields and then bucket for each document?

We have multiple nested fields which need to be summed and then graphed almost as if it were a value of the parent document (using scripted fields is not an ideal solution for us).
Given the example index mapping:
{
"mapping": {
"_doc": {
"properties": {
"build_name": { "type": "keyword" },
"start_ms": { "type": "date" },
"projects": {
"type": "nested",
"properties": {
"project_duration_ms": { type": "long" },
"project_name": { "type": "keyword" }
}
}
}
}
}
}
Example doc._source:
{
"build_name": "example_build_1",
"start_ms": "1611252094540",
"projects": [
{ "project_duration_ms": "19381", project_name": "example_project_1" },
{ "project_duration_ms": "2081", "project_name": "example_project_2" }
]
},
{
"build_name": "example_build_2",
"start_ms": "1611252097638",
"projects": [
{ "project_duration_ms": "21546", project_name": "example_project_1" },
{ "project_duration_ms": "2354", "project_name": "example_project_2" }
]
}
It would be ideal to get a aggregation something like:
....
"aggregations" : {
"builds" : {
"total_durations" : {
"buckets" : [
{
"key": "example_build_1",
"start_ms": "1611252094540",
"total_duration": "21462"
},
{
"key": "example_build_2",
"start_ms": "1611252097638",
"total_duration": "23900"
}
}
}
}
}
}
No scripted fields necessary. This nested sum aggregation should do the trick:
{
"size": 0,
"aggs": {
"builds": {
"terms": {
"field": "build_name"
},
"aggs": {
"total_durations_parent": {
"nested": {
"path": "projects"
},
"aggs": {
"total_durations": {
"sum": {
"field": "projects.project_duration_ms"
}
}
}
}
}
}
}
}
Your use case is a great candidate for employing the copy_to parameter which'll put the build durations into one top-level list of longs so that the nested query won't be required when we're summing them up.
Adjust the mapping like so:
"properties": {
"build_name": { "type": "keyword" },
"start_ms": { "type": "date" },
"total_duration_ms": { "type": "long" }, <--
"projects": {
"type": "nested",
"properties": {
"project_duration_ms": {
"type": "long",
"copy_to": "total_duration_ms" <--
},
"project_name": { "type": "keyword" }
}
}
}
After reindexing (which is required due to the newly added field), the above query gets simplified to:
{
"size": 0,
"aggs": {
"builds": {
"terms": {
"field": "build_name"
},
"aggs": {
"total_durations": {
"sum": {
"field": "total_duration_ms"
}
}
}
}
}
}

Find the count of nested.nested Elastic Search documents

Is it possible using the Elastic Search _count API and having the following abbreviated ES template to find the count of sponsorships for all the campaigns by brandId?
sponsorshipSets and sponsorships are optional so it can be null.
{
"index_patterns": "campaigns*",
"order": 4,
"version": 4,
"aliases": {
"campaigns": {
}
},
"settings": {
"number_of_shards": 5
},
"mappings": {
"dynamic": "false",
"properties": {
"brandId": {
"type": "keyword"
},
"sponsorshipSets": {
"type": "nested",
"properties": {
"id": {
"type": "keyword"
},
"sponsorships": {
"type": "nested",
"properties": {
"id": {
"type": "keyword"
}
}
}
}
}
}
filter aggregation can be used to fetch docs with certain brand Id. Two Nested aggregations to point to sponsorship and value_count aggregation to get the count.
Query
{
"aggs": {
"selected_brand": {
"filter": {
"term": {
"brandId": "1"
}
}
},
"sponsorshipSets": {
"nested": {
"path": "sponsorshipSets"
},
"aggs": {
"sponsorships": {
"nested": {
"path": "sponsorshipSets.sponsorships"
},
"aggs": {
"count": {
"value_count": {
"field": "sponsorshipSets.sponsorships.id"
}
}
}
}
}
}
}
}
I found a solution without using Aggregations, it seems more accurate from the above and I can use the _count API.
{
"query": {
"bool": {
"must": [
{
"nested": {
"path": "sponsorshipSets.sponsorships",
"query": {
"bool": {
"filter": {
"exists": {
"field": "sponsorshipSets.sponsorships"
}
}
}
}
}
},
{
"term": {
"brandId": "b1d28821-3730-4266-8f55-eb69596004fb"
}
}
]
}
}
}

nested aggregation elasticsearch with access parent field for sub-aggregation

I have following mappings
PUT prod_nested
{
"mappings": {
"default": {
"properties": {
"pkey": {
"type": "keyword"
},
"original_price": {
"type": "float"
},
"tags": {
"type": "nested",
"properties": {
"category": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 30
}
}
},
"attribute": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 30
}
}
},
"original_price": {
"type": "float"
}
}
}
}
}
}
}
I am trying to do something like following sql aggregation
select tag_attribute,
tag_category,
avg(original_price)
FROM products
GROUP BY tag_category, tag_attribute
I am able to do the group-by part using nested aggregation on tags, but its not able to access the original_price in sub-aggregation. One option might be to duplicate the original_price inside the tags nested document, but I have millions of records to handle. My current aggregation is
GET prod_nested/_search?size=0
{
"aggs": {
"tags": {
"nested": {
"path": "tags"
},
"aggs": {
"categories": {
"terms": {
"field": "tags.category.keyword",
"size": 30
},
"aggs": {
"attributes": {
"terms": {
"field": "tags.attribute.keyword",
"size": 30
},
"aggs": {
"price": {
"avg": {
"field": "original_price"
}
}
}
}
}
}
}
}
}
}
Thanks, in advance.
I was able to get the desired results by using reverse_nested aggregation.
GET prod_nested/_search?size=0
{
"aggs": {
"tags": {
"nested": {
"path": "tags"
},
"aggs": {
"categories": {
"terms": {
"field": "tags.category.keyword",
"size": 10
},
"aggs": {
"attributes": {
"terms": {
"field": "tags.attribute.keyword",
"size": 10
},
"aggs": {
"parent_doc_price": {
"reverse_nested": {},
"aggs": {
"avg_price": {
"avg": {
"field": "original_price"
}
}
}
}
}
}
}
}
}
}
}
}
i think what you want is not possible.
but how about change your mapping?
{
"mappings": {
"default": {
"properties": {
"pkey": {
"type": "keyword"
},
"original_price": {
"type": "float"
},
"tags": {
"type": "nested",
"properties": {
"category_attribute": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 30
}
}
},
"category": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 30
}
}
},
"attribute": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 30
}
}
},
"original_price": {
"type": "float"
}
}
}
}
}
}
}
you can use category_attribute.
and your aggregation will be below.
GET prod_nested/_search?size=0
{
"aggs": {
"tags": {
"nested": {
"path": "tags"
},
"aggs": {
"category_attribute": {
"terms": {
"field": "tags.category_attribute.keyword",
"size": 30
},
"aggs": {
"price": {
"avg": {
"field": "original_price"
}
}
}
}
}
}
}
}

Resources