nested aggregation elasticsearch with access parent field for sub-aggregation - elasticsearch

I have following mappings
PUT prod_nested
{
"mappings": {
"default": {
"properties": {
"pkey": {
"type": "keyword"
},
"original_price": {
"type": "float"
},
"tags": {
"type": "nested",
"properties": {
"category": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 30
}
}
},
"attribute": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 30
}
}
},
"original_price": {
"type": "float"
}
}
}
}
}
}
}
I am trying to do something like following sql aggregation
select tag_attribute,
tag_category,
avg(original_price)
FROM products
GROUP BY tag_category, tag_attribute
I am able to do the group-by part using nested aggregation on tags, but its not able to access the original_price in sub-aggregation. One option might be to duplicate the original_price inside the tags nested document, but I have millions of records to handle. My current aggregation is
GET prod_nested/_search?size=0
{
"aggs": {
"tags": {
"nested": {
"path": "tags"
},
"aggs": {
"categories": {
"terms": {
"field": "tags.category.keyword",
"size": 30
},
"aggs": {
"attributes": {
"terms": {
"field": "tags.attribute.keyword",
"size": 30
},
"aggs": {
"price": {
"avg": {
"field": "original_price"
}
}
}
}
}
}
}
}
}
}
Thanks, in advance.

I was able to get the desired results by using reverse_nested aggregation.
GET prod_nested/_search?size=0
{
"aggs": {
"tags": {
"nested": {
"path": "tags"
},
"aggs": {
"categories": {
"terms": {
"field": "tags.category.keyword",
"size": 10
},
"aggs": {
"attributes": {
"terms": {
"field": "tags.attribute.keyword",
"size": 10
},
"aggs": {
"parent_doc_price": {
"reverse_nested": {},
"aggs": {
"avg_price": {
"avg": {
"field": "original_price"
}
}
}
}
}
}
}
}
}
}
}
}

i think what you want is not possible.
but how about change your mapping?
{
"mappings": {
"default": {
"properties": {
"pkey": {
"type": "keyword"
},
"original_price": {
"type": "float"
},
"tags": {
"type": "nested",
"properties": {
"category_attribute": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 30
}
}
},
"category": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 30
}
}
},
"attribute": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 30
}
}
},
"original_price": {
"type": "float"
}
}
}
}
}
}
}
you can use category_attribute.
and your aggregation will be below.
GET prod_nested/_search?size=0
{
"aggs": {
"tags": {
"nested": {
"path": "tags"
},
"aggs": {
"category_attribute": {
"terms": {
"field": "tags.category_attribute.keyword",
"size": 30
},
"aggs": {
"price": {
"avg": {
"field": "original_price"
}
}
}
}
}
}
}
}

Related

Aggregation based off of nested document field with filters on nested document and parent

I have the following mapping:
{
"accountId": {
"type": "long"
},
"storeProductId": {
"type": "long"
},
"storeSchemaId": {
"type": "long"
},
"yoyoValues": {
"type": "nested",
"properties": {
"yoyoNameId": {
"type": "long"
},
"dataType": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"id": {
"type": "long"
},
"languageId": {
"type": "long"
},
"value_Number": {
"type": "float"
},
"value_Raw": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
}
}
}
}
and I'm trying to get the max and min values for value_number for all nested documents with yoyoNameId of 3 that also has a parent document with an accountId of 1285 and storeSchemaId of 241.
Everytime I've tried, I've been unable to properly filter the nested documents so it ends up being the min and max values for all nested documents with the correct parent document values.
I've tried several different queries but my most recent one is as follows:
{
"size": 0,
"aggs": {
"filter-layer": {
"filters": {
"filters": [
{
"term": {
"accountId": 1285
}
},
{
"term": {
"yoyoSchemaId": 241
}
},
{
"nested": {
"path": "yoyoValues",
"query": {
"bool": {
"filter": [
{
"term": {
"yoyoValues.yoyoNameId": 3
}
}
]
}
}
}
}
]
},
"aggs": {
"yoyoValues": {
"nested": {
"path": "yoyoValues"
},
"inner": {
"filter": {
"term": {
"yoyoValues.yoyoNameId": 3
}
},
"aggs": {
"min_value": {
"min": {
"field": "yoyoValues.value_Number"
}
},
"max_value": {
"max": {
"field": "yoyoValues.value_Number"
}
}
}
}
}
}
}
}
}
Can someone please help me correct this query? I'm limited to elastic v7.13.

How to do sorting on a field with composite aggregation in elastic search

How to do sorting on a field with composite aggregation in elastic search.
We are using elastic search version 6.8.6 and trying to achieve sorting on a field with composite aggregation.
But we are not able to get expected results with aggregation.
This is our mapping
{
"properties": {
"department": {
"type": "text",
"fields": {
"keyword": {
"ignore_above": 256.0,
"type": "keyword"
}
}
},
"project": {
"type": "text",
"fields": {
"keyword": {
"ignore_above": 256.0,
"type": "keyword"
}
}
},
"billingUnit": {
"type": "text",
"fields": {
"keyword": {
"ignore_above": 256.0,
"type": "keyword"
}
}
},
"billingType": {
"type": "text",
"fields": {
"keyword": {
"ignore_above": 256.0,
"type": "keyword"
}
}
},
"application": {
"type": "text",
"fields": {
"keyword": {
"ignore_above": 256.0,
"type": "keyword"
}
}
},
"environmet": {
"type": "text",
"fields": {
"keyword": {
"ignore_above": 256.0,
"type": "keyword"
}
}
},
"cost": {
"type": "float"
}
}
}
By using the following query we are not able to do sorting, The results are not in alphabetical orders :
{
"query": {
"bool": {
"must": [
{
"match_phrase": {
"department": {
"query": "HR",
"slop": 0,
"zero_terms_query": "NONE",
"boost": 1.0
}
}
}
],
"adjust_pure_negative": true,
"boost": 1.0
}
},
"sort": [
{
"project.keyword": {
"order": "desc"
}
}
],
"aggs": {
"TERM_RANGE": {
"composite": {
"size": 10000,
"sources": [
{
"billingUnitKey": {
"terms": {
"field": "billingUnit.keyword",
"missing_bucket": false
}
}
},
{
"billingTypeKey": {
"terms": {
"field": "billingType.keyword",
"missing_bucket": false
}
}
}
]
},
"aggregations": {
"TOTAL": {
"sum": {
"field": "cost"
}
},
"dataHits": {
"top_hits": {
"from": 0,
"size": 1,
"version": false,
"seq_no_primary_term": false,
"explain": false,
"_source": {
"includes": [
"application.keyword",
"environmet.keyword",
],
"excludes": []
},
"docvalue_fields": [
{
"field": "application.keyword"
},
{
"field": "environmet.keyword"
}
]
}
},
"paginate_bucket": {
"bucket_sort": {
"sort": [],
"from": 0,
"size": 100,
"gap_policy": "SKIP"
}
}
}
}
}
}
Sorting is working fine with following query without aggregation
{
"query": {
"match": {
"department": "HR"
}
},
"size": 100,
"sort": [
{
"project.keyword": {
"order": "desc"
}
}
]
}
You should use order key of composite aggregation
https://www.elastic.co/guide/en/elasticsearch/reference/7.8/search-aggregations-bucket-composite-aggregation.html#_order

Nested aggregation in nested field?

I am new to elasticsearch and don't know a lot about aggregations but I have this ES6 mapping:
{
"mappings": {
"test": {
"properties": {
"id": {
"type": "integer"
}
"countries": {
"type": "nested",
"properties": {
"global_id": {
"type": "keyword"
},
"name": {
"type": "text",
"fields": {
"raw": {
"type": "keyword"
}
}
}
}
},
"areas": {
"type": "nested",
"properties": {
"global_id": {
"type": "keyword"
},
"name": {
"type": "text",
"fields": {
"raw": {
"type": "keyword"
}
}
},
"parent_global_id": {
"type": "keyword"
}
}
}
}
}
}
}
How can I get all documents grouped by areas which is then grouped by countries. Also the document has to be returned in full, not just the nested document. Is this even possible ?
1) Aggregation _search query:
first agg by area, with the path as this is nested. Then reverse to the root document and nested agg to country.
{
"size": 0,
"aggs": {
"agg_areas": {
"nested": {
"path": "areas"
},
"aggs": {
"areas_name": {
"terms": {
"field": "areas.name"
},
"aggs": {
"agg_reverse": {
"reverse_nested": {},
"aggs": {
"agg_countries": {
"nested": {
"path": "countries"
},
"aggs": {
"countries_name": {
"terms": {
"field": "countries.name"
}
}
}
}
}
}
}
}
}
}
}
}
2) retrieve documents:
add a tophits inside your aggregation:
https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-metrics-top-hits-aggregation.html
top_hits is slow so you will have to read documentation and adjust size and sort to your context.
...
"terms": {
"field": "areas.name"
},
"aggregations": {
"hits": {
"top_hits": { "size": 100}
}
},
...

Elasticsearch min price of a month

I would like to receive the lowest prices for the next and previous 15 days from my chosen date in my products index.
How can I get this prices in ES? What kind of query should I write?
My mapping:
{
"product-data": {
"mappings": {
"mine-apple": {
"properties": {
"date": {
"type": "date"
},
"productName": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"productDescription": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"id": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"price": {
"type": "long"
},
"query": {
"properties": {
"match_all": {
"type": "object"
}
}
}
}
}
}
}
}
Thanks in advance.
The solution I found: I added date-histogram to my query.In this way, grouping my query with date-histogram. At the latest I get minimum prices with minimum aggregation.
{
"query": {
"bool": {
"must": [
{
"range": {
"date": {
"gte": "2017-05-11",
"lte": "2017-05-14"
}
}
}
]
}
},
"size": 0,
"aggs": {
"sales_per_month": {
"date_histogram": {
"format": "YYYY-MM-dd",
"field": "date",
"interval": "day"
},
"aggs": {
"sales": {
"min": {
"field": "price"
}
}
}
}
}
}

Elastic search top_hits aggregation on nested

I have an index which contains CustomerProfile documents. Each of this document in the CustomerInsightTargets(with the properties Source,Value) property can be an array with x items. What I am trying to achieve is an autocomplete (of top 5) on CustomerInsightTargets.Value grouped by CustomerInisghtTarget.Source.
It will be helpful if anyone gives me hint about how to select only a subset of nested objects from each document and use that nested obj in aggregations.
{
"customerinsights": {
"aliases": {},
"mappings": {
"customerprofile": {
"properties": {
"CreatedById": {
"type": "long"
},
"CreatedDateTime": {
"type": "date"
},
"CustomerInsightTargets": {
"type": "nested",
"properties": {
"CustomerInsightSource": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"CustomerInsightValue": {
"type": "text",
"term_vector": "yes",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
},
"analyzer": "ngram_tokenizer_analyzer"
},
"CustomerProfileId": {
"type": "long"
},
"Guid": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"Id": {
"type": "long"
}
}
},
"DisplayName": {
"type": "text",
"term_vector": "yes",
"analyzer": "ngram_tokenizer_analyzer"
},
"Email": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"Id": {
"type": "long"
},
"ImageUrl": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
}
}
}
},
"settings": {
"index": {
"number_of_shards": "1",
"provided_name": "customerinsights",
"creation_date": "1484860145041",
"analysis": {
"analyzer": {
"ngram_tokenizer_analyzer": {
"type": "custom",
"tokenizer": "ngram_tokenizer"
}
},
"tokenizer": {
"ngram_tokenizer": {
"type": "nGram",
"min_gram": "1",
"max_gram": "10"
}
}
},
"number_of_replicas": "2",
"uuid": "nOyI0O2cTO2JOFvqIoE8JQ",
"version": {
"created": "5010199"
}
}
}
}
}
Having as example a document:
{
{
"Id": 9072856,
"CreatedDateTime": "2017-01-12T11:26:58.413Z",
"CreatedById": 9108469,
"DisplayName": "valentinos",
"Email": "valentinos#mail.com",
"CustomerInsightTargets": [
{
"Id": 160,
"CustomerProfileId": 9072856,
"CustomerInsightSource": "Tags",
"CustomerInsightValue": "Tag1",
"Guid": "00000000-0000-0000-0000-000000000000"
},
{
"Id": 160,
"CustomerProfileId": 9072856,
"CustomerInsightSource": "ProfileName",
"CustomerInsightValue": "valentinos",
"Guid": "00000000-0000-0000-0000-000000000000"
},
{
"Id": 160,
"CustomerProfileId": 9072856,
"CustomerInsightSource": "Playground",
"CustomerInsightValue": "Wiki",
"Guid": "00000000-0000-0000-0000-000000000000"
}
]
}
}
If i ran an aggregation on the top_hits the result will include all targets from a document -> if one of them match my search text.
Example
GET customerinsights/_search
{
"query": {
"bool": {
"must": [
{
"nested": {
"path": "CustomerInsightTargets",
"query": {
"bool": {
"must": [
{
"match": {
"CustomerInsightTargets.CustomerInsightValue": {
"query": "2017",
"operator": "AND",
"fuzziness": 2
}
}
}
]
}
}
}
}
]
}
} ,
"aggs": {
"root": {
"nested": {
"path": "CustomerInsightTargets"
},
"aggs": {
"top_tags": {
"terms": {
"field": "CustomerInsightTargets.CustomerInsightSource.keyword"
},
"aggs": {
"top_tag_hits": {
"top_hits": {
"sort": [
{
"_score": {
"order": "desc"
}
}
],
"size": 5,
"_source": "CustomerInsightTargets"
}
}
}
}
}
}
},
"size": 0,
"_source": "CustomerInsightTargets"
}
My question is how I should use the aggregation to get the "autocomplete" Values grouped by Source and order by the _score. I tried to use a significant_terms aggregation but doesn't work so well, also terms aggs doesn't sort by score (and by _count) and having fuzzy also adds complexity.

Resources