Elasticsearch: terms aggregations on doubly nested object - elasticsearch

I am trying to do a doubly nested aggregation on a doubly nested object. That is, I have the root document, a child property, and a grand-child property. To be more precise, I have the following mapping:
{
"mappings": {
"root": {
"properties": {
"fields": {
"type": "nested",
"properties": {
"name": {
"type": "keyword"
},
"selections": {
"type": "nested",
"properties": {
"value": {
"type": "keyword"
}
}
}
}
}
}
}
}
}
I am trying to aggregate selection value counts per field, or in other words, to count the number of occurrences of each value for each field name, accross all root objects.
I have this:
{
"query": {
...
},
"aggregations": {
"fields": {
"nested": {
"path": "fields"
},
"aggregations": {
"name": {
"terms": {
"field": "fields.name"
},
"aggregations": {
"values": {
"nested": {
"path": "selections"
},
"aggregations": {
"value": {
"terms": {
"field": "selections.value"
}
}
}
}
}
}
}
}
}
}
which gets the field names as I want but for each of them I get no doc counts for the values.
What am I doing wrong?

You need to give full name for inner nested field, Change "path":"selections" to "path":"fields.selections"
{
"size": 0,
"aggregations": {
"fields": {
"nested": {
"path": "fields"
},
"aggregations": {
"name": {
"terms": {
"field": "fields.name"
},
"aggregations": {
"values": {
"nested": {
"path": "fields.selections"
},
"aggregations": {
"value": {
"terms": {
"field": "fields.selections.value"
}
}
}
}
}
}
}
}
}
}
Result:
"aggregations" : {
"fields" : {
"doc_count" : 2,
"name" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : "abc",
"doc_count" : 2,
"values" : {
"doc_count" : 2,
"value" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : "1",
"doc_count" : 2
}
]
}
}
}
]
}
}
}

Related

Filter in aggregation

say I have mapping:
{
// ...other fields,
"locations": {
"type": "nested",
"properties": {
"countrySlug": { "type": "keyword" },
"citySlug": { "type": "keyword" }
}
}
}
So this way, each document can have multiple locations:
{
"locations": [
{
"countrySlug": "germany",
"citySlug": "berlin"
},
{
"countrySlug": "germany",
"citySlug": "hamburg"
},
{
"countrySlug": "poland",
"citySlug": "krakow"
},
{
"countrySlug": "italy",
"citySlug": "milan"
}
]
}
Now I want to get aggregation of city slugs where location contains countrySlug = "germany".
My query looks like this:
{
"_source": false,
"aggs": {
"cities": {
"filter": {
"bool": {
"must": [
{
"bool": {
"should": [
{
"nested": {
"path": "locations",
"query": {
"bool": {
"must": {
"term": {
"locations.countrySlug": "germany"
}
}
}
}
}
}
]
}
}
]
}
},
"aggs": {
"agg": {
"nested": {
"path": "locations"
},
"aggs": {
"slugs": {
"terms": {
"field": "locations.citySlug",
"size": 5
},
"aggs": {
"top_reverse_nested": {
"reverse_nested": {}
}
}
}
}
}
}
}
},
"size": 0
}
But it returns all city slugs that were found, eg:
berlin: 2
krakow: 1
milan: 3
My goal is to get just:
berlin: 2
(or other city slugs that are related to a location with countrySlug = "germany")
Am I missing anything? How to make something like "post filter" for aggregations?
Thanks, PS
After filtering out all the documents where countrySlug is germany, you can put a nested aggregation on the returned records.
GET /cities/_search
{
"size": 0,
"aggs": {
"cities": {
"nested": {
"path": "locations"
},
"aggs": {
"filter_cities": {
"filter": {
"bool": {
"filter": [
{
"term": {
"locations.countrySlug": "germany"
}
}
]
}
},
"aggs": {
"cities": {
"terms": {
"field": "locations.citySlug"
}
}
}
}
}
}
}
}
The result for the above query:
{
"took" : 0,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 5,
"relation" : "eq"
},
"max_score" : null,
"hits" : [ ]
},
"aggregations" : {
"cities" : {
"doc_count" : 17,
"filter_cities" : {
"doc_count" : 9,
"cities" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : "hamburg",
"doc_count" : 5
},
{
"key" : "berlin",
"doc_count" : 4
}
]
}
}
}
}
}

elasticsearch find documents where the given number items in array has the same property value

first of all I would like to show simplified structure of document.
{
"_id": "413123123",
"_source": {
"description": {
"firstLine": "this is my description",
"secondLine": "some value"
},
"InsertDetails": {
"Timestamp": "2020-06-12T11:14:36+0000"
},
"Links": [
{
"LinkDetails": {
"linkId": 2342,
"type": "Link",
"dateCreation": "2012-09-21T08:42:09+0000",
"typeId": 404019,
"typeOfLink": "http"
}
},
{
"LinkDetails": {
"linkId": 321313,
"type": "Link",
"dateCreation": "2012-08-21T08:42:09+0000",
"typeId": 404019,
"typeOfLink": "http"
}
},
{
"LinkDetails": {
"linkId": 1231,
"type": "Link",
"dateCreation": "2012-09-21T08:42:09+0000",
"typeId": 32323,
"typeOfLink": "https"
}
},
{
"LinkDetails": {
"linkId": 53434,
"type": "Link",
"dateCreation": "2012-11-21T08:42:09+0000",
"typeId": 123231,
"typeOfLink": "wss"
}
}
]
}
}
I have a problem with forming query, which would find documents, where the following requirements are met:
two items in Links arrays has typeOfLink equal to http
description string contains word "this"
found items will be sorted by date desc
The version of elasticsearch is 2.3.2
I've tried with query such like this:
{
"query": {
"bool": {
"must": [
{
"bool": {
"must": [
{
"match": {
"Links.LinkDetails.typeOfLink": "http"
}
}
],
"minimum_should_match": 2
}
},
{
"match": {
"description.firstLine": "this"
}
}
]
}
},
"sort": [
{
"InsertDetails.Timestamp": {
"order": "desc"
}
}
]
}
The problem is that this query returns me also the documents, which has only one item in the array with the given value. I've tried to modify this query in different ways, but without any luck.
Added mapping
{
"my_index": {
"mappings": {
"en": {
"properties": {
"InsertDetails": {
"properties": {
"Timestamp": {
"type": "date",
"format": "strict_date_optional_time||epoch_millis"
}
}
},
"description": {
"properties": {
"firstLine": {
"type": "string"
},
"secondLine": {
"type": "string"
}
}
},
"Links": {
"properties": {
"LinkDetails": {
"properties": {
"linkId": {
"type": "long"
},
"type": {
"type": "string"
},
"dateCreation": {
"type": "date",
"format": "strict_date_optional_time||epoch_millis"
},
"typeOfLink": {
"type": "string"
},
"typeId": {
"type": "long"
}
}
}
}
}
}
}
}
}
}
At first, you want to filter on a nested field. (array of object)
To have coherent result you must have to map this field as a nested one.
https://www.elastic.co/guide/en/elasticsearch/reference/current/nested.html
Then, you will have to use aggregations.
What you want is to aggregate only "http" values for type_of_link, and return results if the aggregation return more than 2 results.
You query will be a little more complicated:
{
"size": 0,
"query": {
"bool": {
"filter": [
{
"nested": {
"path": "Links",
"query": {
"match": {
"Links.LinkDetails.typeOfLink": "http"
}
}
}
},
{
"match": {
"description.firstLine": "this"
}
}
]
}
},
"aggs": {
"links": {
"nested": {
"path": "Links"
},
"aggs": {
"http_only": {
"filter": {
"term": {
"Links.LinkDetails.typeOfLink.keyword": "http"
}
},
"aggs": {
"several_http": {
"terms": {
"field": "Links.LinkDetails.typeOfLink.keyword",
"min_doc_count": 2
}
,
"aggs": {
"complete_match": {
"top_hits": {
"size": 100
}
}
}
}
}
}
}
}
},
"sort": [
{
"InsertDetails.Timestamp": {
"order": "desc"
}
}
]
}
And your response will looks like:
"aggregations" : {
"links" : {
"doc_count" : 4,
"http_only" : {
"doc_count" : 2,
"several_http" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : "http",
"doc_count" : 2,
"complete_match" : {
"hits" : {
"total" : {
"value" : 2,
"relation" : "eq"
},
"max_score" : 0.98082924,
"hits" : [
{
"_index" : "test3",
"_type" : "_doc",
"_id" : "ed1AkXQBD_dLYq-V78bD",
"_nested" : {
"field" : "Links",
"offset" : 0
},
"_score" : 0.98082924,
"_source" : {
"LinkDetails" : {
"linkId" : 2342,
"type" : "Link",
"dateCreation" : "2012-09-21T08:42:09+0000",
"typeId" : 404019,
"typeOfLink" : "http"
}
}
},
{
"_index" : "test3",
"_type" : "_doc",
"_id" : "ed1AkXQBD_dLYq-V78bD",
"_nested" : {
"field" : "Links",
"offset" : 1
},
"_score" : 0.98082924,
"_source" : {
"LinkDetails" : {
"linkId" : 321313,
"type" : "Link",
"dateCreation" : "2012-08-21T08:42:09+0000",
"typeId" : 404019,
"typeOfLink" : "http"
}
}
}
]
}
}
}
]
}
}
}
}
By playing with the given aggregation you should be able to do what you want.

How to filter nested aggregations in ElasticSearch?

For example, let's assume we have a product index with the following mapping:
{
"product": {
"mappings": {
"producttype": {
"properties": {
"id": {
"type": "keyword"
},
"productAttributes": {
"type": "nested",
"properties": {
"name": {
"type": "keyword"
},
"value": {
"type": "keyword"
}
}
},
"title": {
"type": "text",
"fields": {
"keyword": {
"type": "text",
"analyzer": "keyword"
}
},
"analyzer": "standard"
}
}
}
}
}
}
I am trying to find how many products which have specific product attributes using the following query(I am using a fuzzy query to allow some edit distance):
{
"size": 0,
"query": {
"nested": {
"query": {
"fuzzy": {
"productAttributes.name": {
"value": "SSD"
}
}
},
"path": "productAttributes"
}
},
"aggs": {
"product_attribute_nested_agg": {
"nested": {
"path": "productAttributes"
},
"aggs": {
"terms_nested_agg": {
"terms": {
"field": "productAttributes.name"
}
}
}
}
}
}
But it returns all product attributes for each matched document and here is the response I get.
"aggregations" : {
"product_attribute_nested_agg" : {
"doc_count" : 6,
"terms_nested_agg" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : "SSD",
"doc_count" : 3
},
{
"key" : "USB 2.0",
"doc_count" : 3
}
]
}
}
}
Could you please guide me to how to filter buckets to only return matched attributes?
Edit:
Here are some document samples:
"hits" : {
"total" : 12,
"max_score" : 1.0,
"hits" : [
{
"_index" : "product",
"_type" : "producttype",
"_id" : "677d1164-c401-4d36-8a08-6aa14f7f32bb",
"_score" : 1.0,
"_source" : {
"title" : "Dell laptop",
"productAttributes" : [
{
"name" : "USB 2.0",
"value" : "4"
},
{
"name" : "SSD",
"value" : "250 GB"
}
]
}
},
{
"_index" : "product",
"_type" : "producttype",
"_id" : "2954935a-7f60-437a-8a54-00da2d71da46",
"_score" : 1.0,
"_source" : {
"productAttributes" : [
{
"name" : "USB 2.0",
"value" : "3"
},
{
"name" : "SSD",
"value" : "500 GB"
}
],
"title" : "HP laptop"
}
},
]
}
To filter only specific, you can use filter queries.
Query:
{
"size": 0,
"aggs": {
"product_attribute_nested_agg": {
"nested": {
"path": "productAttributes"
},
"aggs": {
"inner": {
"filter": {
"terms": {
"productAttributes.name": [
"SSD"
]
}
},
"aggs": {
"terms_nested_agg": {
"terms": {
"field": "productAttributes.name"
}
}
}
}
}
}
}
}
This is what it does the trick:
"filter": {
"terms": {
"productAttributes.name": [
"SSD"
]
}
}
You need to do filter part of the aggregation.
Output:
"aggregations": {
"product_attribute_nested_agg": {
"doc_count": 4,
"inner": {
"doc_count": 2,
"terms_nested_agg": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "SSD",
"doc_count": 2
}
]
}
}
}
}
Filtering using Fuzziness :
GET /product/_search
{
"size": 0,
"aggs": {
"product_attribute_nested_agg": {
"nested": {
"path": "productAttributes"
},
"aggs": {
"inner": {
"filter": {
"fuzzy": {
"productAttributes.name": {
"value": "SSt",//here will match SSD
"fuzziness": 3//you can remove it to be as Auto
}
}
},
"aggs": {
"terms_nested_agg": {
"terms": {
"field": "productAttributes.name"
}
}
}
}
}
}
}
}

group by nested and non nested fields in es

Hi i am trying to do group by nested and non nested fields.I want to do group by on 1 non nested fields(from_district) ,1 nested field(truck_number) and max on nested field(truck_number.score).
Requirement -: to get max score of each truck in all districts if truck is present in that district for a given sp_id
eg-:
District1 ,truck1, 0.9,
District2 ,truck1, 0.8,
District1 ,truck2, 1.8,
District2 ,truck3, 0.7,
District3 ,truck4, 1.7
Below is my mapping
{
"sp_ranked_indent" : {
"mappings" : {
"properties" : {
"from_district" : {
"type" : "keyword"
},
"sp_id" : {
"type" : "long"
},
"to_district" : {
"type" : "keyword"
},
"truck_ranking_document" : {
"type" : "nested",
"properties" : {
"score" : {
"type" : "float"
},
"truck_number" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
}
}
}
}
}
}
}
Below is the query that i tried but it is not grouping by nested and non nested field and also the max truck score is incorrect
{
"size": 0,
"query": {
"terms": {
"sp_id": [650128],
"boost": 1.0
}
},
"aggregations": {
"NESTED_AGG": {
"nested": {
"path": "truck_ranking_document"
},
"aggregations": {
"max_score": {
"max": {
"field": "truck_ranking_document.score"
}
},
"truck_numer": {
"terms": {
"field": "truck_ranking_document.truck_number.keyword",
"size": 10,
"min_doc_count": 1,
"shard_min_doc_count": 0,
"show_term_doc_count_error": false,
"order": [{
"_count": "desc"
}, {
"_key": "asc"
}]
}
},
"fromdistrictagg": {
"reverse_nested": {},
"aggregations": {
"fromDistrict": {
"terms": {
"field": "from_district",
"size": 10,
"min_doc_count": 1,
"shard_min_doc_count": 0,
"show_term_doc_count_error": false,
"order": [{
"_count": "desc"
}, {
"_key": "asc"
}]
}
}
}
}
}
}
}
}
I think this can be done using term and nested aggregation. Below query will produce output in follwing format
District1
Truck1
Max score
Truck2
Max score
Truck3
Max score
District2
Truck1
Max score
Truck2
Max score
Truck3
Max score
Query:
{
"query": {
"terms": {
"sp_id": [
1
]
}
},
"aggs": {
"district": {
"terms": {
"field": "from_district",
"size": 10
},
"aggs": {
"trucks": {
"nested": {
"path": "truck_ranking_document"
},
"aggs": {
"truck_no": {
"terms": {
"field": "truck_ranking_document.truck_number.keyword",
"size": 10
},
"aggs": {
"max_score": {
"max": {
"field": "truck_ranking_document.score"
}
},
"select": {
"bucket_selector": {
"buckets_path": {
"score": "max_score"
},
"script": "if(params.score>0) return true;"
}
}
}
}
}
},
"min_bucket_selector": {
"bucket_selector": {
"buckets_path": {
"count": "trucks>truck_no._bucket_count"
},
"script": {
"inline": "params.count != 0"
}
}
}
}
}
}
}
Result:
"aggregations" : {
"district" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : "District1",
"doc_count" : 1,
"trucks" : {
"doc_count" : 2,
"truck_no" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : "1",
"doc_count" : 1,
"max_score" : {
"value" : 2.0
}
},
{
"key" : "3",
"doc_count" : 1,
"max_score" : {
"value" : 3.0
}
}
]
}
}
}
]
}
Composite Aggregation
Composite aggregation response contains an after_key
"after_key" : {
"district" : "District4"
}
you need to use the after parameter to retrieve the next results
{
"aggs": {
"my_buckets": {
"composite": {
"size": 100,
"sources": [
{
"district": {
"terms": {
"field": "from_district"
}
}
}
]
},
"aggs": {
"trucks": {
"nested": {
"path": "truck_ranking_document"
},
"aggs": {
"truck_no": {
"terms": {
"field": "truck_ranking_document.truck_number.keyword",
"size": 10
},
"aggs": {
"max_score": {
"max": {
"field": "truck_ranking_document.score"
}
},
"select": {
"bucket_selector": {
"buckets_path": {
"score": "max_score"
},
"script": "if(params.score>0) return true;"
}
}
}
}
}
}
}
}
}
}

How to do two nested aggregations in elasticsearch?

City and home type are two nested objects in the following document mapping:
"mappings" : {
"home_index_doc" : {
"properties" : {
"city" : {
"type" : "nested",
"properties" : {
"country" : {
"type" : "nested",
"properties" : {
"name" : {
"type" : "text"
}
}
},
"name" : {
"type" : "keyword"
}
}
},
"home_type" : {
"type" : "nested",
"properties" : {
"name" : {
"type" : "keyword"
}
}
},
...
}
}
}
I am trying to do the following aggregation:
Take all present documents and show all home_types per city.
I imagine it should look similar to:
"aggregations": {
"all_cities": {
"buckets": [
{
"key": "Tokyo",
"doc_count": 12,
"home_types": {
"buckets": [
{
"key": "apartment",
"doc_count": 5
},
{
"key": "house",
"doc_count": 12
}
]
}
},
{
"key": "New York",
"doc_count": 1,
"home_types": {
"buckets": [
{
"key": "house",
"doc_count": 1
}
]
}
}
]
}
}
After trying gazzilion aproaches and combinations, I've made it that far with Kibana:
GET home-index/home_index_doc/_search
{
"size": 0,
"aggs": {
"all_cities": {
"nested": {
"path": "city"
},
"aggs": {
"city_name": {
"terms": {
"field": "city.name"
}
}
}
},
"aggs": {
"all_home_types": {
"nested": {
"path": "home_type"
},
"aggs": {
"home_type_name": {
"terms": {
"field": "home_type.name"
}
}
}
}
}
}
}
and I get the following exception:
"type": "unknown_named_object_exception",
"reason": "Unknown BaseAggregationBuilder [all_home_types]",
You need to use reverse_nested in order to jump out of the city nested type back at the root level and do another nested aggregation for the home_type nested type. Basically, like this:
{
"size": 0,
"aggs": {
"all_cities": {
"nested": {
"path": "city"
},
"aggs": {
"city_name": {
"terms": {
"field": "city.name"
},
"aggs": {
"by_home_types": {
"reverse_nested": {},
"aggs": {
"all_home_types": {
"nested": {
"path": "home_type"
},
"aggs": {
"home_type_name": {
"terms": {
"field": "home_type.name"
}
}
}
}
}
}
}
}
}
}
}
}

Resources