How to do two nested aggregations in elasticsearch? - elasticsearch

City and home type are two nested objects in the following document mapping:
"mappings" : {
"home_index_doc" : {
"properties" : {
"city" : {
"type" : "nested",
"properties" : {
"country" : {
"type" : "nested",
"properties" : {
"name" : {
"type" : "text"
}
}
},
"name" : {
"type" : "keyword"
}
}
},
"home_type" : {
"type" : "nested",
"properties" : {
"name" : {
"type" : "keyword"
}
}
},
...
}
}
}
I am trying to do the following aggregation:
Take all present documents and show all home_types per city.
I imagine it should look similar to:
"aggregations": {
"all_cities": {
"buckets": [
{
"key": "Tokyo",
"doc_count": 12,
"home_types": {
"buckets": [
{
"key": "apartment",
"doc_count": 5
},
{
"key": "house",
"doc_count": 12
}
]
}
},
{
"key": "New York",
"doc_count": 1,
"home_types": {
"buckets": [
{
"key": "house",
"doc_count": 1
}
]
}
}
]
}
}
After trying a gazillion approaches and combinations, I've made it this far with Kibana:
GET home-index/home_index_doc/_search
{
"size": 0,
"aggs": {
"all_cities": {
"nested": {
"path": "city"
},
"aggs": {
"city_name": {
"terms": {
"field": "city.name"
}
}
}
},
"aggs": {
"all_home_types": {
"nested": {
"path": "home_type"
},
"aggs": {
"home_type_name": {
"terms": {
"field": "home_type.name"
}
}
}
}
}
}
}
and I get the following exception:
"type": "unknown_named_object_exception",
"reason": "Unknown BaseAggregationBuilder [all_home_types]",

You need to use reverse_nested in order to jump out of the city nested type back to the root level, and then do another nested aggregation for the home_type nested type. Basically, like this:
{
"size": 0,
"aggs": {
"all_cities": {
"nested": {
"path": "city"
},
"aggs": {
"city_name": {
"terms": {
"field": "city.name"
},
"aggs": {
"by_home_types": {
"reverse_nested": {},
"aggs": {
"all_home_types": {
"nested": {
"path": "home_type"
},
"aggs": {
"home_type_name": {
"terms": {
"field": "home_type.name"
}
}
}
}
}
}
}
}
}
}
}
}

Related

Elasticsearch cardinality multiple fields

I have an index which is as follows:
{
"_index" : "r2332",
"_type" : "_doc",
"_id" : "Vl81o3oBs8vUbHSMCZVQ",
"_score" : 1.0,
"_source" : {
"maid" : "d8ee3c5e-babb-4777-9cba-17fb0cd8e8a9",
"date" : "2021-06-09",
"hour" : 5,
"site_id" : 1035
}
},
{
"_index" : "r2332",
"_type" : "_doc",
"_id" : "Xl81o3oBs8vUbHSMCZVQ",
"_score" : 1.0,
"_source" : {
"maid" : "d8ee3c5e-babb-4777-9cba-17fb0cd8e8a9",
"date" : "2021-06-09",
"hour" : 5,
"site_id" : 1897
}
}
I am trying to get the unique count across the combination of maid and date. I am able to aggregate on the single field maid, but not on both. The following are the queries that I tried.
Trial 1:
{
"size": 0,
"query": {
"bool": {
"filter": [{
"terms": {
"site_id": [7560, 7566]
}
}, {
"range": {
"date": {
"gte": "2021-09-01",
"lte": "2021-09-15"
}
}
}]
}
},
"runtime_mappings": {
"type_and_promoted": {
"type": "keyword",
"script": "emit(doc['maid'].value + ' ' + doc['date'].value)"
}
},
"aggs": {
"group_by": {
"terms": {
"field": "site_id",
"size": 100
},
"aggs": {
"bydate": {
"terms": {
"field": "date",
"size": 100
},
"aggs": {
"byhour": {
"terms": {
"field": "hour",
"size": 24
},
"aggs": {
"reverse_nested": {},
"uv": {
"cardinality": {
"field": "runtime_mappings"
}
}
}
}
}
}
}
}
}
}
This is giving an empty output.
Trial 2:
{
"size": 0,
"query": {
"bool": {
"filter": [{
"terms": {
"site_id": [7560, 7566]
}
}, {
"range": {
"date": {
"gte": "2021-09-01",
"lte": "2021-09-15"
}
}
}]
}
},
"aggs": {
"group_by": {
"terms": {
"field": "site_id",
"size": 100
},
"aggs": {
"bydate": {
"terms": {
"field": "date",
"size": 100
},
"aggs": {
"byhour": {
"terms": {
"field": "hour",
"size": 24
},
"aggs": {
"uv": {
"cardinality": {
"script": "doc['maid'].value + '#' +doc'date'].value"
}
}
}
}
}
}
}
}
}
}
This gives me a syntax error at doc['maid'].value. How do I effectively combine two fields for a cardinality aggregation? I am using Elasticsearch 7.13.2.
The mapping of my index is as follows:
"r2332" : {
"mappings" : {
"dynamic" : "false",
"properties" : {
"date" : {
"type" : "date"
},
"hour" : {
"type" : "integer"
},
"maid" : {
"type" : "keyword"
},
"reach" : {
"type" : "integer"
},
"site_id" : {
"type" : "keyword"
}
}
}
}
}
Modify your second query as shown below. You need to use the maid.keyword instead of the maid field in the cardinality aggregation to avoid the search_phase_execution_exception
{
"size": 0,
"query": {
"bool": {
"filter": [
{
"terms": {
"site_id": [
7560,
7566
]
}
},
{
"range": {
"date": {
"gte": "2021-09-01",
"lte": "2021-09-15"
}
}
}
]
}
},
"aggs": {
"group_by": {
"terms": {
"field": "site_id",
"size": 100
},
"aggs": {
"bydate": {
"terms": {
"field": "date",
"size": 100
},
"aggs": {
"byhour": {
"terms": {
"field": "hour",
"size": 24
},
"aggs": {
"uv": {
"cardinality": {
"script": "doc['maid.keyword'].value + '#' +doc['date'].value"
}
}
}
}
}
}
}
}
}
}

elasticsearch find documents where the given number items in array has the same property value

First of all, I would like to show a simplified structure of the document.
{
"_id": "413123123",
"_source": {
"description": {
"firstLine": "this is my description",
"secondLine": "some value"
},
"InsertDetails": {
"Timestamp": "2020-06-12T11:14:36+0000"
},
"Links": [
{
"LinkDetails": {
"linkId": 2342,
"type": "Link",
"dateCreation": "2012-09-21T08:42:09+0000",
"typeId": 404019,
"typeOfLink": "http"
}
},
{
"LinkDetails": {
"linkId": 321313,
"type": "Link",
"dateCreation": "2012-08-21T08:42:09+0000",
"typeId": 404019,
"typeOfLink": "http"
}
},
{
"LinkDetails": {
"linkId": 1231,
"type": "Link",
"dateCreation": "2012-09-21T08:42:09+0000",
"typeId": 32323,
"typeOfLink": "https"
}
},
{
"LinkDetails": {
"linkId": 53434,
"type": "Link",
"dateCreation": "2012-11-21T08:42:09+0000",
"typeId": 123231,
"typeOfLink": "wss"
}
}
]
}
}
I have a problem forming a query that finds documents where the following requirements are met:
two items in the Links array have typeOfLink equal to http
the description string contains the word "this"
the found items are sorted by date, descending
The version of elasticsearch is 2.3.2
I've tried a query like this:
{
"query": {
"bool": {
"must": [
{
"bool": {
"must": [
{
"match": {
"Links.LinkDetails.typeOfLink": "http"
}
}
],
"minimum_should_match": 2
}
},
{
"match": {
"description.firstLine": "this"
}
}
]
}
},
"sort": [
{
"InsertDetails.Timestamp": {
"order": "desc"
}
}
]
}
The problem is that this query also returns documents that have only one item in the array with the given value. I've tried to modify this query in different ways, but without any luck.
Added mapping
{
"my_index": {
"mappings": {
"en": {
"properties": {
"InsertDetails": {
"properties": {
"Timestamp": {
"type": "date",
"format": "strict_date_optional_time||epoch_millis"
}
}
},
"description": {
"properties": {
"firstLine": {
"type": "string"
},
"secondLine": {
"type": "string"
}
}
},
"Links": {
"properties": {
"LinkDetails": {
"properties": {
"linkId": {
"type": "long"
},
"type": {
"type": "string"
},
"dateCreation": {
"type": "date",
"format": "strict_date_optional_time||epoch_millis"
},
"typeOfLink": {
"type": "string"
},
"typeId": {
"type": "long"
}
}
}
}
}
}
}
}
}
}
First, you want to filter on a nested field (an array of objects).
To get coherent results, you have to map this field as a nested one.
https://www.elastic.co/guide/en/elasticsearch/reference/current/nested.html
Then, you will have to use aggregations.
What you want is to aggregate only the "http" values of typeOfLink, and return results only if the aggregation contains at least 2 of them.
Your query will be a little more complicated:
{
"size": 0,
"query": {
"bool": {
"filter": [
{
"nested": {
"path": "Links",
"query": {
"match": {
"Links.LinkDetails.typeOfLink": "http"
}
}
}
},
{
"match": {
"description.firstLine": "this"
}
}
]
}
},
"aggs": {
"links": {
"nested": {
"path": "Links"
},
"aggs": {
"http_only": {
"filter": {
"term": {
"Links.LinkDetails.typeOfLink.keyword": "http"
}
},
"aggs": {
"several_http": {
"terms": {
"field": "Links.LinkDetails.typeOfLink.keyword",
"min_doc_count": 2
}
,
"aggs": {
"complete_match": {
"top_hits": {
"size": 100
}
}
}
}
}
}
}
}
},
"sort": [
{
"InsertDetails.Timestamp": {
"order": "desc"
}
}
]
}
And your response will look like:
"aggregations" : {
"links" : {
"doc_count" : 4,
"http_only" : {
"doc_count" : 2,
"several_http" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : "http",
"doc_count" : 2,
"complete_match" : {
"hits" : {
"total" : {
"value" : 2,
"relation" : "eq"
},
"max_score" : 0.98082924,
"hits" : [
{
"_index" : "test3",
"_type" : "_doc",
"_id" : "ed1AkXQBD_dLYq-V78bD",
"_nested" : {
"field" : "Links",
"offset" : 0
},
"_score" : 0.98082924,
"_source" : {
"LinkDetails" : {
"linkId" : 2342,
"type" : "Link",
"dateCreation" : "2012-09-21T08:42:09+0000",
"typeId" : 404019,
"typeOfLink" : "http"
}
}
},
{
"_index" : "test3",
"_type" : "_doc",
"_id" : "ed1AkXQBD_dLYq-V78bD",
"_nested" : {
"field" : "Links",
"offset" : 1
},
"_score" : 0.98082924,
"_source" : {
"LinkDetails" : {
"linkId" : 321313,
"type" : "Link",
"dateCreation" : "2012-08-21T08:42:09+0000",
"typeId" : 404019,
"typeOfLink" : "http"
}
}
}
]
}
}
}
]
}
}
}
}
By playing with the given aggregation you should be able to do what you want.

How to filter nested aggregations in ElasticSearch?

For example, let's assume we have a product index with the following mapping:
{
"product": {
"mappings": {
"producttype": {
"properties": {
"id": {
"type": "keyword"
},
"productAttributes": {
"type": "nested",
"properties": {
"name": {
"type": "keyword"
},
"value": {
"type": "keyword"
}
}
},
"title": {
"type": "text",
"fields": {
"keyword": {
"type": "text",
"analyzer": "keyword"
}
},
"analyzer": "standard"
}
}
}
}
}
}
I am trying to find how many products have specific product attributes using the following query (I am using a fuzzy query to allow some edit distance):
{
"size": 0,
"query": {
"nested": {
"query": {
"fuzzy": {
"productAttributes.name": {
"value": "SSD"
}
}
},
"path": "productAttributes"
}
},
"aggs": {
"product_attribute_nested_agg": {
"nested": {
"path": "productAttributes"
},
"aggs": {
"terms_nested_agg": {
"terms": {
"field": "productAttributes.name"
}
}
}
}
}
}
But it returns all product attributes of each matched document. Here is the response I get:
"aggregations" : {
"product_attribute_nested_agg" : {
"doc_count" : 6,
"terms_nested_agg" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : "SSD",
"doc_count" : 3
},
{
"key" : "USB 2.0",
"doc_count" : 3
}
]
}
}
}
Could you please guide me on how to filter the buckets so that only the matched attributes are returned?
Edit:
Here are some document samples:
"hits" : {
"total" : 12,
"max_score" : 1.0,
"hits" : [
{
"_index" : "product",
"_type" : "producttype",
"_id" : "677d1164-c401-4d36-8a08-6aa14f7f32bb",
"_score" : 1.0,
"_source" : {
"title" : "Dell laptop",
"productAttributes" : [
{
"name" : "USB 2.0",
"value" : "4"
},
{
"name" : "SSD",
"value" : "250 GB"
}
]
}
},
{
"_index" : "product",
"_type" : "producttype",
"_id" : "2954935a-7f60-437a-8a54-00da2d71da46",
"_score" : 1.0,
"_source" : {
"productAttributes" : [
{
"name" : "USB 2.0",
"value" : "3"
},
{
"name" : "SSD",
"value" : "500 GB"
}
],
"title" : "HP laptop"
}
},
]
}
To keep only specific attributes, you can use a filter aggregation.
Query:
{
"size": 0,
"aggs": {
"product_attribute_nested_agg": {
"nested": {
"path": "productAttributes"
},
"aggs": {
"inner": {
"filter": {
"terms": {
"productAttributes.name": [
"SSD"
]
}
},
"aggs": {
"terms_nested_agg": {
"terms": {
"field": "productAttributes.name"
}
}
}
}
}
}
}
}
This is what does the trick:
"filter": {
"terms": {
"productAttributes.name": [
"SSD"
]
}
}
You need to make the filter part of the aggregation.
Output:
"aggregations": {
"product_attribute_nested_agg": {
"doc_count": 4,
"inner": {
"doc_count": 2,
"terms_nested_agg": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "SSD",
"doc_count": 2
}
]
}
}
}
}
Filtering using Fuzziness :
GET /product/_search
{
"size": 0,
"aggs": {
"product_attribute_nested_agg": {
"nested": {
"path": "productAttributes"
},
"aggs": {
"inner": {
"filter": {
"fuzzy": {
"productAttributes.name": {
"value": "SSt",//here will match SSD
"fuzziness": 3//you can remove it to be as Auto
}
}
},
"aggs": {
"terms_nested_agg": {
"terms": {
"field": "productAttributes.name"
}
}
}
}
}
}
}
}

Elasticsearch: terms aggregations on doubly nested object

I am trying to do a doubly nested aggregation on a doubly nested object. That is, I have the root document, a child property, and a grand-child property. To be more precise, I have the following mapping:
{
"mappings": {
"root": {
"properties": {
"fields": {
"type": "nested",
"properties": {
"name": {
"type": "keyword"
},
"selections": {
"type": "nested",
"properties": {
"value": {
"type": "keyword"
}
}
}
}
}
}
}
}
}
I am trying to aggregate selection value counts per field, or in other words, to count the number of occurrences of each value for each field name, across all root objects.
I have this:
{
"query": {
...
},
"aggregations": {
"fields": {
"nested": {
"path": "fields"
},
"aggregations": {
"name": {
"terms": {
"field": "fields.name"
},
"aggregations": {
"values": {
"nested": {
"path": "selections"
},
"aggregations": {
"value": {
"terms": {
"field": "selections.value"
}
}
}
}
}
}
}
}
}
}
This returns the field names as I want, but for each of them I get no doc counts for the values.
What am I doing wrong?
You need to give the full name of the inner nested field: change "path": "selections" to "path": "fields.selections" (and, likewise, "field": "selections.value" to "field": "fields.selections.value").
{
"size": 0,
"aggregations": {
"fields": {
"nested": {
"path": "fields"
},
"aggregations": {
"name": {
"terms": {
"field": "fields.name"
},
"aggregations": {
"values": {
"nested": {
"path": "fields.selections"
},
"aggregations": {
"value": {
"terms": {
"field": "fields.selections.value"
}
}
}
}
}
}
}
}
}
}
Result:
"aggregations" : {
"fields" : {
"doc_count" : 2,
"name" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : "abc",
"doc_count" : 2,
"values" : {
"doc_count" : 2,
"value" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : "1",
"doc_count" : 2
}
]
}
}
}
]
}
}
}

ElasticSearch Nested Filter and Aggregation

So I have this mapping:
"employee": {
"properties": {
"DaysOff": {
"type": "nested",
"properties": {
"Date": {
"type": "date",
"format": "strict_date_optional_time||epoch_millis"
},
"Days": {
"type": "double"
},
"ID": {
"type": "long"
}
}
}
}
}
So basically an employee can have days off. Each day off they have is stored in an array under the property DaysOff. Days can be a fraction of a day, so if an employee took half a day off then it would be 0.5.
So I have this search:
{
"size": 45,
"filter": {
"nested": {
"path": "DaysOff",
"filter": {
"range": {
"DaysOff.Date": {
"from": "now-2M",
"to": "now"
}
}
}
}
}
}
This brings me back 45 documents, which is correct. I just can't figure out how to now apply an aggregation to these documents in order to get back the sum of all the days that have been taken.
Using this resource, I tried the following aggs, but it didn't give me the correct result:
{
"size": 45,
"filter": {
"nested": {
"path": "DaysOff",
"filter": {
"range": {
"DaysOff.Date": {
"from": "now-2M",
"to": "now"
}
}
}
}
},
"aggs": {
"sum_docs": {
"nested": {
"path": "DaysOff"
},
"aggs": {
"stepped_down": {
"sum": {
"field": "DaysOff.Days"
}
}
}
}
}
}
You need to filter on those nested documents to get the correct results. From the docs:
Because nested documents are indexed as separate documents, they can only be accessed within the scope of the nested query.
I created the index like this:
POST employee
{
"mappings": {
"emp_map": {
"properties": {
"DaysOff": {
"type": "nested",
"properties": {
"Date": {
"type": "date",
"format": "strict_date_optional_time||epoch_millis"
},
"Days": {
"type": "double"
},
"ID": {
"type": "long"
}
}
},
"name": {
"type": "string"
}
}
}
}
},
Then I indexed a few documents like this:
PUT employee/emp_map/1
{
"name" : "messi",
"DaysOff" : [
{
"Date" : "2015-11-01",
"Days" : 1,
"ID" : 11
},
{
"Date" : "2014-11-01",
"Days" : 2,
"ID" : 11
},
{
"Date" : "2015-12-01",
"Days" : 0.5,
"ID" : 11
}
]
}
PUT employee/emp_map/2
{
"name" : "ronaldo",
"DaysOff" : [
{
"Date" : "2015-10-01",
"Days" : 3,
"ID" : 12
},
{
"Date" : "2014-11-01",
"Days" : 2,
"ID" : 12
},
{
"Date" : "2015-12-01",
"Days" : 0.5,
"ID" : 12
}
]
}
PUT employee/emp_map/3
{
"name" : "suarez",
"DaysOff" : [
{
"Date" : "2015-11-01",
"Days" : 4,
"ID" : 13
},
{
"Date" : "2015-11-09",
"Days" : 2,
"ID" : 13
},
{
"Date" : "2015-12-01",
"Days" : 1.5,
"ID" : 13
}
]
}
This is my query. Notice the filter aggregation inside the nested aggregation; without it, ES will give you the sum of all the days taken off, regardless of date.
GET employee/_search
{
"query": {
"bool": {
"filter": {
"nested": {
"path": "DaysOff",
"query": {
"range": {
"DaysOff.Date": {
"from": "now-2M",
"to": "now"
}
}
}
}
}
}
},
"aggs": {
"emp_name": {
"terms": {
"field": "name",
"size": 10
},
"aggs": {
"nesting": {
"nested": {
"path": "DaysOff"
},
"aggs": {
"filter_date": {
"filter": {
"range": {
"DaysOff.Date": {
"from": "now-2M",
"to": "now"
}
}
},
"aggs": {
"sum_taken_off_days": {
"sum": {
"field": "DaysOff.Days"
}
}
}
}
}
}
}
}
},
"size": 0
}
This is the result I get,
"aggregations": {
"emp_name": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "messi",
"doc_count": 1,
"nesting": {
"doc_count": 3,
"filter_date": {
"doc_count": 2,
"sum_taken_off_days": {
"value": 1.5
}
}
}
},
{
"key": "ronaldo",
"doc_count": 1,
"nesting": {
"doc_count": 3,
"filter_date": {
"doc_count": 1,
"sum_taken_off_days": {
"value": 0.5
}
}
}
},
{
"key": "suarez",
"doc_count": 1,
"nesting": {
"doc_count": 3,
"filter_date": {
"doc_count": 3,
"sum_taken_off_days": {
"value": 7.5
}
}
}
}
]
}
}
P.S : This is per employee, you can remove emp_name terms aggregation to get sum of all employees.

Resources