elasticsearch composite aggs with nested object - elasticsearch

I have an object with a nested field.
"parameters": {
"type": "nested",
"properties": {
"id": {
"type": "integer"
},
"values": {
"type": "keyword"
}
}
}
I am trying to run the following aggregation:
GET places/place/_search?size=0
{
"query": {
"match_all": {}
},
"aggs": {
"parameters": {
"nested": {
"path": "parameters"
},
"aggs": {
"parameters_cnt_i": {
"terms": {
"field": "parameters.id",
"size": 100
},
"aggs": {
"parameters_cnt_v": {
"terms": {
"field": "parameters.values",
"size": 100
}
}
}
}
}
}
}
}
But this is not a good solution, because I have to set "size" too large.
The docs say:
If you want to retrieve all terms or all combinations of terms in a nested terms aggregation you should use the Composite aggregation
But I can't understand how to use a Composite aggregation with a nested object. Is it possible?

my solution
{
"size": 0,
"aggs" : {
"parameters" : {
"nested" : {
"path" : "parameters"
},
"aggs": {
"group":{
"composite" : {
"size": 100, // your size
"sources" : [
{ "id": { "terms" : { "field": "parameters.id"} }}
]
}
}
}
}
}
}

Try dropping your 3rd "aggs", like this:
{
"aggs": {
"parameters": {
"nested": {
"path": "parameters"
},
"aggs": {
"count_item_one": {
"terms" : {
"field": "parameters.item_one",
"size": 100
}
},
"count_item_two": {
"terms" : {
"field": "parameters.item_two",
"size": 100
}
}
}
}
}
}
If your 2nd item is nested again, you may have to set up your nested params again, as you did with your 1st "aggs".

Related

Compose nested aggregations

I'm sorry for any mistakes in my English.
I hope that someone can help me.
Suppose that I have the following mapping for my index:
PUT test-index
{
"mappings": {
"properties": {
"nestedOBJField": {
"type": "nested",
"index": true
},
"keywordField": {
"type": "keyword",
"index": true
}
}
}
}
Is it possible to use the composite aggregation with nested fields?
It would be very handy if I could do something like this:
GET /test-index/_search
{
"size": 0,
"aggs": {
"TestAgg": {
"composite": {
"size": 10000,
"sources": [
{
"keyWordFieldAgg": {
"terms": {
"field": "keyWordField"
}
},
{
"nestedFieldAgg": {
"terms": {
"field": "nestedOBJField.attribute"
}
}
}
]
}
}
}
}
But this approach returns a number of errors.
I would appreciate it a lot if someone could help.
Property nestedOBJField is of data type "nested", and property keyWordField is of keyword type and at the same level as nestedOBJField.
To use nested fields in an aggregation, you need to use a nested aggregation, but then all sources in the composite aggregation must be nested fields as well. This open issue can tell more about it.
You can use the following workarounds.
Move keyWordField inside the nested object in your documents.
{
"mappings": {
"properties": {
"nestedOBJField": {
"type": "nested",
"properties":{
"keywordField": {
"type": "keyword"
}
}
}
}
}
}
Sample Document
{
"nestedOBJField":[
{
"attribute":"1",
"age":1,
"keywordField":"xyz"
},
{
"attribute":"2",
"age":2,
"keywordField":"xyz"
}
]
}
Query
"aggs": {
"TestAgg": {
"nested": {
"path": "nestedOBJField"
},
"aggs": {
"name": {
"composite": {
"size": 10000,
"sources": [
{
"nestedFieldAgg": {
"terms": {
"field": "nestedOBJField.attribute.keyword"
}
}
},
{
"a":{
"terms": {
"field": "nestedOBJField.keywordField.keyword"
}
}
}
]
}
}
}
}
}
Note that moving your field inside the nested property means duplicating data, and any update to it has to be applied to all nested documents.
Alternatively, use a terms aggregation with reverse_nested - pagination will be an issue in this case:
{
"size": 0,
"aggs": {
"TestAgg": {
"nested": {
"path": "nestedOBJField"
},
"aggs": {
"name": {
"terms": {
"field": "nestedOBJField.attribute.keyword",
"size": 10
},
"aggs": {
"back_to_parent": {
"reverse_nested": {},
"aggs": {
"keywords": {
"terms": {
"field": "keywordField.keyword",
"size": 10
}
}
}
}
}
}
}
}
}
}

Unable to create nested date aggregation query

I am trying to create an ElasticSearch aggregation query which can generate the sum or average of "value" across all my ingested documents.
The documents are of the format -
{
"weather":"cold",
"date_1":"2017/07/05",
"feedback":[
{
"date_2":"2017/08/07",
"value":28,
"comment":"not cold"
},{
"date_2":"2017/08/09",
"value":48,
"comment":"a bit chilly"
},{
"date_2":"2017/09/07",
"value":18,
"comment":"very cold"
}, ...
]
}
I am able to create a sum aggregation of all "feedback.value" using "date_1" by using the following request -
GET _search
{
"query": {
"query_string": {
"query": "cold"
}
},
"size": 0,
"aggs": {
"temperature": {
"date_histogram":{
"field" : "date_1",
"interval" : "month"
},
"aggs":{
"temperature_agg":{
"terms": {
"field": "feedback.value"
}
}
}
}
}
}
However, I need to run the same aggregation across all documents, bucketed by "feedback.date_2" instead. I am not sure if ElasticSearch can resolve such an aggregation or how to approach it. Any guidance would be helpful.
[EDIT]
Mapping file (I only define the nested items; ES identifies the other fields on its own)
{
"mappings": {
"catalog_item": {
"properties": {
"feedback":{
"type":"nested",
"properties":{
"date_2":{
"type": "date",
"format":"YYYY-MM-DD"
},
"value": {
"type": "float"
},
"comment": {
"type": "text"
}
}
}
}
}
}
}
You would need to make use of nested documents and sum aggregation.
Here's a working example:
Sample Mapping:
PUT test
{
"mappings": {
"doc": {
"properties": {
"feedback": {
"type": "nested"
}
}
}
}
}
Add Sample document:
PUT test/doc/1
{
"date_1": "2017/08/07",
"feedback": [
{
"date_2": "2017/08/07",
"value": 28,
"comment": "not cold"
},
{
"date_2": "2017/08/09",
"value": 48,
"comment": "a bit chilly"
},
{
"date_2": "2017/09/07",
"value": 18,
"comment": "very cold"
}
]
}
Calculate both the sum and average based on date_2.
GET test/_search
{
"size": 0,
"aggs": {
"temperature_aggregation": {
"nested": {
"path": "feedback"
},
"aggs": {
"temperature": {
"date_histogram": {
"field": "feedback.date_2",
"interval": "month"
},
"aggs": {
"sum": {
"sum": {
"field": "feedback.value"
}
},
"avg": {
"avg": {
"field": "feedback.value"
}
}
}
}
}
}
}
}

Elasticsearch: Aggregation on filtered nested objects to find unique values

I have an array of objects (tags) in each document in Elasticsearch 5:
{
"tags": [
{ "key": "tag1", "value": "val1" },
{ "key": "tag2", "value": "val2" },
...
]
}
Now I want to find unique tag values for a certain tag key. Something similar to this SQL query:
SELECT DISTINCT(tags.value) FROM tags WHERE tags.key='some-key'
I have come up with this DSL so far:
{
"size": 0,
"aggs": {
"my_tags": {
"nested": {
"path": "tags"
},
"aggs": {
"filter" : { "terms": { "tags.key": "tag1" } },
"aggs": {
"my_tags_values": {
"terms" : {
"field" : "tags.value",
"size": 9999
}
}
}
}
}
}
}
But it is showing me this error:
[terms] unknown field [tags.key], parser not found.
Is this the right approach to solve the problem? Thanks for your help.
Note: I have declared the tags field as a nested field in my mapping.
You mixed things up there. You probably wanted to add a filter aggregation, but you didn't give it a name:
{
"size": 0,
"aggs": {
"my_tags": {
"nested": {
"path": "tags"
},
"aggs": {
"my_filter": {
"filter": {
"terms": {
"tags.key": [
"tag1"
]
}
},
"aggs": {
"my_tags_values": {
"terms": {
"field": "tags.value",
"size": 9999
}
}
}
}
}
}
}
}
Try Bool Query inside the Filter-Aggregation:
{
"size": 0,
"aggs": {
"my_tags": {
"nested": {
"path": "tags"
},
"aggs": {
"filter": {
"bool": {
"must": [
{
"term": {
"tags.key": "tag1"
}
}
]
},
"aggs": {
"my_tags_values": {
"terms": {
"field": "tags.value",
"size": 0
}
}
}
}
}
}
}
}
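BTW: if you want to retrieve all buckets, you can write 0 instead of 9999 in the aggregation size. (Note: this only works on Elasticsearch 1.x/2.x; support for "size": 0 in the terms aggregation was removed in Elasticsearch 5.0, where you must specify an explicit, sufficiently large size instead.)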

Is it possible to perform elasticsearch nested stats aggregation on a field defined by the parent aggregation?

I'm trying to do a query to generate a plot. My data index looks like this:
"mappings": {
"mydata": {
"properties": {
"type": { "type": "string", "index": "not_analyzed" },
"stamp": { "type": "date", "format": "date_hour_minute_second_millis" },
"data": { "type": "object" }
}
}
Depending on the type, the data field will contain different objects, e.g.,
temperature_data = {
"type": "temperature",
"stamp": "2015-11-01T15:25:19.123",
"data": {"temperature": 23.4, "variance": 0.0}
}
humidity_data = {
"type": "humidity",
"stamp": "2015-11-01T15:26:21.063",
"data": {"humidity": 75.1, "variance": 0.0}
}
I'm trying to aggregate the data into buckets depending on their type, and then perform a date histogram to get the stats of each reading (temperature, humidity). My problem is how to set the field on the stats aggs, since it changes with the type (for "type": "temperature" the field is data.temperature, for example):
query = {
"size": 0,
"query": {
"filtered": {
"filter": {
"bool": {
"must": [
{"range" : {
"stamp" : {
"gt" : start_stamp,
"lt" : end_stamp
}
}}
]
}
}
}
},
"aggs": {
"pathes": {
"terms": {
"field": "type"
},
"aggs": {
"points": {
"date_histogram": {
"field": "stamp",
"interval": interval
},
"aggs": {
"point_stats": {
"stats": {
"field": "data."+field???
}
}
}
}
}
}
}
}
* UPDATE *
As suggested I added a data-type.groovy file to config/scripts/, the file contains the following:
return doc['data.temperature'].value
Elasticsearch is able to compile the script:
[2015-11-02 19:50:32,651][INFO ][script] [Atum] compiling script file [/home/user/elasticsearch-1.7.0/config/scripts/data-type.groovy]
I updated the query to load the script file:
query = {
"size": 0,
"query": {
"filtered": {
"filter": {
"bool": {
"must": [
{"range" : {
"stamp" : {
"gt" : start_stamp,
"lt" : end_stamp
}
}}
]
}
}
}
},
"aggs": {
"pathes": {
"terms": {
"field": "type"
},
"aggs": {
"points": {
"date_histogram": {
"field": "stamp",
"interval": interval
},
"aggs": {
"point_stats": {
"stats": {
"script": {"file": "data-type"}
}
}
}
}
}
}
}
}
When I run the query I get the following output:
{u'status': 400, u'error': u'SearchPhaseExecutionException[Failed to execute phase [query], ... Parse Failure [Unexpected token START_OBJECT in [point_stats].]]; }]'}
There's only temperature data in the database; if I replace "script": {"file": "data-type"} with "field": "data.temperature", the query works.
One option is to rename the humidity and temperature fields to something identical, like value, so you can simply aggregate on that field and you're good. You'd already know what kind of value it is since you know it from the type field.
"aggs": {
"pathes": {
"terms": {
"field": "type"
},
"aggs": {
"points": {
"date_histogram": {
"field": "stamp",
"interval": interval
},
"aggs": {
"point_stats": {
"stats": {
"field": "data.value"
}
}
}
}
}
}
}
The second option is to use a script, but that would be less performant and less scalable if you were to add more types of data (pressure, etc.):
"aggs": {
"pathes": {
"terms": {
"field": "type"
},
"aggs": {
"points": {
"date_histogram": {
"field": "stamp",
"interval": interval
},
"aggs": {
"point_stats": {
"stats": {
"script": "return doc.type.value == 'temperature' ? doc['data.temperature'].value : doc['data.humidity'].value"
}
}
}
}
}
}
}
Note that for this second option you need to enable dynamic scripting

Nested Aggregation Elasticsearch

I'm trying to build a nested aggregation in Elasticsearch but it keeps giving errors. It says "cannot find agg type tags". How can I fix it? Thank you for your help. By the way, I don't have nested documents; I have one document with 180 fields. Can I apply this aggregation? Here is my code:
{
"aggs": {
"comments": {
"nested": {
"path": "comments"
},
"aggs" : {
"red_products": {
"filter": {
"not": {
"terms": {
"text": [
"06melihgokcek",
"t.co","??","????","???"
]
}
}
},
"aggs": {
"top_docs": {
"terms": {
"field": "text",
"size": 50
}
},
"aggs" : {
"tags" : {
"terms" : {
"field" : "text",
"include" : ".*avni.*",
"exclude" : "fuat_.*"
}
}
}
}
}
}
}}}
Your innermost aggs (the one called tags at the bottom) is misplaced and should be a child element of top_docs.
{
"aggs": {
"comments": {
"nested": {
"path": "comments"
},
"aggs": {
"red_products": {
"filter": {
"not": {
"terms": {
"text": [
"06melihgokcek",
"t.co",
"??",
"????",
"???"
]
}
}
},
"aggs": {
"top_docs": {
"terms": {
"field": "text",
"size": 50
},
"aggs": { <---- this was the misplaced aggs
"tags": {
"terms": {
"field": "text",
"include": ".*avni.*",
"exclude": "fuat_.*"
}
}
}
}
}
}
}
}
}
}

Resources