elasticsearch 5 nested term aggregation does not work - elasticsearch

I have a nested type mapping in my index:
"actors": {
"type": "nested",
"properties": {
"actor": {
"type": "nested",
"properties": {
"actor_full_name": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
}
}
}
}
}
When I look at the data, it looks fine.
The request
GET /test_index/film/_search
{
"size": 100,
"_source": "actors.actor.actor_full_name"
}
gives me this response:
"actors": {
"actor": [
{
"actor_full_name": "Antonio BANDERAS"
},
{
"actor_full_name": "Diane VENORA"
},
{
"actor_full_name": "Omar SHARIF"
},
{
"actor_full_name": "Vladimir KULICH"
}
]
},
...
I am trying to run a nested terms aggregation on the actor_full_name field, using this request:
POST /test_index/film/_search
{
"size": 0,
"aggs": {
"actor_nested_agg_code": {
"nested": {
"path": "actors"
},
"aggs": {
"code_actor_agg": {
"terms": {
"field": "actor.actor_full_name.keyword",
"size": 100
}
}
}
}
}
}
Unfortunately, it gives me an incorrect response (the buckets are empty):
"aggregations": {
"actor_nested_agg_code": {
"doc_count": 1807,
"code_actor_agg": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": []
}
}
}
Do you see what I did wrong and how I could fix it?

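You either didn't mean to make actors nested as well, or you overlooked that you have two nested levels here: actors and actors.actor. Each nested level needs its own nested aggregation, and the terms field must be referenced by its full path (actors.actor.actor_full_name.keyword):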
{
"size": 0,
"aggs": {
"actor_nested_agg_code": {
"nested": {
"path": "actors"
},
"aggs": {
"second_nested_actor": {
"nested": {
"path": "actors.actor"
},
"aggs": {
"code_actor_agg": {
"terms": {
"field": "actors.actor.actor_full_name.keyword",
"size": 100
}
}
}
}
}
}
}
}

Related

ElasticSearch - How can I do nested field aggregation with field aliases?

I'm trying to run a cardinality aggregation on a nested field, but it's not working for field aliases (here, resellers.price is an alias). I'm using an example from the Elasticsearch documentation to show this:
GET /products/_search
{
"aggs": {
"resellers": {
"nested": {
"path": "resellers"
},
"aggs": {
"unique_prices": {
"cardinality": { "field": "resellers.price" }
}
}
}
}
}
Adding a working example with index data, mapping, search query and search result
Index Mapping:
{
"mappings": {
"properties": {
"resellers": {
"type": "nested",
"properties": {
"cost": {
"type": "integer"
},
"price": {
"type": "alias",
"path": "resellers.cost"
}
}
}
}
}
}
Index Data:
{
"resellers": {
"cost": 200
}
}
{
"resellers": {
"cost": 100
}
}
{
"resellers": {
"cost": 200
}
}
Search Query:
{
"size": 0,
"aggs": {
"resellers": {
"nested": {
"path": "resellers"
},
"aggs": {
"unique_prices": {
"cardinality": {
"field": "resellers.price"
}
}
}
}
}
}
Search Result:
"aggregations": {
"resellers": {
"doc_count": 3,
"unique_prices": {
"value": 2
}
}
}

Nested aggregation in nested field?

I am new to elasticsearch and don't know a lot about aggregations but I have this ES6 mapping:
{
"mappings": {
"test": {
"properties": {
"id": {
"type": "integer"
},
"countries": {
"type": "nested",
"properties": {
"global_id": {
"type": "keyword"
},
"name": {
"type": "text",
"fields": {
"raw": {
"type": "keyword"
}
}
}
}
},
"areas": {
"type": "nested",
"properties": {
"global_id": {
"type": "keyword"
},
"name": {
"type": "text",
"fields": {
"raw": {
"type": "keyword"
}
}
},
"parent_global_id": {
"type": "keyword"
}
}
}
}
}
}
}
How can I get all documents grouped by areas and then, within each area, grouped by countries? The documents also have to be returned in full, not just the nested documents. Is this even possible?
1) Aggregation _search query:
First aggregate by area, using a nested aggregation with the areas path because the field is nested. Then use reverse_nested to go back to the root document, and add a second nested aggregation on countries.
{
"size": 0,
"aggs": {
"agg_areas": {
"nested": {
"path": "areas"
},
"aggs": {
"areas_name": {
"terms": {
"field": "areas.name"
},
"aggs": {
"agg_reverse": {
"reverse_nested": {},
"aggs": {
"agg_countries": {
"nested": {
"path": "countries"
},
"aggs": {
"countries_name": {
"terms": {
"field": "countries.name"
}
}
}
}
}
}
}
}
}
}
}
}
2) Retrieve the documents:
Add a top_hits aggregation inside your terms aggregation:
https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-metrics-top-hits-aggregation.html
top_hits is slow, so read the documentation and adjust size and sort to your context.
...
"terms": {
"field": "areas.name"
},
"aggregations": {
"hits": {
"top_hits": { "size": 100}
}
},
...

Nested object aggregation term with mixed nested/non-nested filter

We have facets showing the number of results that will show when clicking the filters (and combining them). Something like this:
Before we introduced nested objects, the following would do the job:
GET /x_v1/_search/
{
"size": 0,
"aggs": {
"FilteredDescriptiveFeatures": {
"filter": {
"bool": {
"must": [
{
"terms": {
"breadcrumbs.categoryIds": [
"category"
]
}
},
{
"terms": {
"products.sterile": [
"0"
]
}
}
]
}
},
"aggs": {
"DescriptiveFeatures": {
"terms": {
"field": "products.descriptiveFeatures",
"size": 1000
}
}
}
}
}
}
This gives the result:
"aggregations": {
"FilteredDescriptiveFeatures": {
"doc_count": 280,
"DescriptiveFeatures": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "somekey",
"doc_count": 42
},
We needed to make products a nested object, though, and I'm currently trying to rewrite the above to work with this change.
My attempt looks like the following. It doesn't give the correct result though, and doesn't seem properly connected to the filter.
GET /x_v2/_search/
{
"size": 0,
"aggs": {
"FilteredDescriptiveFeatures": {
"filter": {
"bool": {
"must": [
{
"terms": {
"breadcrumbs.categoryIds": [
"category"
]
}
},
{
"nested": {
"path": "products",
"query": {
"terms": {
"products.sterile": [
"0"
]
}
}
}
}
]
}
},
"aggs": {
"nested": {
"nested": {
"path": "products"
},
"aggregations": {
"DescriptiveFeatures": {
"terms": {
"field": "products.descriptiveFeatures",
"size": 1000
}
}
}
}
}
}
}
}
This gives the result:
"aggregations": {
"FilteredDescriptiveFeatures": {
"doc_count": 280,
"nested": {
"doc_count": 1437,
"DescriptiveFeatures": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "somekey",
"doc_count": 164
},
I've also tried to put the nested definition higher up so that it contains both the filter and the aggs, but then the filter term breadcrumbs.categoryIds, which is not in the nested object, won't work.
Is what I'm trying to do even possible?
And how can it be solved?
In your FilteredDescriptiveFeatures step, you return all documents that have at least one product with sterile = 0.
But in the nested step that follows, you don't apply this filter again. So all nested products of those documents are returned in this step, and your terms aggregation runs on all products, not only those with sterile = 0.
You should move your sterile filter into the nested step. And as Richa points out, you need a reverse_nested aggregation in the final step to count Elasticsearch documents rather than nested product sub-documents.
Could you try this query?
{
"size": 0,
"aggs": {
"filteredCategory": {
"filter": {
"terms": {
"breadcrumbs.categoryIds": [
"category"
]
}
},
"aggs": {
"nestedProducts": {
"nested": {
"path": "products"
},
"aggs": {
"filteredByProductsAttributes": {
"filter": {
"terms": {
"products.sterile": [
"0"
]
}
},
"aggs": {
"DescriptiveFeatures": {
"terms": {
"field": "products.descriptiveFeatures",
"size": 1000
},
"aggs": {
"productCount": {
"reverse_nested": {}
}
}
}
}
}
}
}
}
}
}
}
What I understand from the description is that you want to filter your results on the basis of some nested and non-nested fields, and then apply aggregations on the nested field. I created a sample index and data with some nested and non-nested fields, and wrote a query against it.
Mapping
PUT stack-557722203
{
"mappings": {
"_doc": {
"properties": {
"category": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"user": {
"type": "nested", // NESTED FIELD
"properties": {
"fName": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"lName": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"type": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
}
}
}
}
}
}
}
Sample Data
POST _bulk
{"index":{"_index":"stack-557722203","_id":"1","_type":"_doc"}}
{"category":"X","user":[{"fName":"A","lName":"B","type":"X"},{"fName":"A","lName":"C","type":"X"},{"fName":"P","lName":"B","type":"Y"}]}
{"index":{"_index":"stack-557722203","_id":"2","_type":"_doc"}}
{"category":"X","user":[{"fName":"P","lName":"C","type":"Z"}]}
{"index":{"_index":"stack-557722203","_id":"3","_type":"_doc"}}
{"category":"X","user":[{"fName":"A","lName":"C","type":"Y"}]}
{"index":{"_index":"stack-557722203","_id":"4","_type":"_doc"}}
{"category":"Y","user":[{"fName":"A","lName":"C","type":"Y"}]}
Query
GET stack-557722203/_search
{
"size": 0,
"query": {
"bool": {
"must": [
{
"nested": {
"path": "user",
"query": {
"term": {
"user.fName.keyword": {
"value": "A"
}
}
}
}
},
{
"term": {
"category.keyword": {
"value": "X"
}
}
}
]
}
},
"aggs": {
"group BylName": {
"nested": {
"path": "user"
},
"aggs": {
"group By lName": {
"terms": {
"field": "user.lName.keyword",
"size": 10
},
"aggs": {
"reverse Nested": {
"reverse_nested": {} // NOTE THIS
}
}
}
}
}
}
}
Output
{
"took": 18,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"skipped": 0,
"failed": 0
},
"hits": {
"total": 2,
"max_score": 0,
"hits": []
},
"aggregations": {
"group BylName": {
"doc_count": 4,
"group By lName": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "B",
"doc_count": 2,
"reverse Nested": {
"doc_count": 1
}
},
{
"key": "C",
"doc_count": 2,
"reverse Nested": {
"doc_count": 2
}
}
]
}
}
}
}
The reason you get larger doc_count values after changing the mapping to nested is the way nested and object (non-nested) documents are stored: each nested object is indexed as a separate hidden document, so a nested aggregation counts nested objects rather than root documents. See here to understand how they are stored internally. To connect them back to the root document, use a reverse_nested aggregation; its doc_count then counts root documents, which gives you the same result as before.
Hope this helps!!

Elasticsearch aggregation on nested objects

I have an document with the following mappings:
{
"some_doc_name": {
"mappings": {
"_doc": {
"properties": {
"stages": {
"properties": {
"name": {
"type": "text"
},
"durationMillis": {
"type": "long"
}
}
}
}
}
}
}
}
And I would like to have an aggregation like: "The average duration of the stages whose name contains the SCM token"
I tried something like:
{
"aggs": {
"scm_stage": {
"filter": {
"bool": {
"should": [{
"match_phrase": {
"stages.name": "SCM"
}
}]
}
},
"aggs" : {
"avg_duration": {
"avg": {
"field": "stages.durationMillis"
}
}
}
}
}
}
But that's giving me the average of all stages for all documents that contain at least one stage with the SCM token. Any advice on how to get this aggregation right?
Answering my own question, thanks to the help of val.
My mapping was missing "type": "nested" on the stages field. It should look something like:
...
"stages": {
"type": "nested",
"properties": {
"id": {
"type": "keyword",
"ignore_above": 256
},
...
Then I can get my aggregation working with something like this:
{
"size": 0,
"query": {
"nested": {
"path": "stages",
"query": {
"match": {
"stages.name": "scm"
}
}
}
},
"aggs": {
"stages": {
"nested": {
"path": "stages"
},
"aggs": {
"stages-filter": {
"filter": {
"terms": {
"stages.name": [
"scm"
]
}
},
"aggs": {
"avg_duration": {
"avg": {
"field": "stages.durationMillis"
}
}
}
}
}
}
}
}

ElasticSearch - How to aggregation access log ignore GET parameter?

I want to aggregate access-log entries by request path, ignoring the GET query string.
{
"query": {
"bool": {
"must": [
{
"wildcard": {
"path.keyword": "/hex/*"
}
}
]
}
},
"from": 0,
"size": 0,
"aggs": {
"path": {
"terms": {
"field": "path.keyword"
}
}
}
}
And I get results like these:
{
"key": "/hex/user/admin_user/auth",
"doc_count": 38
},
{
"key": "/hex/report/chart/fastreport_lobby_all?start_date=2017-06-29&end_date=2017-07-05&category=date_range&value[]=payoff",
"doc_count": 35
},
{
"key": "/hex/report/chart/fastreport_lobby_all?start_date=2017-06-29&end_date=2017-07-05&category=lobby&value[]=payoff",
"doc_count": 35
},
{
"key": "/hex/report/chart/online_membership?start_date=2017-06-29&end_date=2017-07-05&category=datetime_range&value[]=user_total",
"doc_count": 34
}
There are two separate /hex/report/chart/fastreport_lobby_all?... buckets, which differ only in their query string.
So neither doc_count is the real access count for this function.
Is there any way to count these as one?
{
"key": "/hex/report/chart/fastreport_lobby_all",
"doc_count": 70
}
I don't think this is possible without a custom analyzer that keeps only the part of the path before the "?", like this:
PUT your_index
{
"settings": {
"analysis": {
"analyzer": {
"query_analyzer": {
"type": "custom",
"tokenizer": "split_query",
"filter": ["top1"
]
}
},
"filter":{
"top1":{
"type": "limit",
"max_token_count": 1
}
},
"tokenizer":{
"split_query":{
"type": "pattern",
"pattern": "\\?"
}
}
}
},
"mappings": {
"your_log_type": {
"properties": {
"path": {
"type": "text",
"fields": {
"keyword": {
"type":"keyword"
},
"no_query": {
"type":"string",
"fielddata":true,
"analyzer":"query_analyzer"
}
}
}
}
}
}
}
And then query on
POST test/log_type/_search
{
"query": {
"bool": {
"must": [
{
"wildcard": {
"path.keyword": "/hex/*"
}
}
]
}
},
"from": 0,
"size": 0,
"aggs" : {
"genres" : {
"terms" : { "field" : "path.no_query" }
}
}
}

Resources