ElasticSearch Aggregation on nested field with bucketing on parent id - elasticsearch

Following is my doc structure
'Order': {
u'properties': {
u'order_id': {u'type': u'integer'},
'Product': {
u'properties': {
u'product_id': {u'type': u'integer'},
u'product_category': {'type': 'text'},
},
u'type': u'nested'
}
}
}
Doc1
"Order": {
"order_id": "1",
"Product": [
{
"product_id": "1",
"product_category": "category_1"
},
{
"product_id": "2",
"product_category": "category_2"
},
{
"product_id": "3",
"product_category": "category_2"
},
]
}
Doc2
"Order": {
"order_id": "2",
"Product": [
{
"product_id": "4",
"product_category": "category_1"
},
{
"product_id": "1",
"product_category": "category_1"
},
{
"product_id": "2",
"product_category": "category_2"
},
]
}
I want to get following output
"aggregations": {
"Order": [
{
"order_id": "1"
"category_counts": [
{
"category_1": 1
},
{
"category_2": 2
},
]
},
{
"order_id": "1"
"category_counts": [
{
"category_1": 2
},
{
"category_2": 1
},
]
},
]
}
I tried using nested aggregation
"aggs": {
"Product-nested": {
"nested": {
"path": "Product"
}
"aggs": {
"category_counts": {
"terms": {
"field": "Product.product_category"
}
}
},
}
}
It does not give output for each order but gives combined output for all orders
{
"Product-nested": {
"category_counts": [
"category_1": 3,
"category_2": 3
]
}
}
I have two questions:
How to get the desired output in above scenario?
What if instead of single product_category I have an array of
product_categories then how will we achieve the same in this
scenario?
I am using elasticsearch >= 5.0

I have an idea but i dont think its the best one..
you can make a terms aggregation on the "order_id" field, then a sub nestes aggregation on "Product.product_category".
somthing like this :
{
"aggs": {
"all-order-id": {
"terms": {
"field": "order_id",
"size": 10
},
"aggs": {
"Product-nested": {
"nested": {
"path": "Product"
},
"aggs": {
"all-products-in-order-id": {
"terms": {
"field": "Product.product_category"
}
}
}
}
}
}
}
}
sorry its lock bit messy i'm not so good with this answer editor

Related

Reverse_nested aggregation + top hits : get parent and nested data at the same time

Do you know how to use reverse_nested aggregation to get both the parent and ONLY the nested data inside my top hit aggregations ?
The 'ONLY' part is the problem right now.
This is my mapping :
{
"ticket": {
"mappings": {
"properties": {
"name": {
"type": "keyword"
}
},
"tasks": {
"type": "nested",
"properties": {
"string_task_name": {
"type": "keyword"
}
}
}
}
}
}
My query uses top hits and reverse nested aggs.
{
"aggs": {
"object_tasks": {
"nested": {
"path": "object_tasks"
},
"aggs": {
"filter_by_tasks_attribute": {
"filter": {
"bool": {
"must": [
{
"wildcard": {
"object_tasks.string_task_name.keyword": "*"
}
}
]
}
},
"aggs": {
"using_reverse_nested": {
"reverse_nested": {
"path": "object_tasks"
},
"aggs": {
"names": {
"top_hits": {
"_source": {
"includes": [
"object_tasks.string_task_name",
"string_name"
]
},
"sort": [
{
"object_tasks.string_task_name.keyword": {
"order": "desc"
}
}
],
"from": 0,
"size": 10
}
}
}
}
}
}
}
}
}
}
{
"hits": {
"total": {
"value": 25,
"relation": "eq"
},
"max_score": null,
"hits": [
{
"_index": "random_index",
"_type": "_doc",
"_id": "5",
"_score": null,
"_source": {
"object_tasks": [ ================> I don't want all these tasks names, I just want the task name of the current nested object I am in.
{
"string_task_name": "task1"
},
{
"string_task_name": "task2"
},
{
"string_task_name": "task3"
},
{
"string_task_name": "task4"
}
],
"string_name": "Dummy Ticket 854"
},
"sort": [
"seek_a_sme"
]
}
]
}
}
As you can see the result is giving me 4 tasks name. What I want is to return only 1 task name.
The only workaround I have found is to copy the data of tickets inside the tasks. But if I can avoid it that would be awesome.
I don't want all these tasks names, I just want the task name of the current nested object I am in.
The statement "of the current nested object I'm in" implies that you are inside of a nested context but you cannot be in one when you escape it through reverse_nested…
I'm not sure if I truly understood what you're gunning for here but you could aggregate on the terms of object_tasks.string_task_name.keyword and the keys of this aggregation would then function as the individual "current nested objects" that you're after:
{
"size": 0,
"aggs": {
"object_tasks": {
"nested": {
"path": "object_tasks"
},
"aggs": {
"filter_by_tasks_attribute": {
"filter": {
"bool": {
"must": [
{
"wildcard": {
"object_tasks.string_task_name.keyword": "*"
}
}
]
}
},
"aggs": {
"by_string_task_name": {
"terms": {
"field": "object_tasks.string_task_name.keyword",
"order": {
"_key": "desc"
},
"size": 10
},
"aggs": {
"using_reverse_nested": {
"reverse_nested": {},
"aggs": {
"names": {
"top_hits": {
"_source": {
"includes": [
"string_name"
]
},
"from": 0,
"size": 10
}
}
}
}
}
}
}
}
}
}
}
}
yielding
"aggregations" : {
"object_tasks" : {
...
"filter_by_tasks_attribute" : {
...
"by_string_task_name" : {
...
"buckets" : [
{
"key" : "task4", <--
...
"using_reverse_nested" : {
...
"names" : {
"hits" : {
...
"hits" : [
{
...
"_source" : {
"string_name" : "Dummy Ticket 854" <--
}
}
]
}
}
}
},
{
"key" : "task3", <--
...
},
{
"key" : "task2", <--
...
},
{
"key" : "task1", <--
...
}
}
]
}
}
}
}
Notice that the top_hits aggregation doesn't need to be sorted anymore -- object_tasks.string_task_name.keyword will always be the same for any currently aggregated terms bucket. What I did instead was order this terms aggregation by _key which works the same way as a top_hits sort would have. BTW -- yours was missing the nested path parameter.

elasticsearch Saved Search with Group by

index_name: my_data-2020-12-01
ticket_number: T123
ticket_status: OPEN
ticket_updated_time: 2020-12-01 12:22:12
index_name: my_data-2020-12-01
ticket_number: T124
ticket_status: OPEN
ticket_updated_time: 2020-12-01 12:32:11
index_name: my_data-2020-12-02
ticket_number: T123
ticket_status: INPROGRESS
ticket_updated_time: 2020-12-02 12:33:12
index_name: my_data-2020-12-02
ticket_number: T125
ticket_status: OPEN
ticket_updated_time: 2020-12-02 14:11:45
I want to create a saved search with group by ticket_number field get unique doc with latest ticket status (ticket_status). Is it possible?
You can simply query again, I am assuming you are using Kibana for visualization purpose. in your query, you need to filter based on the ticket_number and sort based on ticket_updated_time.
Working example
Index mapping
{
"mappings": {
"properties": {
"ticket_updated_time": {
"type": "date"
},
"ticket_number" :{
"type" : "text"
},
"ticket_status" : {
"type" : "text"
}
}
}
}
Index sample docs
{
"ticket_number": "T123",
"ticket_status": "OPEN",
"ticket_updated_time": "2020-12-01T12:22:12"
}
{
"ticket_number": "T123",
"ticket_status": "INPROGRESS",
"ticket_updated_time": "2020-12-02T12:33:12"
}
Now as you can see, both the sample documents belong to the same ticket_number with different status and updated time.
Search query
{
"size" : 1, // fetch only the latest status document, if you remove this, will get other ticket with different status.
"query": {
"bool": {
"filter": [
{
"match": {
"ticket_number": "T123"
}
}
]
}
},
"sort": [
{
"ticket_updated_time": {
"order": "desc"
}
}
]
}
And search result
"hits": [
{
"_index": "65180491",
"_type": "_doc",
"_id": "2",
"_score": null,
"_source": {
"ticket_number": "T123",
"ticket_status": "INPROGRESS",
"ticket_updated_time": "2020-12-02T12:33:12"
},
"sort": [
1606912392000
]
}
]
If you need to group by ticket_number field, then you can use aggregation as well
Index Mapping:
{
"mappings": {
"properties": {
"ticket_updated_time": {
"type": "date",
"format": "yyyy-MM-dd HH:mm:ss"
}
}
}
}
Search Query:
{
"size": 0,
"aggs": {
"unique_id": {
"terms": {
"field": "ticket_number.keyword",
"order": {
"latestOrder": "desc"
}
},
"aggs": {
"latestOrder": {
"max": {
"field": "ticket_updated_time"
}
}
}
}
}
}
Search Result:
"buckets": [
{
"key": "T125",
"doc_count": 1,
"latestOrder": {
"value": 1.606918305E12,
"value_as_string": "2020-12-02 14:11:45"
}
},
{
"key": "T123",
"doc_count": 2,
"latestOrder": {
"value": 1.606912392E12,
"value_as_string": "2020-12-02 12:33:12"
}
},
{
"key": "T124",
"doc_count": 1,
"latestOrder": {
"value": 1.606825931E12,
"value_as_string": "2020-12-01 12:32:11"
}
}
]

ElasticSearch simple query

I have structure like this in my ElasticSearch
{
_index: 'index',
_type: 'product',
_id: '896',
_score: 0,
_source: {
entity_id: '896',
category: [
{
category_id: 2,
is_virtual: 'false'
},
{
category_id: 82,
is_virtual: 'false'
}
]
}
}
I want return all "producs" that have "82" category_id.
{
"query": {
"bool": {
"filter": {
"terms": {
"category.category_id": [
82
]
}
}
}
}
}
This query gives me 0 hits.
What is right way to do this?
Adding working example, you need to define the category as nested field and modify your search query by including the nested path
Index Mapping
{
"mappings": {
"properties": {
"entity_id": {
"type": "text"
},
"category": {
"type": "nested"
}
}
}
}
Index your document
{
"entity_id": "896",
"category": [
{
"category_id": 2,
"is_virtual": false
},
{
"category_id": 82,
"is_virtual": false
}
]
}
Proper search query, note we are using nested query which doesn't support normal filter(so your query gives error)
{
"query": {
"nested": {
"path": "category",
"query": {
"bool": {
"must": [
{
"match": {
"category.category_id": 82
}
}
]
}
}
}
}
}
Search result retuns indexed doc
"hits": [
{
"_index": "complexnested",
"_type": "_doc",
"_id": "1",
"_score": 1.0,
"_source": {
"entity_id": "896",
"category": [
{
"category_id": 2,
"is_virtual": false
},
{
"category_id": 82,
"is_virtual": false
}
]
}
}
]
If your query gives you no results, I suspect that category is of type nested in your index mapping. If that's the case, that's good and you can modify your query like this to use the nested query:
{
"query": {
"bool": {
"filter": {
"nested": {
"path": "category",
"query": {
"terms": {
"category.category_id": [
82
]
}
}
}
}
}
}
}

Elasticsearch aggregations for faceted search excluding some fields

I have shop which use elasticsearch 2.4 for faceted search.
But at the moment the existing filters (product attributes) are taken from mysql. I want to do this using elasticsearch aggregations.
But I got the problem: I do not need to aggregate all the attributes.
What a have:
Part of Mapping:
...
'is_active' => [
'type' => 'long',
'index' => 'not_analyzed',
],
'category_id' => [
'type' => 'long',
'index' => 'not_analyzed',
],
'attrs' => [
'properties' => [
'attr_name' => ['type' => 'string', 'index' => 'not_analyzed'],
'value' => [
'type' => 'string',
'index' => 'analyzed',
'analyzer' => 'attrs_analizer',
],
]
],
...
Exemple of data:
{
"id": 1,
"is_active": "1",
"category_id": 189,
...
"price": "48.00",
"attrs": [
{
"attr_name": "Brand",
"value": "TP-Link"
},
{
"attr_name": "Model",
"value": "TL-1"
},
{
"attr_name": "Other",
"value": "<div>Some text of 'Other' property<br><img src......><ul><li>......</ul></div>"
}
]
},
{
"id": 2,
"is_active": "1",
"category_id": 242,
...
"price": "12.00",
"attrs": [
{
"attr_name": "Brand",
"value": "Lenovo"
},
{
"attr_name": "Model",
"value": "B570"
},
{
"attr_name": "OS",
"value": "Linux"
},
{
"attr_name": "Other",
"value": "<div>Some text of 'Other' property<br><img src......><ul><li>......</ul></div>"
}
]
},
{
"id": 3,
"is_active": "1",
"category_id": 242,
...
"price": "24.00",
"attrs": [
{
"attr_name": "Brand",
"value": "Asus"
},
{
"attr_name": "Model",
"value": "QZ85"
},
{
"attr_name": "OS",
"value": "Windows"
},
{
"attr_name": "Other",
"value": "<div>Some text of 'Other' property<br><img src......><ul><li>......</ul></div>"
}
]
}
Attributes such as "Model" and "Other" are not used when filtering products, they are only displayed on the product page. On the other attributes (Brand, OS, and others ...) I want to receive aggregations.
When I try to aggregate the attrs.value field, of course I get aggregations for all data (including the large "Other" fields, in which there can be a lot of HTML).
"aggs": {
"facet_value": {
"terms": {
"field": "attrs.value",
"size": 0
}
}
}
How to exclude "attrs.attr_name": ["Model", "Other"]?
Change the mapping is a bad solution for me, but if it is inevitable, tell me how to do it? I guess I'll need to make "attrs" nested?
UPD:
I want to receive:
1. All the attributes that the products have in a certain category, except for those that I indicate in the settings of the my system (in this example I will exclude "Model" and "Other").
2. Number of products near each value.
It should look like this:
For category "Laptops":
Brand:
Lenovo (18)
Asus (19)
.....
OS:
Windows (19)
Linux (5)
...
For "computer monitors":
Brand:
Samsung (18)
LG (19)
.....
Resolution:
1360x768 (19)
1920x1080 (22)
....
It's Terms Aggregation , I use this for the number of products for each category. And I try it for attrs.value, but I do not know how to exclude "attrs.value", which refer to "attrs.attr_name": "Model" & "attrs.attr_name": "Other".
UPD2:
In my case if map attrs as nested type, the weight of the index increases by 30%.
from 2700Mi to 3510Mi.
If there is no other option, I'll have to put up with it.
you have to map first attrs as nested type and use nested aggregations.
PUT no_play
{
"mappings": {
"document_type" : {
"properties": {
"is_active" : {
"type": "long"
},
"category_id" : {
"type": "long"
},
"attrs" : {
"type": "nested",
"properties": {
"attr_name" : {
"type" : "keyword"
},
"value" : {
"type" : "keyword"
}
}
}
}
}
}
}
POST no_play/document_type
{
"id": 3,
"is_active": "1",
"category_id": 242,
"price": "24.00",
"attrs": [
{
"attr_name": "Brand",
"value": "Asus"
},
{
"attr_name": "Model",
"value": "QZ85"
},
{
"attr_name": "OS",
"value": "<div>Some text of 'Other' property<br><img src......><ul><li>......</ul></div>"
},
{
"attr_name": "Other",
"value": "<div>Some text of 'Other' property<br><img src......><ul><li>......</ul></div>"
}
]
}
Since you didn't mention how you want to aggregate.
Case 1) If you want to count the attrs as individual. This metric gives you count of term occurrences.
POST no_play/_search
{
"size": 0,
"aggs": {
"nested_aggregation_value": {
"nested": {
"path": "attrs"
},
"aggs": {
"value_term": {
"terms": {
"field": "attrs.value",
"size": 10
}
}
}
}
}
}
POST no_play/_search
{
"size": 0,
"aggs": {
"nested_aggregation_value": {
"nested": {
"path": "attrs"
},
"aggs": {
"value_term": {
"terms": {
"field": "attrs.value",
"size": 10
},
"aggs": {
"reverse_back_to_roots": {
"reverse_nested": {
}
}
}
}
}
}
}
}
Now to get count of root document with attrs value you will need to hook a reverse nested aggregation to move the aggregator a level up to the level of root document.
Think of the following document.
{
"id": 3,
"is_active": "1",
"category_id": 242,
"price": "24.00",
"attrs": [
{
"attr_name": "Brand",
"value": "Asus"
},
{
"attr_name": "Model",
"value": "QZ85"
},
{
"attr_name": "OS",
"value": "repeated value"
},
{
"attr_name": "Other",
"value": "repeated value"
}
]
}
For first query the value count for 'repeated value' will be 2 and for second query it will be 1
Note
here is how you can do filtering to exclude
POST no_play/_search
{
"size": 0,
"aggs": {
"nested_aggregation_value": {
"nested": {
"path": "attrs"
},
"aggs": {
"filtered_results": {
"filter": {
"bool": {
"must_not": [{
"terms": {
"attrs.attr_name": ["Model", "Brand"]
}
}]
}
},
"aggs": {
"value_term": {
"terms": {
"field": "attrs.value",
"size": 10
}
}
}
}
}
}
}
}
POST no_play/_search
{
"size": 0,
"aggs": {
"nested_aggregation_value": {
"nested": {
"path": "attrs"
},
"aggs": {
"filtered_results": {
"filter": {
"bool": {
"must_not": [{
"terms": {
"attrs.attr_name": ["Model", "Brand"]
}
}]
}
},
"aggs": {
"value_term": {
"terms": {
"field": "attrs.value",
"size": 10
},
"aggs": {
"reverse_back_to_roots": {
"reverse_nested": {}
}
}
}
}
}
}
}
}
}
Thanks

Elastic search find sum of two fields in single query

I have a requirement of find sum of two fields in a single query. I have managed to find the sum of one field, but facing difficulty to add two aggression in a single query.
My json look like the following way
{
"_index": "outboxprov1",
"_type": "message",
"_id": "JXpDpNefSkKO-Hij3T9m4w",
"_score": 1,
"_source": {
"team_id": "1fa86701af05a863f59dd0f4b6546b32",
"created_user": "1a9d05586a8dc3f29b4c8147997391f9",
"created_ip": "192.168.2.245",
"folder": 1,
"post_count": 5,
"sent": 3,
"failed": 2,
"status": 6,
"message_date": "2014-08-20T14:30Z",
"created_date": "2014-06-27T04:34:30.885Z"
}
}
My search query
{
"query": {
"filtered": {
"query": {
"match": {
"team_id": {
"query": "1fa86701af05a863f59dd0f4b6546b32"
}
}
},
"filter": {
"and": [
{
"term": {
"status": "6"
}
}
]
}
}
},
"aggs": {
"intraday_return": {
"sum": {
"field": "sent"
}
}
},
"aggs": {
"intraday_return": {
"sum": {
"field": "failed"
}
}
}
}
How to put two aggression in one query? Please help me to solve this issue. Thank you
You can compute the sum using script
Example:
{
"size": 0,
"aggregations": {
"age_ranges": {
"range": {
"script": "DateTime.now().year - doc[\"birthdate\"].date.year",
"ranges": [
{
"from": 22,
"to": 25
}
]
}
}
}
}
your query should contain
"script" : "doc['sent'].value+doc['failed'].value"
There can be multiple sub aggregates
{
"query": {
"filtered": {
"query": {
"match": {
"team_id": {
"query": "1fa86701af05a863f59dd0f4b6546b32"
}
}
},
"filter": {
"and": [
{
"term": {
"status": "6"
}
}
]
}
}
},
"aggs": {
"intraday_return_sent": {
"sum": {
"field": "sent"
}
},
"intraday_return_failed": {
"sum": {
"field": "failed"
}
}
}
}

Resources