Elastic Search: Bool Query in nested properties - elasticsearch

Let's assume I have data structured like this:
{ "id": "120400871755634330808993320",
"name": "Metaalschroef binnenzeskant, DIN 912 RVS A4-80",
"description": "m16x70 cilinderschroef bzk a4-80 din912 klasse 80",
"fullDescription": "Metaalschroef met een binnenzeskant cilinderkop",
"synonyms": [],
"properties": [
{
"name": "draad",
"value": "16",
"sort": 99
},
{
"name": "lengte",
"value": "70",
"sort": 99
},
{
"name": "materiaal",
"value": "roestvaststaal",
"sort": 99
},
{
"name": "kwaliteit (materiaal)",
"value": "A4",
"sort": 99
},
{
"name": "DIN",
"value": "912",
"sort": 99
},
{
"name": "AISI",
"value": "316",
"sort": 99
},
{
"name": "draadsoort",
"value": "metrisch",
"sort": 99
},
{
"name": "Merk",
"value": "Elcee Holland",
"sort": 1
}
]
}
How do I write a boolean query that selects all documents that have a property with name "draad" and value "16" and a property with name "lengte" and value "70"?
Right now I have this, but it returns 0 results:
"query" : {
"nested" : {
"path" : "properties",
"query" : {
"bool" : {
"must" : [{
"bool" : {
"must" : [{
"term" : {
"properties.name" : "Merk"
}
}, {
"term" : {
"properties.value" : "Facom"
}
}
]
}
}, {
"bool" : {
"must" : [{
"term" : {
"properties.name" : "materiaal"
}
}, {
"term" : {
"properties.value" : "kunststof"
}
}
]
}
}
]
}
}
}
}
Replacing the highest level "must" with "should" returns too many results, which makes sense as it translates to an "or".

When using must, the engine searches for a single nested document with name:Merk and value:Facom that also has name:materiaal and value:kunststof - which can never be true for one nested document at once.
When using should, as you mentioned, it translates to "or" - which is indeed possible.
The problem is that you also get the entire parent document with all its nested documents.
In my own answer I'm showing the steps to create an index with nested documents (you should mark the field properties as nested type).
After completing those steps, you'll be able to get results with the following query:
{
"_source": [
"id",
"name",
"description"
],
"query": {
"bool": {
"must": [
{
"nested": {
"path": "properties",
"query": {
"bool": {
"should": [
{
"bool": {
"must": [
{
"term": {
"properties.name": "Merk"
}
},
{
"term": {
"properties.value": "Facom"
}
}
]
}
},
{
"bool": {
"must": [
{
"term": {
"properties.name": "materiaal"
}
},
{
"term": {
"properties.value": "kunststof"
}
}
]
}
}
]
}
},
"inner_hits":{
"size": 10
}
}
}
]
}
}
}

I found a solution that is working very well!
My property object now looks like this:
{
"name": "breedte(mm)",
"value": "1000",
"unit": "mm",
"sort": 99,
"nameSlug": "breedte-mm",
"slug": "breedte-mm-1000"
},
I added a slug (containing a normalized string for key + value) and a nameSlug, which is a normalized string for the name.
My index is mapped like this:
"properties": {
"type": "nested",
"include_in_parent": true,
"properties": {
"name": {
"type": "keyword"
},
"nameSlug": {
"type": "keyword"
},
"slug": {
"type": "keyword"
},
"sort": {
"type": "long"
},
"unit": {
"type": "text",
"index": false
},
"value": {
"type": "keyword"
}
}
}
The "include_in_parent" is important here. It allows me to do the query below:
"query": {
"bool": {
"must": [
{
"terms": {
"properties.slug": [
"merk-orbis",
"merk-bahco"
]
}
},
{
"terms": {
"properties.slug": [
"materiaal-staal",
"materiaal-kunststof"
]
}
}
]
}
},
This query searches for all documents where "merk" is "Orbis" or "Bahco" and where "materiaal" is "staal" or "kunststof".
My aggregations look like this:
"merk_query": {
"filter": {
"bool": {
"must": [
{
"terms": {
"properties.slug": [
"materiaal-staal",
"materiaal-kunststof"
]
}
}
]
}
},
"aggs": {
"merk_facets": {
"nested": {
"path": "properties"
},
"aggs": {
"merk_only": {
"filter": {
"term": {
"properties.nameSlug": {
"value": "merk"
}
}
},
"aggs": {
"facets": {
"terms": {
"field": "properties.name",
"size": 1
},
"aggs": {
"facetvalues": {
"terms": {
"field": "properties.value",
"size": 10
}
}
}
}
}
}
}
}
}
},
I run a filter aggregation which filters all documents that match a facet (but not the current one I am building).
The result of this aggregation is something like this:
"merk_query": {
"doc_count": 7686,
"merk_facets": {
"doc_count": 68658,
"merk_only": {
"doc_count": 7659,
"facets": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "Merk",
"doc_count": 7659,
"facetvalues": {
"doc_count_error_upper_bound": 10,
"sum_other_doc_count": 438,
"buckets": [
{
"key": "Orbis",
"doc_count": 6295
},
{
"key": "DX",
"doc_count": 344
},
{
"key": "AXA",
"doc_count": 176
},
{
"key": "Talen Tools",
"doc_count": 127
},
{
"key": "Nemef",
"doc_count": 73
},
{
"key": "bonfix",
"doc_count": 67
},
{
"key": "Bahco",
"doc_count": 64
},
{
"key": "Henderson",
"doc_count": 27
},
{
"key": "Maasland Groep",
"doc_count": 25
},
{
"key": "SYSTEC",
"doc_count": 23
}
]
}
}
]
}
}
}
}
},
And this is the end result in the browser:

Related

Further filtering of aggregations

I have a question regarding aggregation in elastic search. I have a document like the following:
{
"_index": "products",
"_type": "product",
"_id": "ID-12345",
"_score": 1,
"_source": {
"created_at": "2017-08-04T17:56:44.592Z",
"updated_at": "2017-08-04T17:56:44.592Z",
"product_information": {
"sku": "12345",
"name": "Product Name",
"price": 25,
"brand": "Brand Name",
"url": "URL"
},
"product_detail": {
"description": "Product description text here.",
"string_facets": [
{
"facet_name": "Colour",
"facet_value": "Grey"
},
{
"facet_name": "Category",
"facet_value": "Linen"
},
{
"facet_name": "Category",
"facet_value": "Throws & Blanket"
},
{
"facet_name": "Keyword",
"facet_value": "Contemporary"
},
{
"facet_name": "Keyword",
"facet_value": "Sophisticated"
}
]
}
}
}
I am storing product information such as Colour, Material, Category and Keywords within the product_detail.string_facets field. I'd like to use this for aggregation to get Colour/Material/Category/Keyword suggestions but as separate buckets. I.e, there is a separate bucket for each of those string_facet types as defined in product_detail.string_facets.facet_name.
This is the query I have at the moment which is returning data, but not as I expect. First the query (this was just to try and get Colours):
{
"from": 0,
"size": 12,
"query": {
"bool": {
"should": [
{
"multi_match": {
"query": "Rug",
"fields": ["product_information.name", "product_detail.string_facets.facet_value"]
}
},
{
"multi_match": {
"query": "Blue",
"fields": ["product_information.name", "product_detail.string_facets.facet_name"]
}
}
],
"minimum_should_match": "100%"
}
},
"aggs": {
"suggestions": {
"filter": { "term": { "product_detail.string_facets.facet_name.keyword": "Colour" }},
"aggs": {
"colours": {
"terms": {
"field": "product_detail.string_facets.facet_value.keyword",
"size": 10
}
}
}
}
}
}
This is giving me output like the following:
"aggregations": {
"suggestions": {
"doc_count": 21,
"colours": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 23,
"buckets": [
{
"key": "Rug",
"doc_count": 21
},
{
"key": "Blue",
"doc_count": 18
},
{
"key": "Bold",
"doc_count": 7
},
{
"key": "Modern",
"doc_count": 6
},
{
"key": "Multi-Coloured",
"doc_count": 5
},
{
"key": "Contemporary",
"doc_count": 4
},
{
"key": "Traditional",
"doc_count": 4
},
{
"key": "White",
"doc_count": 4
},
{
"key": "Luxurious",
"doc_count": 3
},
{
"key": "Minimal",
"doc_count": 3
}
]
}
}
}
It has given me the values for every facet_name rather than only those whose facet_name is Colour, as I thought it would.
Any help would be greatly appreciated. Elasticsearch seems very powerful but the documentation is quite daunting!
You did not show what the mapping looks like, but I suppose that the product_detail.string_facets field is just an inner object field, and that is the reason why you get this kind of result. With this type of mapping Elasticsearch flattens the array into a simple list of field names and values. In your case it becomes:
{
"product_detail.string_facets.facet_name": ["Colour", "Category", "Keyword"],
"product_detail.string_facets.facet_value": ["Grey", "Linen", "Throws & Blanket", "Contemporary", "Sophisticated"]
}
As you can see, based on this structure, Elasticsearch cannot know how to aggregate the data.
To make it work, the product_detail.string_facets field should be of type nested. The mapping for string_facets should be similar to this (note "type": "nested"):
"string_facets": {
"type": "nested",
"properties": {
"facet_name": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"facet_value": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
}
}
}
Now I index the following document:
{
"created_at": "2017-08-04T17:56:44.592Z",
"updated_at": "2017-08-04T17:56:44.592Z",
"product_information": {
"sku": "12345",
"name": "Rug",
"price": 25,
"brand": "Brand Name",
"url": "URL"
},
"product_detail": {
"description": "Product description text here.",
"string_facets": [
{
"facet_name": "Colour",
"facet_value": "Blue"
},
{
"facet_name": "Colour",
"facet_value": "Red"
},
{
"facet_name": "Category",
"facet_value": "Throws & Blanket"
},
{
"facet_name": "Keyword",
"facet_value": "Contemporary"
}
]
}
}
Now, to get aggregation of colour suggestions as separate buckets, you can try this query (I simplified the bool query for the need of my document):
{
"from": 0,
"size": 12,
"query": {
"bool": {
"should": [
{
"multi_match": {
"query": "Rug",
"fields": ["product_information.name", "product_detail.string_facets.facet_value"]
}
}
]
}
},
"aggs": {
"facets": {
"nested" : {
"path" : "product_detail.string_facets"
},
"aggs": {
"suggestions": {
"filter": { "term": { "product_detail.string_facets.facet_name.keyword": "Colour" }},
"aggs": {
"colours": {
"terms": {
"field": "product_detail.string_facets.facet_value.keyword",
"size": 10
}
}
}
}
}
}
}
}
And result:
{
...,
"hits": {
...
},
"aggregations": {
"facets": {
"doc_count": 5,
"suggestions": {
"doc_count": 2,
"colours": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "Blue",
"doc_count": 1
},
{
"key": "Red",
"doc_count": 1
}
]
}
}
}
}
}

ElasticSearch - How to aggregation access log ignore GET parameter?

I want to aggregate access by function path.
{
"query": {
"bool": {
"must": [
{
"wildcard": {
"path.keyword": "/hex/*"
}
}
]
}
},
"from": 0,
"size": 0,
"aggs": {
"path": {
"terms": {
"field": "path.keyword"
}
}
}
}
And I get results like these:
{
"key": "/hex/user/admin_user/auth",
"doc_count": 38
},
{
"key": "/hex/report/chart/fastreport_lobby_all?start_date=2017-06-29&end_date=2017-07-05&category=date_range&value[]=payoff",
"doc_count": 35
},
{
"key": "/hex/report/chart/fastreport_lobby_all?start_date=2017-06-29&end_date=2017-07-05&category=lobby&value[]=payoff",
"doc_count": 35
},
{
"key": "/hex/report/chart/online_membership?start_date=2017-06-29&end_date=2017-07-05&category=datetime_range&value[]=user_total",
"doc_count": 34
}
There are two /hex/report/chart/fastreport_lobby_all?balabala... results, because the query string is part of the key.
So neither bucket shows the real count for this function.
Is there any way to count these as one?
{
"key": "/hex/report/chart/fastreport_lobby_all",
"doc_count": 70
}
I don't think this is possible without a custom analyzer like
PUT your_index
{
"settings": {
"analysis": {
"analyzer": {
"query_analyzer": {
"type": "custom",
"tokenizer": "split_query",
"filter": ["top1"
]
}
},
"filter":{
"top1":{
"type": "limit",
"max_token_count": 1
}
},
"tokenizer":{
"split_query":{
"type": "pattern",
"pattern": "\\?"
}
}
}
},
"mappings": {
"your_log_type": {
"properties": {
"path": {
"type": "text",
"fields": {
"keyword": {
"type":"keyword"
},
"no_query": {
"type":"string",
"fielddata":true,
"analyzer":"query_analyzer"
}
}
}
}
}
}
}
And then query on
POST your_index/your_log_type/_search
{
"query": {
"bool": {
"must": [
{
"wildcard": {
"path.keyword": "/hex/*"
}
}
]
}
},
"from": 0,
"size": 0,
"aggs" : {
"genres" : {
"terms" : { "field" : "path.no_query" }
}
}
}

Exclude results based on key in elasticsearch

I have the below mapping for a type in Elasticsearch:
"properties": {
"userid": {
"type": "integer"
},
"engid": {
"type": "short"
},
"score": {
"type": "short"
},
"name": {
"type": "string",
"index": "not_analyzed"
},
"submitTime": {
"type": "date",
"format": "yyyy-MM-dd HH:mm:ss"
}
}
And my search query as:
{
"size": 10,
"query": {
"filtered": {
"query": {
"match_all": {}
},
"filter": {
"range": {
"submitTime": {
"gt": "now-18d"
}
}
}
}
},
"aggs": {
"name": {
"terms": {
"field": "name",
"order": {
"_term": "asc"
}
},
"aggs": {
"score": {
"terms": {
"field": "score"
}
}
}
}
}
}
This is giving my expected result as:
"aggregations": {
"name": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "---",
"doc_count": 169529,
"score": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": 0,
"doc_count": 160133
},
{
"key": 5,
"doc_count": 9395
},
{
"key": 4,
"doc_count": 1
}
]
}
},
{
"key": "John",
"doc_count": 1,
"score": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": 5,
"doc_count": 1
}
]
}
}
Now I want to remove the bucket from my results where name='---'. I tried using 'not', but it didn't work. Any hint will be appreciated.
PS: I am new to elasticsearch, and just trying to expand my knowledge.
You need to exclude the --- value in your query:
{
"size": 10,
"query": {
"filtered": {
"query": {
"match_all": {}
},
"filter": {
"bool": {
"must": [
{
"range": {
"submitTime": {
"gt": "now-18d"
}
}
}
],
"must_not": [
{
"term": {
"name": "---"
}
}
]
}
}
}
},
"aggs": {
"name": {
"terms": {
"field": "name",
"order": {
"_term": "asc"
}
},
"aggs": {
"score": {
"terms": {
"field": "score"
}
}
}
}
}
}

ElasticSearch Nested Filter and Aggregation

So I have this mapping:
"employee": {
"properties": {
"DaysOff": {
"type": "nested",
"properties": {
"Date": {
"type": "date",
"format": "strict_date_optional_time||epoch_millis"
},
"Days": {
"type": "double"
},
"ID": {
"type": "long"
}
}
}
}
}
So basically an employee can have days off. Each day off they have is stored in an array under the property DaysOff. Days can be a fraction of a day, so if an employee took half a day off then it would be 0.5.
So I have this search:
{
"size": 45,
"filter": {
"nested": {
"path": "DaysOff",
"filter": {
"range": {
"DaysOff.Date": {
"from": "now-2M",
"to": "now"
}
}
}
}
}
}
This brings me back 45 documents, which is correct. I just can't figure out how to now apply an aggregation to these documents in order to get back the sum of all the days that have been taken.
Using this resource I tried the aggs below, but it didn't give me the correct result:
{
"size": 45,
"filter": {
"nested": {
"path": "DaysOff",
"filter": {
"range": {
"DaysOff.Date": {
"from": "now-2M",
"to": "now"
}
}
}
}
},
"aggs": {
"sum_docs": {
"nested": {
"path": "DaysOff"
},
"aggs": {
"stepped_down": {
"sum": {
"field": "DaysOff.Days"
}
}
}
}
}
}
You need to filter on those nested documents to get the correct results. From the docs:
"Because nested documents are indexed as separate documents, they can only be accessed within the scope of the nested query."
I created the index like this:
POST employee
{
"mappings": {
"emp_map": {
"properties": {
"DaysOff": {
"type": "nested",
"properties": {
"Date": {
"type": "date",
"format": "strict_date_optional_time||epoch_millis"
},
"Days": {
"type": "double"
},
"ID": {
"type": "long"
}
}
},
"name": {
"type": "string"
}
}
}
}
}
Then I indexed a few documents like this:
PUT employee/emp_map/1
{
"name" : "messi",
"DaysOff" : [
{
"Date" : "2015-11-01",
"Days" : 1,
"ID" : 11
},
{
"Date" : "2014-11-01",
"Days" : 2,
"ID" : 11
},
{
"Date" : "2015-12-01",
"Days" : 0.5,
"ID" : 11
}
]
}
PUT employee/emp_map/2
{
"name" : "ronaldo",
"DaysOff" : [
{
"Date" : "2015-10-01",
"Days" : 3,
"ID" : 12
},
{
"Date" : "2014-11-01",
"Days" : 2,
"ID" : 12
},
{
"Date" : "2015-12-01",
"Days" : 0.5,
"ID" : 12
}
]
}
PUT employee/emp_map/3
{
"name" : "suarez",
"DaysOff" : [
{
"Date" : "2015-11-01",
"Days" : 4,
"ID" : 13
},
{
"Date" : "2015-11-09",
"Days" : 2,
"ID" : 13
},
{
"Date" : "2015-12-01",
"Days" : 1.5,
"ID" : 13
}
]
}
This is my query. Notice the filter aggregation inside the nested aggregation; without it, ES will give you the sum of all the days taken off, not just those in the date range.
GET employee/_search
{
"query": {
"bool": {
"filter": {
"nested": {
"path": "DaysOff",
"query": {
"range": {
"DaysOff.Date": {
"from": "now-2M",
"to": "now"
}
}
}
}
}
}
},
"aggs": {
"emp_name": {
"terms": {
"field": "name",
"size": 10
},
"aggs": {
"nesting": {
"nested": {
"path": "DaysOff"
},
"aggs": {
"filter_date": {
"filter": {
"range": {
"DaysOff.Date": {
"from": "now-2M",
"to": "now"
}
}
},
"aggs": {
"sum_taken_off_days": {
"sum": {
"field": "DaysOff.Days"
}
}
}
}
}
}
}
}
},
"size": 0
}
This is the result I get,
"aggregations": {
"emp_name": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "messi",
"doc_count": 1,
"nesting": {
"doc_count": 3,
"filter_date": {
"doc_count": 2,
"sum_taken_off_days": {
"value": 1.5
}
}
}
},
{
"key": "ronaldo",
"doc_count": 1,
"nesting": {
"doc_count": 3,
"filter_date": {
"doc_count": 1,
"sum_taken_off_days": {
"value": 0.5
}
}
}
},
{
"key": "suarez",
"doc_count": 1,
"nesting": {
"doc_count": 3,
"filter_date": {
"doc_count": 3,
"sum_taken_off_days": {
"value": 7.5
}
}
}
}
]
}
}
P.S : This is per employee, you can remove emp_name terms aggregation to get sum of all employees.

How to aggregate sub buckets of each bucket on nested documents

Full sample code:
https://gist.github.com/anonymous/329eaaf5654096c529da
I have a simple, standard product/options mapping like this for a standard ecommerce site:
"mappings": {
"product": {
"properties" : {
"name":
{
"type": "string",
"fields": {
"raw": { "type": "string", "analyzer": "lowercase" }
},
"analyzer": "default"
},
"options" : {
"type": "nested",
"properties": {
"id": {"type": "integer"},
"name": {"type": "string"},
"values": {"type": "nested"}
}
},
"price":{"type": "integer"},
"createdAt": {
"type": "date",
"format": "basic_date_time"
}
}
}
}
Please note that 1 product has multiple options, and each option can have multiple values (ie.: a Shirt with option Color including blue, red; and option Size including M, XL)
Currently, after the query to search for products using multiple conditions, I aggregate the result to get a list of all options and options values in the result set:
"aggregations": {
"options": {
"nested": {
"path": "options"
},
"aggs": {
"options_ids": {
"terms": {
"field": "id"
}
},
"aggs": {
"nested": {
"path": "options.values"
},
"aggs": {
"options_values_ids": {
"terms": {
"field": "options.values.id"
}
}
}
}
}
}
}
It all works well, except that I get something like this:
"aggregations": {
"options": {
"doc_count": 4,
"options_ids": {
"buckets": [
{
"key": 1,
"doc_count": 2
},
{
"key": 2,
"doc_count": 2
}
]
},
"aggs": {
"doc_count": 7,
"options_values_ids": {
"buckets": [
{
"key": 1,
"doc_count": 2
},
{
"key": 5,
"doc_count": 2
},
{
"key": 2,
"doc_count": 1
},
{
"key": 3,
"doc_count": 1
},
{
"key": 6,
"doc_count": 1
}
]
}
}
}
}
As you can see, there is no way for me to know which option values belong to which options from the result. It will be much better if the available options values can be listed under each option. Is that possible at all?
You would need to nest your aggregations:
"aggregations": {
"options" : {
"aggs" : {
"options_ids" : {
"aggs" : {
"aggs" : {
"options_values_ids" : {
"terms" : {
"field" : "options.values.id"
}
}
},
"nested" : {
"path" : "options.values"
}
},
"terms" : {
"field" : "id"
}
}
},
"nested" : {
"path" : "options"
}
}
}

Resources