How do I write correct query in nested document? - elasticsearch

I have two indices: country and person
PUT person
{
"mappings": {
"properties": {
"info": {
"type": "nested",
"properties": {
"int_val": {"type": "integer"},
"str_val": {"type": "text"},
"field_id": {"type": "keyword"},
"country_id": {"type": "keyword"}
}
}
}
}
}
PUT country
{
"mappings": {
"properties": {
"country_ids": {"type": "keyword"}
}
}
}
PUT country/_doc/user1
{
"country_ids": ["111", "222", "333"]
}
PUT person/_doc/1
{
"info": [
{
"field_id": "1000",
"str_val": "Jack Kotlin",
"country_id": "444"
},
{
"field_id": "1000",
"str_val": "Jack Martin",
"country_id": "333"
},
{
"field_id": "1001",
"str_val": "Jack",
"country_id": "111"
},
{
"field_id": "2000",
"int_val": 30,
"country_id": "444"
},
{
"field_id": "2000",
"int_val": 30,
"country_id": "333"
},
{
"field_id": "2001",
"int_val": 30,
"country_id": "111"
}
]
}
If user1 queries '(field_id=1000 with str_val="Jack") & (field_id=2000 with int_val=30)', the following result must be returned:
{
"info": [
{
"field_id": "1000",
"str_val": "Jack Martin",
"country_id": "333"
},
{
"field_id": "2000",
"int_val": 30,
"country_id": "333"
}
]
}
Help me please!
I wrote a query for single part: (field_id=1000 with str_val="Jack")
GET person/_search
{
"query": {
"nested": {
"path": "info",
"query": {
"bool": {
"filter": [
{
"terms": {
"info.country_id": {
"index": "country",
"id": "user1",
"path": "country_ids"
}
}
}
],
"must": [
{"match": {"info.field_id": "1000"}},
{"match": {"info.str_val": "Jack"}}
]
}
},
"inner_hits": {}
}
},
"_source": false
}
and got a correct result:
{
"took" : 5,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 1,
"relation" : "eq"
},
"max_score" : 1.1530519,
"hits" : [
{
"_index" : "person",
"_id" : "1",
"_score" : 1.1530519,
"inner_hits" : {
"info" : {
"hits" : {
"total" : {
"value" : 1,
"relation" : "eq"
},
"max_score" : 1.1530519,
"hits" : [
{
"_index" : "person",
"_id" : "1",
"_nested" : {
"field" : "info",
"offset" : 1
},
"_score" : 1.1530519,
"_source" : {
"field_id" : "1000",
"str_val" : "Jack Martin",
"country_id" : "333"
}
}
]
}
}
}
}
]
}
}
but I don't know how to write a query for the multi-part case!

The following query will show:
Documents with (id=1000 AND str_field=Jack) AND (id=2000 AND int_field=30)
GET person/_search
{
"query": {
"nested": {
"path": "info",
"query": {
"bool": {
"filter": [
{
"terms": {
"info.country_id": {
"index": "country",
"id": "user1",
"path": "country_ids"
}
}
},
{
"bool": {
"must": [
{
"bool": {
"must": [
{
"term": {
"info.field_id": "2000"
}
},
{
"match": {
"info.int_val": 30
}
}
]
}
},
{
"bool": {
"must": [
{
"term": {
"info.field_id": "1000"
}
},
{
"match": {
"info.str_val": "Jack"
}
}
]
}
}
]
}
}
]
}
},
"inner_hits": {}
}
},
"_source": false
}

Related

Using Metric Aggregation with Composite Aggregation

I have the following mapping for an index:
{
"test5" : {
"mappings" : {
"dynamic" : "false",
"properties" : {
"messageType" : {
"type" : "keyword"
},
"groupId" : {
"type" : "keyword"
},
"payload" : {
"type" : "nested",
"include_in_root" : true,
"properties" : {
"request" : {
"type" : "nested",
"include_in_root" : true,
"properties" : {
"data" : {
"type" : "nested",
"include_in_root" : true,
"properties" : {
"chargingPeriods" : {
"type" : "nested",
"include_in_root" : true,
"properties" : {
"endDateTime" : {
"type" : "date"
},
"power" : {
"type" : "double"
},
"startDateTime" : {
"type" : "date"
}
}
}
}
}
}
}
}
}
}
}
}
}
First use case: I want buckets in 2-minute intervals based on payload.request.data.chargingPeriods.startDateTime and groupId, with a filter criterion on messageType. Note that chargingPeriods is an array.
This query works for that use case:
GET test5/_search
{
"size": 0,
"aggs": {
"my_buckets": {
"composite": {
"sources": [
{ "sessionId": { "terms": { "field": "groupId"} } },
{
"date" : {
"date_histogram": {
"field": "payload.request.data.chargingPeriods.startDateTime",
"fixed_interval": "2m",
"format": "MM/dd/yyyy - hh:mm:ss",
"order": "asc"
}
}
}
]
}
}
},
"query": {
"terms": {
"messageType": [
"test"
]
}
}
}
Now I want metric aggregations done on these composite buckets returned and I tried this:
GET test5/_search
{
"size": 0,
"aggs": {
"my_buckets": {
"composite": {
"sources": [
{ "sessionId": { "terms": { "field": "groupId"} } },
{
"date" : {
"date_histogram": {
"field": "payload.request.data.chargingPeriods.startDateTime",
"fixed_interval": "2m",
"format": "MM/dd/yyyy - hh:mm:ss",
"order": "asc"
}
}
}
]
},
"aggregations": {
"metricAgg": {
"max": {
"field": "payload.request.data.chargingPeriods.power"
}
}
}
}
},
"query": {
"terms": {
"messageType": [
"test"
]
}
}
}
According to the ES documentation (https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-bucket-composite-aggregation.html), this should work by computing the metric aggregation on each composite bucket.
But instead of the metric aggregation being computed per composite bucket, it is computed across all the power values in the chargingPeriods array of the entire document.
How I created the index:
PUT /test5
{
"settings": {
"number_of_shards": 1
},
"mappings" : {
"dynamic" : "false",
"properties" : {
"groupId" : {
"type" : "keyword"
},
"messageType" : {
"type" : "keyword"
},
"payload" : {
"type" : "nested",
"include_in_root": true,
"properties": {
"request": {
"type":"nested",
"include_in_root":true,
"properties": {
"data": {
"type":"nested",
"include_in_root": true,
"properties": {
"chargingPeriods": {
"type": "nested",
"include_in_root": true,
"properties" : {
"endDateTime":{
"type": "date"
},
"power": {
"type": "double"
},
"startDateTime":{
"type": "date"
}
}
}
}
}
}
}
}
}
}
}
}
Test Data:
POST test5/_doc/testdocu1
{
"groupId": "563",
"messageType": "test",
"payload": {
"request": {
"data": {
"chargingPeriods": [
{
"endDateTime": "2022-10-13T17:42:25Z",
"power": 9.62857,
"startDateTime": "2022-10-13T17:41:55Z"
},
{
"endDateTime": "2022-10-13T17:42:55Z",
"power": 9.6491,
"startDateTime": "2022-10-13T17:42:25Z"
},
{
"endDateTime": "2022-10-13T17:43:25Z",
"power": 9.6491,
"startDateTime": "2022-10-13T17:42:55Z"
},
{
"endDateTime": "2022-10-13T17:43:55Z",
"power": 9.66963,
"startDateTime": "2022-10-13T17:43:25Z"
},
{
"endDateTime": "2022-10-13T17:44:25Z",
"power": 9.67128,
"startDateTime": "2022-10-13T17:43:55Z"
},
{
"endDateTime": "2022-10-13T17:44:55Z",
"power": 9.65079,
"startDateTime": "2022-10-13T17:44:25Z"
},
{
"endDateTime": "2022-10-13T17:45:25Z",
"power": 9.66492,
"startDateTime": "2022-10-13T17:44:55Z"
},
{
"endDateTime": "2022-10-13T17:45:55Z",
"power": 9.68544,
"startDateTime": "2022-10-13T17:45:25Z"
},
{
"endDateTime": "2022-10-13T17:46:25Z",
"power": 9.68544,
"startDateTime": "2022-10-13T17:45:55Z"
},
{
"endDateTime": "2022-10-13T17:46:55Z",
"power": 9.67434,
"startDateTime": "2022-10-13T17:46:25Z"
}
]
}
}
}
}
My output:
"aggregations" : {
"my_buckets" : {
"after_key" : {
"sessionId" : "563",
"date" : "10/13/2022 - 05:46:00"
},
"buckets" : [
{
"key" : {
"sessionId" : "563",
"date" : "10/13/2022 - 05:40:00"
},
"doc_count" : 1,
"metricAgg" : {
"value" : 9.68544
}
},
{
"key" : {
"sessionId" : "563",
"date" : "10/13/2022 - 05:42:00"
},
"doc_count" : 4,
"metricAgg" : {
"value" : 9.68544
}
},
{
"key" : {
"sessionId" : "563",
"date" : "10/13/2022 - 05:44:00"
},
"doc_count" : 4,
"metricAgg" : {
"value" : 9.68544
}
},
{
"key" : {
"sessionId" : "563",
"date" : "10/13/2022 - 05:46:00"
},
"doc_count" : 1,
"metricAgg" : {
"value" : 9.68544
}
}
]
}
}
As you can see, it chose the max payload.request.data.chargingPeriods.power from all the elements, ignoring the composite buckets. For example
{
"key" : {
"sessionId" : "563",
"date" : "10/13/2022 - 05:40:00"
},
"doc_count" : 1,
"metricAgg" : {
"value" : 9.68544
}
},
metricAgg should have been 9.62857
It doesn't work the way you expect because you're aggregating nested data that is mapped with include_in_root. As a result, all the nested fields are also added to the root document as if they were not nested, so the relation between each startDateTime and its power is lost.
The other issue is that your composite aggregation mixes nested data (payload...) and non-nested data (groupId); that won't work.
However, if you add the groupId field inside each element of your array, then you can make your query work like this:
GET test5/_search
{
"size": 0,
"aggs": {
"payload": {
"nested": {
"path": "payload"
},
"aggs": {
"request": {
"nested": {
"path": "payload.request"
},
"aggs": {
"data": {
"nested": {
"path": "payload.request.data"
},
"aggs": {
"charging": {
"nested": {
"path": "payload.request.data.chargingPeriods"
},
"aggs": {
"my_buckets": {
"composite": {
"sources": [
{
"sessionId": {
"terms": {
"field": "payload.request.data.chargingPeriods.groupId"
}
}
},
{
"date": {
"date_histogram": {
"field": "payload.request.data.chargingPeriods.startDateTime",
"fixed_interval": "2m",
"format": "MM/dd/yyyy - hh:mm:ss",
"order": "asc"
}
}
}
]
},
"aggregations": {
"metricAgg": {
"max": {
"field": "payload.request.data.chargingPeriods.power"
}
}
}
}
}
}
}
}
}
}
}
}
},
"query": {
"terms": {
"messageType": [
"test"
]
}
}
}
Results:
{
"key" : {
"sessionId" : "563",
"date" : "10/13/2022 - 05:40:00"
},
"doc_count" : 1,
"metricAgg" : {
"value" : 9.62857
}
},

How to group documents in Elasticsearch and get the documents in each group?

My Elasticsearch index contains products with a denormalized m:n relationship to categories.
My goal is to derive a categories index from it which contains the same information, but with the relationship inverted.
The index looks like this:
PUT /products
{
"mappings": {
"properties": {
"name": {
"type": "keyword"
},
"article_id": {
"type": "keyword"
},
"categories": {
"type": "nested",
"properties": {
"cat_name": {
"type": "keyword"
}
}
}
}
}
}
containing documents created like this:
POST /products/_doc
{
"name": "radio",
"article_id": "1001",
"categories": [
{ "cat_name": "audio" },
{ "cat_name": "electronics" }
]
}
POST /products/_doc
{
"name": "fridge",
"article_id": "1002",
"categories": [
{ "cat_name": "appliances" },
{ "cat_name": "electronics" }
]
}
I would like to get something like this back from Elasticsearch:
{
"name": "appliances",
"products": [
{
"name": "fridge",
"article_id": "1002"
}
]
},
{
"name": "audio",
"products": [
{
"name": "radio",
"article_id": "1001"
}
]
},
{
"name": "electronics",
"products": [
{
"name": "fridge",
"article_id": "1002"
},
{
"name": "radio",
"article_id": "1001"
}
]
}
which would eventually be put into an index such as:
PUT /categories
{
"mappings": {
"properties": {
"name": {
"type": "keyword"
},
"products": {
"type": "nested",
"properties": {
"name": {
"type": "keyword"
},
"article_id": {
"type": "keyword"
}
}
}
}
}
}
I cannot figure out how to do this without loading and grouping all products programmatically.
Here's what I have tried:
Bucket aggregation on field categories.cat_name
This gives me the document count per category but not the product documents. Using top_hits sub-aggregation seems to be limited to 100 documents.
Group using collapse field with expansion
Collapsing is only possible on a single-valued field.
I'm using Elasticsearch 8.1.
The query you need is this one:
POST products/_search
{
"size": 0,
"aggs": {
"cats": {
"nested": {
"path": "categories"
},
"aggs": {
"categories": {
"terms": {
"field": "categories.cat_name",
"size": 10
},
"aggs": {
"root": {
"reverse_nested": {},
"aggs": {
"products": {
"terms": {
"field": "name",
"size": 10
}
}
}
}
}
}
}
}
}
}
Which produces exactly what you need (less the article id, but that's easy):
"buckets" : [
{
"key" : "electronics",
"doc_count" : 2,
"root" : {
"doc_count" : 2,
"products" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : "fridge",
"doc_count" : 1
},
{
"key" : "radio",
"doc_count" : 1
}
]
}
}
},
{
"key" : "appliances",
"doc_count" : 1,
"root" : {
"doc_count" : 1,
"products" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : "fridge",
"doc_count" : 1
}
]
}
}
},
{
"key" : "audio",
"doc_count" : 1,
"root" : {
"doc_count" : 1,
"products" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : "radio",
"doc_count" : 1
}
]
}
}
}
]

elasticsearch find documents where the given number items in array has the same property value

First of all, I would like to show the simplified structure of a document.
{
"_id": "413123123",
"_source": {
"description": {
"firstLine": "this is my description",
"secondLine": "some value"
},
"InsertDetails": {
"Timestamp": "2020-06-12T11:14:36+0000"
},
"Links": [
{
"LinkDetails": {
"linkId": 2342,
"type": "Link",
"dateCreation": "2012-09-21T08:42:09+0000",
"typeId": 404019,
"typeOfLink": "http"
}
},
{
"LinkDetails": {
"linkId": 321313,
"type": "Link",
"dateCreation": "2012-08-21T08:42:09+0000",
"typeId": 404019,
"typeOfLink": "http"
}
},
{
"LinkDetails": {
"linkId": 1231,
"type": "Link",
"dateCreation": "2012-09-21T08:42:09+0000",
"typeId": 32323,
"typeOfLink": "https"
}
},
{
"LinkDetails": {
"linkId": 53434,
"type": "Link",
"dateCreation": "2012-11-21T08:42:09+0000",
"typeId": 123231,
"typeOfLink": "wss"
}
}
]
}
}
I have a problem forming a query that finds documents meeting the following requirements:
two items in the Links array have typeOfLink equal to http
the description string contains the word "this"
found items are sorted by date, descending
The version of elasticsearch is 2.3.2
I've tried a query like this:
{
"query": {
"bool": {
"must": [
{
"bool": {
"must": [
{
"match": {
"Links.LinkDetails.typeOfLink": "http"
}
}
],
"minimum_should_match": 2
}
},
{
"match": {
"description.firstLine": "this"
}
}
]
}
},
"sort": [
{
"InsertDetails.Timestamp": {
"order": "desc"
}
}
]
}
The problem is that this query also returns documents that have only one item in the array with the given value. I've tried to modify this query in different ways, but without any luck.
Added mapping
{
"my_index": {
"mappings": {
"en": {
"properties": {
"InsertDetails": {
"properties": {
"Timestamp": {
"type": "date",
"format": "strict_date_optional_time||epoch_millis"
}
}
},
"description": {
"properties": {
"firstLine": {
"type": "string"
},
"secondLine": {
"type": "string"
}
}
},
"Links": {
"properties": {
"LinkDetails": {
"properties": {
"linkId": {
"type": "long"
},
"type": {
"type": "string"
},
"dateCreation": {
"type": "date",
"format": "strict_date_optional_time||epoch_millis"
},
"typeOfLink": {
"type": "string"
},
"typeId": {
"type": "long"
}
}
}
}
}
}
}
}
}
}
First, you want to filter on a nested field (an array of objects).
To get coherent results, you have to map this field as a nested one.
https://www.elastic.co/guide/en/elasticsearch/reference/current/nested.html
Then, you will have to use aggregations.
What you want is to aggregate only the "http" values of typeOfLink, and return results only if the aggregation contains at least 2 matches (min_doc_count: 2).
Your query will be a little more complicated:
{
"size": 0,
"query": {
"bool": {
"filter": [
{
"nested": {
"path": "Links",
"query": {
"match": {
"Links.LinkDetails.typeOfLink": "http"
}
}
}
},
{
"match": {
"description.firstLine": "this"
}
}
]
}
},
"aggs": {
"links": {
"nested": {
"path": "Links"
},
"aggs": {
"http_only": {
"filter": {
"term": {
"Links.LinkDetails.typeOfLink.keyword": "http"
}
},
"aggs": {
"several_http": {
"terms": {
"field": "Links.LinkDetails.typeOfLink.keyword",
"min_doc_count": 2
}
,
"aggs": {
"complete_match": {
"top_hits": {
"size": 100
}
}
}
}
}
}
}
}
},
"sort": [
{
"InsertDetails.Timestamp": {
"order": "desc"
}
}
]
}
And your response will look like:
"aggregations" : {
"links" : {
"doc_count" : 4,
"http_only" : {
"doc_count" : 2,
"several_http" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : "http",
"doc_count" : 2,
"complete_match" : {
"hits" : {
"total" : {
"value" : 2,
"relation" : "eq"
},
"max_score" : 0.98082924,
"hits" : [
{
"_index" : "test3",
"_type" : "_doc",
"_id" : "ed1AkXQBD_dLYq-V78bD",
"_nested" : {
"field" : "Links",
"offset" : 0
},
"_score" : 0.98082924,
"_source" : {
"LinkDetails" : {
"linkId" : 2342,
"type" : "Link",
"dateCreation" : "2012-09-21T08:42:09+0000",
"typeId" : 404019,
"typeOfLink" : "http"
}
}
},
{
"_index" : "test3",
"_type" : "_doc",
"_id" : "ed1AkXQBD_dLYq-V78bD",
"_nested" : {
"field" : "Links",
"offset" : 1
},
"_score" : 0.98082924,
"_source" : {
"LinkDetails" : {
"linkId" : 321313,
"type" : "Link",
"dateCreation" : "2012-08-21T08:42:09+0000",
"typeId" : 404019,
"typeOfLink" : "http"
}
}
}
]
}
}
}
]
}
}
}
}
By playing with the given aggregation you should be able to do what you want.

How to filter nested aggregations in ElasticSearch?

For example, let's assume we have a product index with the following mapping:
{
"product": {
"mappings": {
"producttype": {
"properties": {
"id": {
"type": "keyword"
},
"productAttributes": {
"type": "nested",
"properties": {
"name": {
"type": "keyword"
},
"value": {
"type": "keyword"
}
}
},
"title": {
"type": "text",
"fields": {
"keyword": {
"type": "text",
"analyzer": "keyword"
}
},
"analyzer": "standard"
}
}
}
}
}
}
I am trying to find how many products have specific product attributes using the following query (I am using a fuzzy query to allow some edit distance):
{
"size": 0,
"query": {
"nested": {
"query": {
"fuzzy": {
"productAttributes.name": {
"value": "SSD"
}
}
},
"path": "productAttributes"
}
},
"aggs": {
"product_attribute_nested_agg": {
"nested": {
"path": "productAttributes"
},
"aggs": {
"terms_nested_agg": {
"terms": {
"field": "productAttributes.name"
}
}
}
}
}
}
But it returns all product attributes for each matched document and here is the response I get.
"aggregations" : {
"product_attribute_nested_agg" : {
"doc_count" : 6,
"terms_nested_agg" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : "SSD",
"doc_count" : 3
},
{
"key" : "USB 2.0",
"doc_count" : 3
}
]
}
}
}
Could you please guide me on how to filter the buckets so that only the matched attributes are returned?
Edit:
Here are some document samples:
"hits" : {
"total" : 12,
"max_score" : 1.0,
"hits" : [
{
"_index" : "product",
"_type" : "producttype",
"_id" : "677d1164-c401-4d36-8a08-6aa14f7f32bb",
"_score" : 1.0,
"_source" : {
"title" : "Dell laptop",
"productAttributes" : [
{
"name" : "USB 2.0",
"value" : "4"
},
{
"name" : "SSD",
"value" : "250 GB"
}
]
}
},
{
"_index" : "product",
"_type" : "producttype",
"_id" : "2954935a-7f60-437a-8a54-00da2d71da46",
"_score" : 1.0,
"_source" : {
"productAttributes" : [
{
"name" : "USB 2.0",
"value" : "3"
},
{
"name" : "SSD",
"value" : "500 GB"
}
],
"title" : "HP laptop"
}
},
]
}
To restrict the buckets to specific attributes only, you can use a filter aggregation.
Query:
{
"size": 0,
"aggs": {
"product_attribute_nested_agg": {
"nested": {
"path": "productAttributes"
},
"aggs": {
"inner": {
"filter": {
"terms": {
"productAttributes.name": [
"SSD"
]
}
},
"aggs": {
"terms_nested_agg": {
"terms": {
"field": "productAttributes.name"
}
}
}
}
}
}
}
}
This is what does the trick:
"filter": {
"terms": {
"productAttributes.name": [
"SSD"
]
}
}
You need to make the filter part of the aggregation.
Output:
"aggregations": {
"product_attribute_nested_agg": {
"doc_count": 4,
"inner": {
"doc_count": 2,
"terms_nested_agg": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "SSD",
"doc_count": 2
}
]
}
}
}
}
Filtering using Fuzziness :
GET /product/_search
{
"size": 0,
"aggs": {
"product_attribute_nested_agg": {
"nested": {
"path": "productAttributes"
},
"aggs": {
"inner": {
"filter": {
"fuzzy": {
"productAttributes.name": {
"value": "SSt",//here will match SSD
"fuzziness": 3//you can remove it to be as Auto
}
}
},
"aggs": {
"terms_nested_agg": {
"terms": {
"field": "productAttributes.name"
}
}
}
}
}
}
}
}

How to get matched field value of array field using painless script in Elasticsearch?

I'm using Elasticsearch 7.6
I have documents in Restaurant index which look like this:
"name" : "ABC restaurant",
"menu" : [
{
"name" : "chicken",
"count" : 23
},
{
"name" : "rice",
"count" : 10 }
]
Count means the number of orders received.
When a customer searches by menu name in the website, I would like to give a high score to a restaurant with a high count of the menu among several restaurants and expose it to the top of the search results.
To do this, it seems to be necessary to know the matched menu in each document in the painless script.
I'm wondering whether this is possible. And if so, how can I do it?
UPDATED
Thanks for your answer #jaspreet chahal
I made index like this:
PUT restaurant
{
"mappings": {
"properties": {
"name": {
"type": "text"
},
"menu":{
"type": "nested",
"properties": {
"name": {"type": "text"},
"count": {"type": "integer"}
}
}
}
}
}
POST /restaurant/_doc/1
{
"name": "ABC Restaurant",
"menu": [
{"name": "chicken", "count": 3},
{"name": "cake", "count": 5}
]
}
POST /restaurant/_doc/2
{
"name": "TEST Restaurant",
"menu": [
{"name": "chicken", "count": 10},
{"name": "cake", "count": 7},
{"name": "rice", "count": 2}
]
}
POST /restaurant/_doc/3
{
"name": "Good Restaurant",
"menu": [
{"name": "chicken", "count": 20},
{"name": "cake", "count": 13},
{"name": "rice", "count": 5}
]
}
What I'm trying to do is to get total score based on matched menu count while using multi match, like this:
GET restaurant/_search
{
"query": {
"bool": {
"must": [
{
"function_score": {
"query": {
"bool": {
"must": [
{
"multi_match": {
"query": "chicken",
"type": "cross_fields",
"fields": [
"menu.name",
"name"
],
"operator": "and"
}
}
]
}
},
"boost_mode": "replace",
"functions": [
{
"field_value_factor": {
"field": "menu.count",
"missing": 0
}
}
]
}
}
]
}
}
}
But the query above doesn't return any results.
To make it work, I added 'include_in_root:True' to the menu mapping. But in this case, I can't get a proper score. (It seems that the lowest menu count was used as the score regardless of the search word.)
May I ask how to make this work as I expect?
Thanks !
UPDATE Again.
I added multi match to your query
GET restaurant/_search
{
"query": {
"bool": {
"should": [
{
"multi_match": {
"query": "Good Restaurant chicken",
"type": "cross_fields",
"fields": [
"menu.name",
"name"
]
}
},
{
"nested": {
"path": "menu",
"query": {
"function_score": {
"query": {
"bool": {
"should": [
{
"match": {
"menu.name": {
"query": "Good Restaurant chicken",
"operator": "or"
}
}
}
]
}
},
"boost_mode": "replace",
"functions": [
{
"field_value_factor": {
"field": "menu.count",
"missing": 0
}
}
]
}
}
}
}
]
}
}
}
It returns all the results correctly!
But the score was affected by multi match query.
This is result of query:
"took" : 1,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 3,
"relation" : "eq"
},
"max_score" : 21.11436,
"hits" : [
{
"_index" : "restaurant",
"_type" : "_doc",
"_id" : "3",
"_score" : 21.11436,
"_source" : {
"name" : "Good Restaurant",
"menu" : [
{
"name" : "chicken",
"count" : 20
},
{
"name" : "cake",
"count" : 13
},
{
"name" : "rice",
"count" : 5
}
]
}
},
{
"_index" : "restaurant",
"_type" : "_doc",
"_id" : "2",
"_score" : 10.133532,
"_source" : {
"name" : "TEST Restaurant",
"menu" : [
{
"name" : "chicken",
"count" : 10
},
{
"name" : "cake",
"count" : 7
},
{
"name" : "rice",
"count" : 2
}
]
}
},
{
"_index" : "restaurant",
"_type" : "_doc",
"_id" : "1",
"_score" : 3.1335313,
"_source" : {
"name" : "ABC Restaurant",
"menu" : [
{
"name" : "chicken",
"count" : 3
},
{
"name" : "cake",
"count" : 5
}
]
}
}
]
}
}
Thank you very much for your answer :)
You can use function_score to give higher score to nested documents based on count value.
Query:
{
"query": {
"nested": {
"path": "menu",
"query": {
"function_score": {
"score_mode": "sum",
"boost_mode": "replace",
"query": {
"match": {
"menu.name": "chicken"
}
},
"functions": [
{
"field_value_factor": {
"field": "menu.count"
}
}
]
}
}
}
}
}
Result:
"hits" : [
{
"_index" : "index63",
"_type" : "_doc",
"_id" : "tA8IPHIBzLrvZDnz-ghE",
"_score" : 23.0,
"_source" : {
"name" : "ABC restaurant",
"menu" : [
{
"name" : "chicken",
"count" : 23
},
{
"name" : "rice",
"count" : 10
}
]
}
},
{
"_index" : "index63",
"_type" : "_doc",
"_id" : "tQ8JPHIBzLrvZDnz-AiA",
"_score" : 20.0,
"_source" : {
"name" : "XYZ restaurant",
"menu" : [
{
"name" : "chicken",
"count" : 20
},
{
"name" : "rice",
"count" : 8
}
]
}
}
]
Edit1:
For nested fields you need to use nested query, you cannot run search on these fields directly.
{
"query": {
"bool": {
"should": [
{
"match": {
"name": {
"operator": "and",
"query": "chicken"
}
}
},
{
"nested": {
"path": "menu",
"query": {
"function_score": {
"query": {
"bool": {
"must": [
{
"match": {
"menu.name": {
"query": "chicken",
"operator": "and"
}
}
}
]
}
},
"boost_mode": "replace",
"functions": [
{
"field_value_factor": {
"field": "menu.count",
"missing": 0
}
}
]
}
}
}
}
]
}
}
}
Edit2: To consider the score only from the nested query, you can either give it a higher boost so that documents matching your nested query are scored higher, or, if you don't want your multi-match to contribute any score, place it in a constant_score query with a boost of 0; documents matching it will then get a score of 0 from that clause.
{
"query": {
"bool": {
"should": [
{
"constant_score": {
"filter": {
"multi_match": {
"query": "Good Restaurant chicken",
"type": "cross_fields",
"fields": [
"name"
]
}
},
"boost": 0
}
},
{
"nested": {
"path": "menu",
"query": {
"function_score": {
"query": {
"bool": {
"should": [
{
"match": {
"menu.name": {
"query": "Good Restaurant chicken",
"operator": "or"
}
}
}
]
}
},
"boost_mode": "replace",
"functions": [
{
"field_value_factor": {
"field": "menu.count",
"missing": 0
}
}
]
}
}
}
}
]
}
}
}

Resources