I am trying to boost certain keywords in my multi_match query that are more important than other words.
data set ['black kurta','blue kurta','green kurta','black pant' ]
E.g. (search for "black kurta"):
'black kurta' should come first, then 'blue kurta' and 'green kurta', and last of all 'black pant'.
{
"query": {
"multi_match" : {
"query": "Black kurta",
"type": "best_fields",
"fields": [ "name^3","meta_title^3","meta_description","short_description","meta_keyword^3","description^1" ],
"tie_breaker": 0.3
}
}
}
Try this (the `...` stands for the rest of your fields); notice the "boost" value on each clause.
"query": {
"bool": {
"should": [
{
"multi_match": {
"query": "black kurta",
"fields": [
"name",
"meta_title",
"meta_description",
"short_description", ...
],
"type": "phrase",
"boost": 10
}
},
{
"multi_match": {
"query": "blue kurta",
"fields": [
"name",
"meta_title",
"meta_description",
"short_description",
...
],
"operator": "and",
"boost": 4
}
},
{
"multi_match": {
"query": "green kurta",
"fields": [
"name",
"meta_title",
"meta_description",
"short_description",
...
],
"operator": "and",
"boost": 2
}
}
]
}
}
Related
I have the query in below format and it runs in ES 2.4
{"query":{"function_score":{"filter":{"bool":{"must":[{"exists":{"field":"x"}},{"query_string":{"query":"en","fields":["locale"]}},{"query_string":{"query":"US","fields":["channel"]}},{"query_string":{"query":"UG","fields":["usergroups"]}}]}},"query":{"bool":{"should":{"multi_match":{"query":"refund","fields":["doc","key","title","title.standard_analyzed^3","x"],"type":"phrase","slop":20}},"must":{"multi_match":{"fuzziness":"0","query":"refund","prefix_length":"6","fields":["doc","key","title","title.standard_analyzed^3","x"],"max_expansions":"30"}}}},"functions":[{"field_value_factor":{"field":"usage","factor":1,"modifier":"log2p","missing":1}}]}},"from":0,"size":21}
But when I try the same query in 6.8 it returns errors
{"error":{"root_cause":[{"type":"parsing_exception","reason":"no [query] registered for [function_score]",
If I put the filters inside the query, I get a response, but the order of the docs doesn't match because the scores are different.
The "filter" key is no longer accepted directly under function_score; only "query" (plus the scoring functions) belongs there. You have to move the filter into the bool query.
I don't know about your mapping but I would use the "Term" query instead of the query string.
{
"query": {
"function_score": {
"query": {
"bool": {
"filter": {
"bool": {
"must": [
{
"exists": {
"field": "x"
}
},
{
"query_string": {
"query": "en",
"fields": [
"locale"
]
}
},
{
"query_string": {
"query": "US",
"fields": [
"channel"
]
}
},
{
"query_string": {
"query": "UG",
"fields": [
"usergroups"
]
}
}
]
}
},
"should": {
"multi_match": {
"query": "refund",
"fields": [
"doc",
"key",
"title",
"title.standard_analyzed^3",
"x"
],
"type": "phrase",
"slop": 20
}
},
"must": {
"multi_match": {
"fuzziness": "0",
"query": "refund",
"prefix_length": "6",
"fields": [
"doc",
"key",
"title",
"title.standard_analyzed^3",
"x"
],
"max_expansions": "30"
}
}
}
},
"functions": [
{
"field_value_factor": {
"field": "usage",
"factor": 1,
"modifier": "log2p",
"missing": 1
}
}
]
}
},
"from": 0,
"size": 21
}
About FunctionScore (doc 6.8)
I am using the following query:
{
"_source": [
"title",
"bench",
"id_",
"court",
"date",
"content"
],
"size": 15,
"from": 0,
"query": {
"bool": {
"must": [
{
"multi_match": {
"query": "london",
"fields": [
"title",
"content"
]
}
}
],
"should": [
{
"multi_match": {
"query": "London is a beautiful city and has a lot of amazing landmarks. I love the Thames!",
"fields": [
"title",
"content^2"
],
"operator": "or"
}
}
]
}
},
"highlight": {
"pre_tags": [
"<tag1>"
],
"post_tags": [
"</tag1>"
],
"fields": {
"content": {}
},
"number_of_fragments": 5,
"fragment_size": 300
}
}
The rationale of the query is that the word London must be present, while the words in the should query should just boost the score. Within the should query, I would like to boost the phrase "beautiful city" and the word "Thames". How do I do it?
PS: Content and Title are standard text fields with no analyzers applied on them.
Regards
You can add multiple clauses to the should query, one for each phrase or term you want to boost:
{
"query": {
"bool": {
"must": [
{
"multi_match": {
"query": "london",
"fields": [
"title",
"content"
]
}
}
],
"should": [
{
"multi_match": {
"query": "beautiful city",
"fields": [
"title",
"content^2"
],
"type": "phrase"
}
},
{
"multi_match": {
"query": "Thames",
"fields": [
"title",
"content^2"
]
}
}
]
}
}
}
The below search query is taking around 2356 ms to fetch 50 records.
Fuzziness leads to slower search. How can I improve performance while still using fuzziness?
(Highlight cannot be skipped)
{
"from": 0,
"size": 50,
"query": {
"bool": {
"must": {
"multi_match": {
"query": "shall have the right",
"fields": [
"subType",
"title",
"type",
"content"
],
"fuzziness": "AUTO",
"minimum_should_match": "80%"
}
},
"should": {
"multi_match": {
"query": "shall have the right",
"fields": [
"subType",
"title",
"type",
"content"
],
"type": "phrase",
"slop": 1
}
}
}
},
"aggregations": {
"agg_example": {
"terms": {
"field": "type.keyword"
}
}
},
"highlight": {
"type": "unified",
"fields": {
"*": {}
}
}
}
In Elasticsearch 5.0, the Indices Query has been marked as deprecated.
The documentation tells me to "Search on the _index field instead", but it is not obvious to me how to do this. How can I change an example query like this to the new method(s)?
GET /_search
{
"query": {
"bool": {
"minimum_number_should_match": 1,
"should": [
{"indices": {
"indices": ["index1"],
"no_match_query": "none",
"query": {
"bool": {"must": [
{"multi_match": {"fields": ["field1", "field2"], "operator": "or", "query": "foobar", "type": "boolean", "use_dis_max": true}},
{"multi_match": {"fields": ["field1", "field2"], "operator": "or", "query": "xuul", "type": "boolean", "use_dis_max": true}}
]}}
}},
{"indices": {
"indices": ["index2", "index3"],
"no_match_query": "none",
"query": {"bool": {"must": [
{"multi_match": {"fields": ["field1", "field2"], "operator": "or", "query": "foobar", "type": "boolean", "use_dis_max": true}}
]}}}}
]}
}
}
You could try it like this, replacing each indices query with a bool query that filters on the _index field:
{
"bool": {
"minimum_number_should_match": 1,
"should": [
{
"bool": {
"filter": [
{
"terms": {
"_index": ["index1"]
}
},
{
"bool": {
"must": [
{
"multi_match": {
"fields": [
"field1",
"field2"
],
"operator": "or",
"query": "foobar",
"type": "boolean",
"use_dis_max": true
}
},
{
"multi_match": {
"fields": [
"field1",
"field2"
],
"operator": "or",
"query": "xuul",
"type": "boolean",
"use_dis_max": true
}
}
]
}
}
]
}
},
{
"bool": {
"filter": [
{
"terms": {
"_index": [
"index2",
"index3"
]
}
},
{
"bool": {
"must": [
{
"multi_match": {
"fields": [
"field1",
"field2"
],
"operator": "or",
"query": "foobar",
"type": "boolean",
"use_dis_max": true
}
}
]
}
}
]
}
}
]
}
}
I have an index set up for all my documents:
{
"mappings" {
"book" {
"_source": { "enabled": true },
"properties": [
"title": { "type": "string", "analyzer": "standard", "search_analyzer": "standard" },
"description": { "type": "string", "analyzer": "standard", "search_analyzer": "standard" },
"author": { "type": "string", "analyzer": "standard", "search_analyzer": "standard" }
]
}
}
}
I push this through into an index called "library".
What I want to do is execute a search with the following requirements, assuming the user entered something like "simple yellow shovel":
Execute a search of user entered keywords in three ways:
As is as a whole phrase: "simple yellow shovel"
As a set of AND keywords: "simple+yellow+shovel"
As a set of OR keywords: "simple|yellow|shovel"
Ensure that the keyword sets executed in order of priority (boosted?):
Full text first
AND'd second
OR'd third
Using a simple query works fine for a single search:
{
"query": {
"simple_query_string": {
"query": "\"simple yellow shovel\""
}
}
}
How do I execute the multiple search with boosting?
Or should I be using something like a "match" query on the indexed fields?
I am not sure if I got this one correct. I have assumed priority order of
author>title>description
{
"query": {
"bool": {
"should": [
{
"bool": {
"must": [
{
"multi_match": {
"query": "simple yellow shovel",
"fields": [
"author^7",
"title^3",
"description"
],
"type": "phrase",
"boost": 10
}
}
]
}
},
{
"bool": {
"must": [
{
"multi_match": {
"query": "simple",
"fields": [
"author^7",
"title^3",
"description"
],
"boost": 5
}
},
{
"multi_match": {
"query": "yellow",
"fields": [
"author^7",
"title^3",
"description"
],
"boost": 5
}
},
{
"multi_match": {
"query": "shovel",
"fields": [
"author^7",
"title^3",
"description"
],
"boost": 5
}
}
]
}
},
{
"multi_match": {
"query": "simple",
"fields": [
"author^7",
"title^3",
"description"
],
"boost": 2
}
},
{
"multi_match": {
"query": "yellow",
"fields": [
"author^7",
"title^3",
"description"
],
"boost": 2
}
},
{
"multi_match": {
"query": "shovel",
"fields": [
"author^7",
"title^3",
"description"
],
"boost": 2
}
}
]
}
}
}
Could anyone please verify this? You can refer to the Boost Query link for more info. Is this what you are looking for?
I hope this helps!
EDIT : Rewritten with dis_max
{
"query": {
"bool": {
"should": [
{
"dis_max": {
"tie_breaker": 0.7,
"queries": [
{
"bool": {
"must": [
{
"multi_match": {
"query": "simple yellow shovel",
"fields": [
"author^7",
"title^3",
"description"
],
"type": "phrase",
"boost": 10
}
}
]
}
},
{
"bool": {
"must": [
{
"dis_max": {
"tie_breaker": 0.7,
"queries": [
{
"multi_match": {
"query": "simple",
"fields": [
"author^7",
"title^3",
"description"
],
"boost": 5
}
},
{
"multi_match": {
"query": "yellow",
"fields": [
"author^7",
"title^3",
"description"
],
"boost": 5
}
},
{
"multi_match": {
"query": "shovel",
"fields": [
"author^7",
"title^3",
"description"
],
"boost": 5
}
}
]
}
}
]
}
},
{
"multi_match": {
"query": "simple",
"fields": [
"author^7",
"title^3",
"description"
],
"boost": 2
}
},
{
"multi_match": {
"query": "yellow",
"fields": [
"author^7",
"title^3",
"description"
],
"boost": 2
}
},
{
"multi_match": {
"query": "shovel",
"fields": [
"author^7",
"title^3",
"description"
],
"boost": 2
}
}
]
}
}
]
}
}
}
This seems to give me much better results, at least on my dataset. This is a great source for understanding dis_max.
Please play a lot with this and see if you are getting expected results.
Use the help of Explain API.
I've rewritten this using Dis Max Query. Keep in mind that you could try different types to get better results. See these:
best_fields
most_fields
cross_fields
Query:
POST /your_index/your_type/_search
{
"query": {
"dis_max": {
"tie_breaker": 0.7,
"boost": 1.2,
"queries": [
{
"multi_match": {
"query": "simple yellow showel",
"type": "phrase",
"boost": 3,
"fields": [
"title^3",
"author^2",
"description"
]
}
},
{
"multi_match": {
"query": "simple yellow showel",
"operator": "and",
"boost": 2,
"fields": [
"title^3",
"author^2",
"description"
]
}
},
{
"multi_match": {
"query": "simple yellow showel",
"fields": [
"title^3",
"author^2",
"description"
]
}
}
]
}
}
}
The Dis Max query scores each document by its best-scoring clause among the three queries, plus tie_breaker times the scores of the other matching clauses. We give an additional boost to the "type": "phrase" and "operator": "and" queries, while leaving the last query unboosted.