ElasticSearch - Use function score query field value factor with match query - elasticsearch

I want to combine the field_value_factor example from the official documentation with ordinary match and bool queries. How can I do that?
GET /_search
{
"query": {
"function_score": {
"field_value_factor": {
"field": "likes",
"factor": 1.2,
"modifier": "sqrt",
"missing": 1
}
}
}
}
https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-function-score-query.html#function-field-value-factor
Match query:
"query": {
"match" : {
"name" : "star wars"
}
}
Boolean query:
{
"query": {
"bool": {
"must": [
{
"match": {
"name": "Star Wars"
}
}
],
"should": [
{
"term": {
"name.keyword": {
"value": "Star Wars"
}
}
}
]
}
}
}

Yes, this should be doable. If you read further down in the documentation that you linked to, there is an example:
GET /_search
{
"query": {
"function_score": {
"functions": [
{
"gauss": {
"price": {
"origin": "0",
"scale": "20"
}
}
},
{
"gauss": {
"location": {
"origin": "11, 12",
"scale": "2km"
}
}
}
],
"query": {
"match": {
"properties": "balcony"
}
},
"score_mode": "multiply"
}
}
}
Modifying that slightly for your use case should look something like this:
GET /_search
{
"query": {
"function_score": {
"functions": [
{
"field_value_factor": {
"field": "likes",
"factor": 1.2,
"modifier": "sqrt",
"missing": 1
}
}
],
"query": {
"match": {
"name": "Star Wars"
}
},
"score_mode": "multiply"
}
}
}
Disclaimer: I haven't tested this, just going off of the documentation.

Related

Minimum should match with filter doesn't return any result

I have a complicated query which works fine. The problem is that I need to add a condition (filter) to it to filter the results. I need exactly the results I currently get, filtered on the field called "field7".
"query": {
"bool": {
"should": [
{
"match_bool_prefix": {
"field1": {
"query": "test",
"fuzziness": "auto",
"boost": 1
}
}
},
{
"match": {
"field2": {
"query": "test",
"boost": 10
}
}
},
{
"exists": {
"field": "field3",
"boost": 15
}
},
{
"exists": {
"field": "field4",
"boost": 10
}
},
{
"match_phrase_prefix": {
"field5": {
"query": ""
}
}
}
],
"must": [
{
"bool": {
"filter": [
{
"match": {
"field6": "A"
}
},
{"terms": { "field7": [3,4,5]}}
]
}
}
],
"minimum_should_match": 3
}
},
"size": 20
I have to use "minimum_should_match": 3 to meet my requirements (if I remove it I get unrelated results), but when I use it together with the filter, the query returns nothing. Is there any suggestion for how to get the current results and filter them based on field7?
@Paris I believe you can use a term-level query on field7 in the bool query's filter clause, since you want to filter the result set produced by the should and must clauses. So basically this should suffice:
"query": {
"bool": {
"should": [
{
"match_bool_prefix": {
"field1": {
"query": "test",
"fuzziness": "auto",
"boost": 1
}
}
},
{
"match": {
"field2": {
"query": "test",
"boost": 10
}
}
},
{
"exists": {
"field": "field3",
"boost": 15
}
},
{
"exists": {
"field": "field4",
"boost": 10
}
},
{
"match_phrase_prefix": {
"field5": {
"query": ""
}
}
}
],
"must": [
{"match": {"field6": "A"}}
],
"filter": [
{"terms": {"field7": [3, 4, 5]}}
]
}
},
"size": 20

searching elastic search filtering results with and and or conditions

I have an index with the following documents:
{
"first_name": "f1",
"last_name": "l1",
"location": "SF",
"vehicle": {
"type": "car",
"color": "red"
}
}
{
"first_name": "f2",
"last_name": "l2",
"location": "SF",
"vehicle": {
"type": "motorcycle",
"color": "blue"
}
}
{
"first_name": "f3",
"last_name": "l3",
"location": "SF",
"vehicle": {
"type": "bicycle",
"color": "green"
}
}
{
"first_name": "f4",
"last_name": "l4",
"location": "CA",
"vehicle": {
"type": "motorcycle",
"color": "green"
}
}
{
"first_name": "f5",
"last_name": "l5",
"location": "SF"
}
The vehicle document is a nested type.
I would like to filter results:
SELECT WHERE location=SF AND (vehicle.type=car OR
vehicle.type=airplane OR not-exists(vehicle.type))
I could not find a way to do it.
Is there a way to execute such a filter on Elasticsearch?
Thank you.
This query might be helpful.
POST IndexName/Type/_search
{
"query": {
"bool": {
"must": [
{
"term": {
"location": {
"value": "sf"
}
}
},
{
"bool": {
"should": [
{
"terms": {
"vehicle.type": [
"car",
"airplane"
]
}
},
{
"bool": {
"must_not": {
"exists": {
"field": "vehicle.type"
}
}
}
}
]
}
}
]
}
}
}
If you want the location match to be case-sensitive, use the query below. It uses location.keyword so that the value is matched exactly, including case.
POST IndexName/Type/_search
{
"query": {
"bool": {
"must": [
{
"term": {
"location.keyword": {
"value": "SF"
}
}
},
{
"bool": {
"should": [
{
"terms": {
"vehicle.type": [
"car",
"airplane"
]
}
},
{
"bool": {
"must_not": {
"exists": {
"field": "vehicle.type"
}
}
}
}
]
}
}
]
}
}
}
I found a way to do it, it is a bit complex but:
GET indextests/_search
{
"query": {
"bool": {
"filter": [
{
"term": {
"location": "SF"
}
},
{
"bool": {
"should": [
{
"nested": {
"path": "vehicle",
"query": {
"terms": {
"vehicle.type": ["car", "bicycle"]
}
}
}
},
{
"bool": {
"must_not": [
{
"nested": {
"path": "vehicle",
"query": {
"bool": {
"should": [
{
"bool": {
"must": [
{
"exists": {
"field": "vehicle.type"
}
}
]
}
}
]
}
}
}
}
]
}
}
]
}
}
]
}
}
}

Filter not working for weighted search

I am pretty new to Elasticsearch and have not really got the hang of it yet. I have a search whose results are weighted according to the weight of their tags, which works absolutely fine, but when I later introduced a filter, the search always returned empty results. Here is what I have tried:
{
"nested": {
"path": "tags",
"score_mode": "sum",
"query": {
"function_score": {
"query": {
"bool": {
"must": [
{
"match_phrase_prefix": {
"tags.tag": "big"
}
}
],
"filter": {
"term": {
"type.primary": "audio"
}
}
}
},
"field_value_factor": {
"field": "tags.weight"
},
"boost_mode": "multiply",
"boost": 10
}
}
}
}
The example result with the filter should be something like this:
{
"_index": "assets",
"_type": "Asset",
"_id": "5a1dc3c0848662ee49e36f43s",
"_score": 886.8744,
"_source": {
"name": "And Action Breakbeat",
"meta_data": {
"type": "audio/mp3",
"file_name": "music_zapsplat_and_action_breakbeat.mp3"
},
"file_key": "music_zapsplat_and_action_breakbeat.mp3",
"src": {
"url": "https://exapmle.com/music_zapsplat_and_action_breakbeat.mp3"
},
"type": {
"primary": "AUDIO",
"secondary": "mp3"
},
"thumbnail_url": "https://example.com/thumbnail/audio.jpg",
"tags": [
{
"tag": "big",
"weight": 10
},
{
"tag": "beat",
"weight": 5
},
{
"tag": "music",
"weight": 3.3333333333333335
}
],
"isDeleted": false
}
}
Thank you!
You cannot match type.primary inside a nested query for tags. Try this query instead:
{
"query": {
"bool": {
"filter": {
"term": {
"type.primary": "audio"
}
},
"must": [
{
"nested": {
"path": "tags",
"score_mode": "sum",
"query": {
"function_score": {
"query": {
"match_phrase_prefix": {
"tags.tag": "big"
}
},
"field_value_factor": {
"field": "tags.weight"
},
"boost_mode": "multiply",
"boost": 10
}
}
}
}
]
}
}
}

elasticsearch scoring multiple using nested function_scores

I'm trying to find the right approach for nested scoring functions.
DATA:
PUT test
PUT test/test/_mapping
{
"properties": {
"driver_id": {
"type": "integer"
},
"driver_name": {
"type": "string"
},
"cities": {
"type": "nested",
"properties": {
"city_id": {
"type": "integer"
},
"used": {
"type": "float"
}
}
},
"cars": {
"type": "nested",
"properties": {
"car_id": {
"type": "integer"
},
"used": {
"type": "float"
}
}
}
}
}
PUT test/test/1
{
"id":1,
"driver_name":"Lady Smith",
"cars":[{"car_id":1,"brand":"Ford Focus","used":0.5},{"car_id":2,"brand":"Toyota Corola","used":0.5}],
"city":[{"city_id":3,"name":"Tel Aviv","used":0.8},{"city_id":4,"name":"New York","used":0.2}]
}
PUT test/test/2
{
"id":2,
"driver_name":"John Smith",
"cars":[{"car_id":1,"brand":"Ford Focus","used":0.3},{"car_id":2,"brand":"Toyota Corola","used":0.3}],
"city":[{"city_id":3,"name":"Tel Aviv","used":0.8},{"city_id":4,"name":"New York","used":0.2}]
}
PUT test/test/3
{
"id":3,
"driver_name":"Will Smith",
"cars":[{"car_id":1,"brand":"Ford Focus","used":0.1}],
"city":[{"city_id":3,"name":"New York","used":0.2}]
}
PUT test/test/4
{
"id":4,
"driver_name":"Ash Smith",
"cars":[],
"city":[]
}
To put it simply, given the data, I would like to get the best fit to the query of the driver that drives a Ford AND a Corolla in Tel Aviv.
Or, translated loosely to SQL:
-- Score each driver as (average usage of the requested cars) * (usage of the requested city).
SELECT drv.driver_id,
cr.cars_score * ct.city_score AS driver_score
FROM drivers drv
-- driver_id must be projected by each subquery so the outer join condition can reference it.
LEFT JOIN (SELECT driver_id, sum(used) / 2 AS cars_score
FROM car_usage
WHERE car_id IN (1,2) GROUP BY driver_id) AS cr
ON (cr.driver_id = drv.driver_id)
LEFT JOIN (SELECT driver_id, sum(used) / 1 AS city_score
FROM city_usage
WHERE city_id IN (3) GROUP BY driver_id) AS ct
ON (ct.driver_id = drv.driver_id)
Tried the following:
{
"query": {
"bool": {
"disable_coord": true,
"must": [
{
"query": {
"bool": {
"disable_coord": true,
"must": [{
"function_score": {
"query": {
"nested": {
"path": "cars",
"query": {
"function_score": {
"filter": {
"term": {
"cars.car_id": 1
}
},
"boost_mode": "replace",
"score_mode": "sum",
"functions": [
{
"field_value_factor": {
"field": "cars.used",
"factor":0.5,
"missing": 0
}
}
]
}
}
}
}
}
}, {
"function_score": {
"query": {
"nested": {
"path": "cars",
"query": {
"function_score": {
"filter": {
"term": {
"cars.car_id": 2
}
},
"boost_mode": "replace",
"score_mode": "sum",
"functions": [
{
"field_value_factor": {
"field": "cars.used",
"factor":0.5,
"missing": 0
}
}
]
}
}
}
}
}
}
]
}
}
},
{
"function_score": {
"query": {
"nested": {
"path": "cities",
"query": {
"function_score": {
"filter": {
"term": {
"cities.city_id": 3
}
},
"score_mode": "multiply",
"boost_mode": "replace",
"functions": [
{
"field_value_factor": {
"field": "cities.used",
"missing": 0
}
}
]
}
}
}
}
}
}
]
}
}
}
which gave me weird results.
then tried:
{
"query": {
"bool": {
"must": [
{
"nested": {
"path": "cars",
"query": {
"function_score": {
"filter": {
"term": {
"cars.car_id": 1
}
},
"score_mode": "sum",
"boost_mode":"replace",
"functions": [
{
"field_value_factor": {
"field": "cars.used",
"factor":0.5,
"missing": 0
}
}
]
}
}
}
},
{
"nested": {
"path": "cars",
"query": {
"function_score": {
"filter": {
"term": {
"cars.car_id": 2
}
},
"score_mode": "sum",
"boost_mode":"replace",
"functions": [
{
"field_value_factor": {
"field": "cars.used",
"factor":0.5,
"missing": 0
}
}
]
}
}
}
},
{
"nested": {
"path": "cities",
"query": {
"function_score": {
"filter": {
"term": {
"cities.city_id": 3
}
},
"score_mode":"multiply",
"boost_mode":"replace",
"functions": [
{
"field_value_factor": {
"field": "cities.used",
"missing": 0
}
}
]
}
}
}
}
]
}
}
}
which was closer, but seemed to just sum all scores.
A friend of mine suggested flattening the entire JSON and dropping the nested objects (making them properties), but I'm unsure whether that will make it easier to query the data.
UPDATE 1
another failed attempt :
{
"query": {
"function_score": {
"query": {
"bool": {
"must": [
{
"nested": {
"path": "cars",
"query": {
"function_score": {
"filter": {
"term": {
"cars.car_id": 1
}
},
"score_mode": "sum",
"functions": [
{
"field_value_factor": {
"field": "cars.used",
"factor": 0.5,
"missing": 0
}
}
]
}
}
}
},
{
"nested": {
"path": "cars",
"query": {
"function_score": {
"filter": {
"term": {
"cars.car_id": 2
}
},
"score_mode": "sum",
"functions": [
{
"field_value_factor": {
"field": "cars.used",
"factor": 0.5,
"missing": 0
}
}
]
}
}
}
},
{
"nested": {
"path": "cities",
"query": {
"function_score": {
"filter": {
"term": {
"cities.city_id": 3
}
},
"score_mode": "multiply",
"functions": [
{
"field_value_factor": {
"field": "cities.used",
"missing": 0
}
}
]
}
}
}
}
]
}
},
"score_mode": "multiply"
}
}
}
UPDATE 2
Following my alternative method of flattening the fields and losing the nested filters, I ended with the following:
PUT test2
PUT test2/test2/1
{
"id":1,
"driver_name":"Lady Smith",
"cars_1":{"brand":"Ford Focus","used":0.5},
"cars_2":{"brand":"Toyota Corola","used":0.5},
"cities_3":{"name":"Tel Aviv","used":0.8},
"cities_4":{"name":"New York","used":0.2}
}
PUT test2/test2/2
{
"id":2,
"driver_name":"John Smith",
"cars_1":{"brand":"Ford Focus","used":0.3},
"cars_2":{"brand":"Toyota Corola","used":0.3},
"cities_3":{"name":"Tel Aviv","used":0.8},
"cities_4":{"name":"New York","used":0.2}
}
PUT test2/test2/3
{
"id":3,
"driver_name":"Will Smith",
"cars_1":{"brand":"Ford Focus","used":0.1},
"cities_4":{"name":"New York","used":0.2}
}
PUT test2/test2/4
{
"id":4,
"driver_name":"Ash Smith"
}
post test2/_search
{
"query": {
"bool": {
"disable_coord": true,
"must": [
{
"match": {
"name": "red pepper"
}
}
],
"should": [
{
"nested": {
"path": "words",
"query": {
"function_score": {
"functions": [
{
"field_value_factor": {
"field" : "words.weight",
"missing": 0
}
}
],
"query": {
"match": {
"words.text": "red pepper"
}
},
"score_mode": "sum",
"boost_mode": "replace"
}
},
"score_mode": "total"
}
}
]
}
}
}
GET test2/_search
{
"query": {
"function_score": {
"query":{
"bool":{
"must":[{
"exists":{"field":"cars_1"}
},{
"exists":{"field":"cars_2"}
},{
"exists":{"field":"cities_3"}
}]
}
},
"score_mode": "multiply",
"boost_mode": "replace",
"functions": [{
"script_score": {
"script": {
"inline": "(doc['cars_1.used'].value + doc['cars_2.used'].value) / 2 * doc['cities_3.used'].value"
}
}
}]
}
}
}
But I am not sure about the performance hit of the inline script. It feels like I'm missing a simpler solution.
Just for future reference for people who visit this post:
I ended up changing my data model and using script_score (lang: "painless" in 5.0.0).
Warning: although this method did fit my needs, it had an impact on performance; as an eyeball estimate, response times were around 3-5 times slower.
For now, it's good enough for me.
PUT test2
PUT test2/test2/1
{
"id":1,
"driver_name":"Lady Smith",
"cars_1":{"brand":"Ford Focus","used":0.5},
"cars_2":{"brand":"Toyota Corola","used":0.5},
"cities_3":{"name":"Tel Aviv","used":0.8},
"cities_4":{"name":"New York","used":0.2}
}
PUT test2/test2/2
{
"id":2,
"driver_name":"John Smith",
"cars_1":{"brand":"Ford Focus","used":0.3},
"cars_2":{"brand":"Toyota Corola","used":0.3},
"cities_3":{"name":"Tel Aviv","used":0.8},
"cities_4":{"name":"New York","used":0.2}
}
PUT test2/test2/3
{
"id":3,
"driver_name":"Will Smith",
"cars_1":{"brand":"Ford Focus","used":0.1},
"cities_4":{"name":"New York","used":0.2}
}
PUT test2/test2/4
{
"id":4,
"driver_name":"Ash Smith"
}
post test2/_search
{
"query": {
"bool": {
"disable_coord": true,
"must": [
{
"match": {
"name": "red pepper"
}
}
],
"should": [
{
"nested": {
"path": "words",
"query": {
"function_score": {
"functions": [
{
"field_value_factor": {
"field" : "words.weight",
"missing": 0
}
}
],
"query": {
"match": {
"words.text": "red pepper"
}
},
"score_mode": "sum",
"boost_mode": "replace"
}
},
"score_mode": "total"
}
}
]
}
}
}
GET test2/_search
{
"query": {
"function_score": {
"query":{
"bool":{
"must":[{
"exists":{"field":"cars_1"}
},{
"exists":{"field":"cars_2"}
},{
"exists":{"field":"cities_3"}
}]
}
},
"score_mode": "multiply",
"boost_mode": "replace",
"functions": [{
"script_score": {
"script": {
"inline": "(doc['cars_1.used'].value + doc['cars_2.used'].value) / 2 * doc['cities_3.used'].value"
}
}
}]
}
}
}

ElasticSearch Function Score Query

Following is my function_score query. I want to give additional score to documents where the product quality is better.
But _score in the search response is always 0. What am I missing? Thx.
When I remove bool query and replace it with just a term filter, the score is non zero. I am guessing it is about the query bool but can not figure out why.
Elasticsearch version is 2.4
{
"from": 0,
"size": 20,
"query": {
"function_score": {
"query": {
"bool": {
"filter": [
{
"bool": {
"should": {
"terms": {
"categories.category1Id": [
63
]
}
}
}
}
]
}
},
"functions": [
{
"gauss": {
"updatedDate": {
"origin": "2016-10-03 05:10:18",
"scale": "0.5h",
"decay": 0.1,
"offset": "1h"
}
}
},
{
"filter": {
"term": {
"productQuality": "EXCELLENT"
}
},
"weight": 7
},
{
"filter": {
"term": {
"productQuality": "HIGH"
}
},
"weight": 5
},
{
"filter": {
"term": {
"productQuality": "MEDIUM"
}
},
"weight": 3
},
{
"filter": {
"term": {
"productQuality": "LOW"
}
},
"weight": 1
}
],
"score_mode": "sum"
}
}
}
As @Val said:
bool.filter assigns a score of 0 to all documents, as no scoring query has been specified (link).
If you need the score, you can add "must": {"match_all": {}} in your query. match_all will assign 1.0 to all documents (link).
Here is your query with match_all:
{
"from": 0,
"size": 20,
"query": {
"function_score": {
"query": {
"bool": {
"must": {
"match_all": {}
},
"filter": [
{
"bool": {
"should": {
"terms": {
"categories.category1Id": [
63
]
}
}
}
}
]
}
},
"functions": [
{
"gauss": {
"updatedDate": {
"origin": "2016-10-03 05:10:18",
"scale": "0.5h",
"decay": 0.1,
"offset": "1h"
}
}
},
{
"filter": {
"term": {
"productQuality": "EXCELLENT"
}
},
"weight": 7
},
{
"filter": {
"term": {
"productQuality": "HIGH"
}
},
"weight": 5
},
{
"filter": {
"term": {
"productQuality": "MEDIUM"
}
},
"weight": 3
},
{
"filter": {
"term": {
"productQuality": "LOW"
}
},
"weight": 1
}
],
"score_mode": "sum"
}
}
}

Resources