Elasticsearch function score query stuck at zero score - elasticsearch

I have a query which I've simplified down to this:
GET /foos-33/_search
{
"from" : 0,
"size" : 25,
"query" : {
"function_score" : {
"query" : {
"bool" : {
"filter" : {
"bool" : {
"must" : [ {
"bool" : {
"must_not" : {
"terms" : {
"foo.id" : [ ]
}
}
}
} ]
}
}
}
},
"functions" : [ {
"field_value_factor" : {
"field" : "foo.strategicBoost",
"missing" : 1.0
}
} ],
"score_mode" : "sum"
}
},
"explain" : true,
"sort" : [ {
"counts.barsPerDay" : {
"order" : "desc"
}
} ]
}
The scores of the hits are always zero. The explain output sort of shows why this is happening, but I don't completely understand what's going on:
"_explanation": {
"value": 0,
"description": "function score, product of:",
"details": [
{
"value": 0,
"description": "ConstantScore(-() +*:*), product of:",
"details": [
{
"value": 0,
"description": "boost",
"details": []
},
{
"value": 1,
"description": "queryNorm",
"details": []
}
]
},
{
"value": 10,
"description": "min of:",
"details": [
{
"value": 10,
"description": "field value function: none(doc['foo.strategicBoost'].value?:1.0 * factor=1.0)",
"details": []
},
{
"value": 3.4028235e+38,
"description": "maxBoost",
"details": []
}
]
}
]
}
},
I tried to wrap it in a constant_score to change the constant score from 0 to 1, like this:
GET /foos-33/_search
{
"from" : 0,
"size" : 25,
"query" : {
"function_score" : {
"query" : {
"bool" : {
"constant_score": {
"boost": 1,
"filter" : {
"bool" : {
"must" : [ {
"bool" : {
"must_not" : {
"terms" : {
"foo.id" : [ ]
}
}
}
} ]
}
}
}
}
},
"functions" : [ {
"field_value_factor" : {
"field" : "foo.strategicBoost",
"missing" : 1.0
}
} ],
"score_mode" : "sum"
}
},
"explain" : true,
"sort" : [ {
"counts.barsPerDay" : {
"order" : "desc"
}
} ]
}
but that gave me an error message:
"failed_shards": [
{
"shard": 0,
"index": "foos-33",
"node": "A9s2Ui3mQE2SBZhY2VkZGw",
"reason": {
"type": "query_parsing_exception",
"reason": "[bool] query does not support [constant_score]",
"index": "foos-33",
"line": 8,
"col": 29
}
}
]
There is another way I could try to solve this problem - I could try to change the product to a sum or something - but I can't figure out where the product is coming from.

The top-level "product of" comes from the boost_mode, which defaults to multiply. Setting boost_mode to replace is the right fix in this case - the query score is always zero, so we don't care about it. Setting boost_mode to sum would be an equally valid fix in this case, too.

Related

"match-boolean-query doesn't return the "exact match"

I'm using a match_bool_prefix query, but I can't get the exact match of the query to rank first. I can't use prefix queries because I also need "not exact match" results, and I also need fuzziness and word completion. I get everything I need from the match_bool_prefix query (though the fuzziness doesn't work that well), but my problem is that when I'm looking for an exact match like "apple", it shows everything that includes "apple". I need the exact match to get a higher ranking than the others.
GET /_search
{
"query": {
"bool": {
"must": [
{
"match_bool_prefix": {
"name": {
"query": "apple",
"fuzziness": "auto"
}
}
},
{
"bool": {
"must_not": [
{
"match": {
"type": "3"
}
},
{
"match": {
"type": "4"
}
}
]
}
},
{
"match": {
"status": "A"
}
}
],
"should": [
{
"exists": {
"field": "",
"boost": 10
}
}
]
}
},
"indices_boost": [
{
"index1": 3
},
{
"index2": 1.3
},
{
"index3": 1.5
}
],
"size": 20
}
the result I'm getting with this query is :
{
"took" : 6,
"timed_out" : false,
"_shards" : {
"total" : 20,
"successful" : 20,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 4970,
"relation" : "eq"
},
"max_score" : 14.451834,
"hits" : [
{
"_index" : "index",
"_id" : "11434",
"_score" : 14.451834,
"_source" : {
"name" : "Apple Slices With Peanut Butter".
Is there any solution for this?

Elasticsearch - Missing Field Value For Nested Field - Function Score Query | v7.10.2

I have already posted this on the ES group but I got no response and so posted it on SO. Link https://discuss.elastic.co/t/missing-field-value-for-nested-field-function-score-query-v7-10-2/291365
I have been trying for a long time now, but Elasticsearch always reports a missing field value for the nested field while calculating the score.
Mapping:
{
"doctor_idx" : {
"mappings" : {
"properties" : {
"_class" : {
"type" : "keyword",
"index" : false,
"doc_values" : false
},
"service" : {
"type" : "nested",
"properties" : {
"_class" : {
"type" : "keyword",
"index" : false,
"doc_values" : false
},
"serviceTypeEarliestAvailability" : {
"type" : "nested",
"properties" : {
"_class" : {
"type" : "keyword",
"index" : false,
"doc_values" : false
},
"earliestAvailableDateTimeByType" : {
"type" : "date",
"format" : "date_hour_minute_second"
},
"serviceType" : {
"type" : "text"
},
"servicesMedium" : {
"type" : "keyword"
}
}
}
}
}
}
}
}
}
For simplicity, I have kept just one record. A glimpse of the record
"serviceTypeEarliestAvailability" : [
{
"serviceType" : "Service Type",
"earliestAvailableDateTimeByType" : "2021-12-09T19:39:16",
"servicesMedium" : [
"MED1",
"MED2",
"MED3",
"MED4"
]
}
],
The following query gives: "A document doesn't have a value for a field! Use doc[].size()==0 to check if a document is missing a field!"
I tried using field_value_factor instead of script_score but it's the same problem and it complains about the missing field value.
Query 1: with script_score
GET /doctor_idx/_search
{
"explain": true,
"query": {
"nested": {
"path": "service",
"query": {
"nested": {
"score_mode": "max",
"path": "service.serviceTypeEarliestAvailability",
"query": {
"function_score": {
"query": {
"match_all": {
"boost": 1
}
},
"functions": [
{
"filter": {
"match": {
"service.serviceTypeEarliestAvailability.serviceType": "type" // no complaints about this
}
},
"weight": 10
},
{
"script_score": {
"script": {
"source": "(doc['service.serviceTypeEarliestAvailability.earliestAvailableDateTimeByType'].value.getMillis())"
}
}
}
]
}
}
}
}
}
}
}
Query 2 : With field_value_factor
GET /doctor_idx/_search
{
"explain": true,
"query": {
"nested": {
"path": "service",
"query": {
"nested": {
"score_mode": "max",
"path": "service.serviceTypeEarliestAvailability",
"query": {
"function_score": {
"query": {
"match_all": {
"boost": 1
}
},
"functions": [
{
"filter": {
"match": {
"service.serviceTypeEarliestAvailability.serviceType": "type"
}
},
"weight": 10
},
{
"field_value_factor": {
"field": "service.serviceTypeEarliestAvailability.earliestAvailableDateTimeByType"
}
}
]
}
}
}
}
}
}
}
Error From ES for Query 1:
{
"error" : {
"root_cause" : [
{
"type" : "script_exception",
"reason" : "runtime error",
"script_stack" : [
"org.elasticsearch.index.fielddata.ScriptDocValues$Dates.get(ScriptDocValues.java:160)",
"org.elasticsearch.index.fielddata.ScriptDocValues$Dates.getValue(ScriptDocValues.java:154)",
"(doc['service.serviceTypeEarliestAvailability.earliestAvailableDateTimeByType'].value.getMillis())",
" ^---- HERE"
],
"script" : "(doc['service.serviceTypeEarliestAvailability.earliestAvailableDateTimeByType'].value.getMillis())",
"lang" : "painless",
"position" : {
"offset" : 79,
"start" : 0,
"end" : 98
}
}
],
"type" : "search_phase_execution_exception",
"reason" : "all shards failed",
"phase" : "query",
"grouped" : true,
"failed_shards" : [
{
"shard" : 0,
"index" : "doctor_idx",
"node" : "mvh5k24dQPqM-d7JVeNomQ",
"reason" : {
"type" : "script_exception",
"reason" : "runtime error",
"script_stack" : [
"org.elasticsearch.index.fielddata.ScriptDocValues$Dates.get(ScriptDocValues.java:160)",
"org.elasticsearch.index.fielddata.ScriptDocValues$Dates.getValue(ScriptDocValues.java:154)",
"(doc['service.serviceTypeEarliestAvailability.earliestAvailableDateTimeByType'].value.getMillis())",
" ^---- HERE"
],
"script" : "(doc['service.serviceTypeEarliestAvailability.earliestAvailableDateTimeByType'].value.getMillis())",
"lang" : "painless",
"position" : {
"offset" : 79,
"start" : 0,
"end" : 98
},
"caused_by" : {
"type" : "illegal_state_exception",
"reason" : "A document doesn't have a value for a field! Use doc[<field>].size()==0 to check if a document is missing a field!"
}
}
}
]
},
"status" : 400
}
Error From ES for Query 2:
{
"error" : {
"root_cause" : [
{
"type" : "exception",
"reason" : "Missing value for field [service.serviceTypeEarliestAvailability.earliestAvailableDateTimeByType]"
}
],
"type" : "search_phase_execution_exception",
"reason" : "all shards failed",
"phase" : "query",
"grouped" : true,
"failed_shards" : [
{
"shard" : 0,
"index" : "doctor_idx",
"node" : "mvh5k24dQPqM-d7JVeNomQ",
"reason" : {
"type" : "exception",
"reason" : "Missing value for field [service.serviceTypeEarliestAvailability.earliestAvailableDateTimeByType]"
}
}
]
},
"status" : 500
}
The only record I have in the ES
{
"took" : 32,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 1,
"relation" : "eq"
},
"max_score" : 1.0,
"hits" : [
{
"_index" : "doctor_idx",
"_type" : "_doc",
"_id" : "xM20oH0Bmp1zsT0w8pQe",
"_score" : 1.0,
"_source" : {
"_class" : "com.insig.clinic_apps.services_server.booking.dao.DoctorSchema",
"id" : "xM20oH0Bmp1zsT0w8pQe",
"metadata" : {
"doctorId" : "xhnz2lGvXxelWyVekz82c2v6Srb2",
"fullName" : "Dave Insig Email Test",
"credentials" : [
"MD"
],
"languagesSpoken" : [
"EN",
"NOT_DEFINED"
],
"specialitiesServed" : [ ],
"city" : "VAUGHAN",
"provincesPermitted" : [ ],
"province" : "ON",
"country" : "CANADA"
},
"service" : {
"servicesMedium" : [
"IN_CLINIC",
"VIDEO",
"MESSAGING",
"PHONE"
],
"servicesTypeDuration" : [
{
"serviceType" : "Acne Symptoms",
"duration" : 5,
"servicesMedium" : [
"IN_CLINIC",
"MESSAGING",
"PHONE",
"VIDEO"
]
}
],
"serviceTypeEarliestAvailability" : [
{
"serviceType" : "Acne Symptoms",
"earliestAvailableDateTimeByType" : "2021-12-09T19:39:16",
"servicesMedium" : [
"IN_CLINIC",
"MESSAGING",
"PHONE",
"VIDEO"
]
}
],
"bufferTimeForNextAvailability" : 0
},
"earliestAvailableDateTime" : "2021-12-09T19:39:16",
"patientRating" : 4.384481,
"onTimeRating" : 3.171053
}
}
]
}
}
/!\ Partial solution /!\
Query 1
When I was trying to execute your query I got this error:
{
"caused_by" : {
"type" : "illegal_argument_exception",
"reason" : "dynamic method [org.elasticsearch.script.JodaCompatibleZonedDateTime, toEpochMilli/0] not found"
}
}
So I made a slight change to your query, calling .toInstant().toEpochMilli() on the date value:
GET /so_custom_score/_search
{
"explain": true,
"query": {
"nested": {
"path": "service",
"query": {
"nested": {
"score_mode": "max",
"path": "service.serviceTypeEarliestAvailability",
"query": {
"function_score": {
"query": {
"match_all": {
"boost": 1
}
},
"functions": [
{
"filter": {
"match": {
"service.serviceTypeEarliestAvailability.serviceType": "type"
}
},
"weight": 10
},
{
"script_score": {
"script": {
"source": """
def availability = doc['service.serviceTypeEarliestAvailability.earliestAvailableDateTimeByType'].value;
return availability.toInstant().toEpochMilli();
"""
}
}
}
]
}
}
}
}
}
}
}
Well, I wasted a lot of time here and figured out that there was no problem with the queries. Elasticsearch v7.10.2 has an issue when I try to get the explanation of the query.
It works perfectly fine without the explain parameter.
See the stack trace frame at org.elasticsearch.common.lucene.search.function.FieldValueFactorFunction$1.explainScore(FieldValueFactorFunction.java:103) ~[elasticsearch-7.10.2.jar:7.10.2]
PS: One of the comments on the original question mentioned that the latest Elasticsearch v7.15 works fine even with the explain parameter.

Elasticsearch `function_score` with `score_mode` confusion when used with nested objects

Background:
I have the following mapping for curriculum_posts documents. Notice the nested skills property.
{
"curriculum_posts" : {
"mappings" : {
"dynamic" : "false",
"properties" : {
"title" : {
"type" : "text",
"analyzer" : "english"
},
"skills" : {
"type" : "nested",
"properties" : {
"slug" : {
"type" : "text",
"fields" : {
"raw" : {
"type" : "keyword"
},
"text" : {
"type" : "text"
}
}
},
"start_skill_level" : {
"type" : "keyword"
},
"start_skill_level_value" : {
"type" : "integer"
}
}
}
}
}
}
}
A sample record looks like this:
{
"_source" : {
"skills" : [
{
"start_skill_level_value" : 1,
"slug" : "infrastructure-as-code-iac"
},
{
"start_skill_level_value" : 1,
"slug" : "devops"
}
],
"title" : "Terraform: Infrastructure as code"
}
}
I wanted to run a query that returns all documents, but with scores matching the number of skills.slug values that matched. My query looks like this:
{
"query": {
"nested": {
"path": "skills",
"query": {
"function_score": {
"query": { "match_all": {} },
"functions": [
{ "script_score": { "script": "0" } },
{
"filter": {
"term": { "skills.slug.raw": { "value": "devops" } }
},
"weight": 2
},
{
"filter": {
"term": { "skills.slug.raw": { "value": "infrastructure-as-code-iac" } }
},
"weight": 2
}
],
"score_mode": "sum",
"boost_mode": "replace"
}
}
}
}
}
I decided to use function_score with boost_mode: replace so that the scores from documents are ignored and only the function scores are taken. I set score_mode: sum to ensure that the scores from the function matches are summed up.
The problem
So, for the above query, on the example document above, I was expecting the score to be 4.0 because it matches the skills.slug for both infrastructure-as-code-iac and devops. However, the score in the result is only 2.0 for the document.
Question
I suppose I'm not understanding how function_score takes the scores from the functions or how my functions are affecting the score. Could someone help me understand the scoring here?
Some debugging
I looked at the explanation but I'm unable to decode much information from it. Nevertheless, here is the explanation:
{
"_index" : "curriculum_posts",
"_type" : "_doc",
"_id" : "18",
"matched" : true,
"explanation" : {
"value" : 2.0,
"description" : "Score based on 2 child docs in range from 83 to 93, best match:",
"details" : [
{
"value" : 2.0,
"description" : "sum of:",
"details" : [
{
"value" : 2.0,
"description" : "min of:",
"details" : [
{
"value" : 2.0,
"description" : "function score, score mode [sum]",
"details" : [
{
"value" : 0.0,
"description" : "script score function, computed with script:\"Script{type=inline, lang='painless', idOrCode='0', options={}, params={}}\"",
"details" : [
{
"value" : 1.0,
"description" : "_score: ",
"details" : [
{
"value" : 1.0,
"description" : "*:*",
"details" : [ ]
}
]
}
]
},
{
"value" : 2.0,
"description" : "function score, product of:",
"details" : [
{
"value" : 1.0,
"description" : "match filter: skills.slug.raw:infrastructure-as-code-iac",
"details" : [ ]
},
{
"value" : 2.0,
"description" : "product of:",
"details" : [
{
"value" : 1.0,
"description" : "constant score 1.0 - no function provided",
"details" : [ ]
},
{
"value" : 2.0,
"description" : "weight",
"details" : [ ]
}
]
}
]
}
]
},
{
"value" : 3.4028235E38,
"description" : "maxBoost",
"details" : [ ]
}
]
},
{
"value" : 0.0,
"description" : "match on required clause, product of:",
"details" : [
{
"value" : 0.0,
"description" : "# clause",
"details" : [ ]
},
{
"value" : 1.0,
"description" : "_type:__skills",
"details" : [ ]
}
]
}
]
}
]
}
}

Elasticsearch is not returning a document I expect in the search results

I have a collection of customers that have a first name, last name, email, description and owner id. I want to take a character string from the app and search on all the fields, with a priority order. I'm using boost to achieve that.
Currently I have a lot of test customers with the name Sean in various fields within the documents. I have 2 documents that contain an email with sean.jones#email.com. One document contains the same email in the description.
When I perform the following search, I'm missing the document in the search results that does not contain the email in the description.
Here is my query:
{
"query" : {
"bool" : {
"filter" : {
"match" : {
"ownerId" : "acct_123"
}
},
"must" : [
{
"bool" : {
"should" : [
{
"prefix" : {
"firstName" : {
"value" : "sean",
"boost" : 3
}
}
},
{
"prefix" : {
"lastName" : {
"value" : "sean",
"boost" : 3
}
}
},
{
"terms" : {
"boost" : 2,
"description" : [
"sean"
]
}
},
{
"prefix" : {
"email" : {
"value" : "sean",
"boost" : 1
}
}
}
]
}
}
]
}
}
}
Here is the document that Im missing:
{
"_index" : "xxx",
"_id" : "cus_123",
"_version" : 1,
"_type" : "customers",
"_seq_no" : 9096,
"_primary_term" : 1,
"found" : true,
"_source" : {
"firstName" : null,
"id" : "cus_123",
"lastName" : null,
"email" : "sean.jones#email.com",
"ownerId" : "acct_123",
"description" : null
}
}
When I look at the current results, all of the documents have a score of 3.0. They have "Sean" in the name as well, so they score higher. When I do an _explain on the document I'm missing, with the query above, I get the following:
{
"_index": "xxx",
"_type": "customers",
"_id": "cus_123",
"matched": true,
"explanation": {
"value": 1.0,
"description": "sum of:",
"details": [
{
"value": 1.0,
"description": "sum of:",
"details": [
{
"value": 1.0,
"description": "ConstantScore(email._index_prefix:sean)",
"details": []
}
]
},
{
"value": 0.0,
"description": "match on required clause, product of:",
"details": [
{
"value": 0.0,
"description": "# clause",
"details": []
},
{
"value": 1.0,
"description": "ownerId:acct_123",
"details": []
}
]
}
]
}
}
Here are my mappings:
{
"properties": {
"firstName": {
"type": "text",
"index_prefixes": {
"max_chars": 10,
"min_chars": 1
}
},
"email": {
"analyzer": "my_email_analyzer",
"type": "text",
"index_prefixes": {
"max_chars": 10,
"min_chars": 1
}
},
"lastName": {
"type": "text",
"index_prefixes": {
"max_chars": 10,
"min_chars": 1
}
},
"description": {
"type": "text"
},
"ownerId": {
"type": "text"
}
}
}
"my_email_analyzer": {
"type": "custom",
"tokenizer": "uax_url_email"
}
If I'm understanding this correctly, because this document is only scoring a 1, it's not meeting a particular threshold. I've tried adjusting the min_score but I had no luck. Any thoughts on how I can get this document to be included in the search results?
thanks so much
It depends on what you mean by "missing":
is it, that the document does not make it into the number of hits (the "total")?
or is it, that the document itself does not show up as a hit in the hits list?
If it's #2 you may want to increase the number of documents Elasticsearch fetches and returns, by adding a size-clause to your search request (default size is 10):
Example
"size": 50

When using gauss decay score function, it always scores 1 on nested elements

For documents like
{
"_id" : "abc123",
"_score" : 3.7613528,
"_source" : {
"id" : "abc123",
"pricePeriods" : [{
"periodTo" : "2016-01-02",
"eur" : 1036,
"gbp" : 782,
"dkk" : 6880,
"sek" : 9025,
"periodFrom" : "2015-12-26",
"nok" : 8065
}, {
"periodTo" : "2016-06-18",
"eur" : 671,
"gbp" : 457,
"dkk" : 4625,
"sek" : 5725,
"periodFrom" : "2016-01-02",
"nok" : 5430
} ]
}
}
I would like to have a gauss decay function score on the prices.
I have tried like this
"query" : {
"function_score" : {
"functions" : [{
"gauss" : {
"pricePeriods.dkk" : {
"origin" : "2500",
"scale" : "2500",
"decay" : 0.8
}
},
"filter" : {
"nested" : {
"filter" : {
"range" : {
"pricePeriods.periodTo" : {
"gte" : "2016-03-17T00:00:00.000"
}
}
},
"path" : "pricePeriods"
}
}
}
]
and it seems that the filter finds the prices I want to make a gauss on, but the resulting score is always 1.
Explain says
{ "value": 1,
"description": "min of:",
"details": [
{
"value": 1,
"description": "function score, score mode [multiply]",
"details": [
{
"value": 1,
"description": "function score, product of:",
"details": [
{
"value": 1,
"description": "match filter: ToParentBlockJoinQuery (+ConstantScore(pricePeriods.periodTo:[[32 30 31 36 2d 30 33 2d 31 37 54 30 30 3a 30 30 3a 30 30 2e 30 30 30] TO *]) #QueryWrapperFilter(_type:__pricePeriods))",
"details": []
},
{
"value": 1,
"description": "Function for field pricePeriods.dkk:",
"details": [
{
"value": 1,
"description": "exp(-0.5*pow(MIN[0.0],2.0)/1.4004437867889222E7)",
"details": []
}
]
}
]
}
]
}
I can see here that gauss apparently returns 1 when it can't find the field.
But the question is why it can't find the field in nested docs and how to fix that.
The gauss function returns 1 because, as you said, it can't find the field: the field is nested. You basically need to wrap your whole function_score query in a nested query:
{
"query": {
"nested": {
"path": "pricePeriods",
"query": {
"function_score": {
"functions": [
{
"gauss": {
"pricePeriods.dkk": {
"origin": "2500",
"scale": "2500",
"decay": 0.8
}
},
"filter": {
"range": {
"pricePeriods.periodTo": {
"gte": "2016-03-17T00:00:00.000"
}
}
}
}
]
}
}
}
}
}
Does this help?

Resources