Can I tell Elasticsearch to give a particular field more boost? - elasticsearch

I'm using a simple search in Elasticsearch but I would like to give a particular url a boost so it would come up first in the search result. I'm not sure if it's possible?
Here's my mapping.
"hal": {
"properties": {
"label": {
"type": "string",
"fields": {
"raw": {
"type": "string",
"index": "not_analyzed"
}
}
}
}
},
"url": {
"type": "string",
"index": "not_analyzed"
},
And here's my query
{
"fields": [ "url","brand"],
"query": {
"bool": {
"must": [{
"terms": {
"brand": ["brand"]
}
},{
"terms": {
"hal.label.raw": ["donald trump"]
}
}]
}
}
}
Now when I search I get at least 500 results back. However, there's a particular URL pattern that I would like to boost, which is
http://www.anything.com/people/* so that any URL containing /people would come up first in the search results. Is this at all possible in Elasticsearch? Otherwise I would have to fetch everything and filter in code instead.

You can add a should clause that will automatically boost any matching results (make sure url is set to type: string and index: not_analyzed):
{
"fields": [
"url",
"brand"
],
"query": {
"bool": {
"must": [
{
"terms": {
"brand": [
"brand"
]
}
},
{
"terms": {
"hal.label.raw": [
"donald trump"
]
}
}
],
"should": [
{
"wildcard": {
"url": "http://www.anything.com/people/*"
}
}
]
}
}
}
You can also specify a specific boost value:
{
"fields": [
"url",
"brand"
],
"query": {
"bool": {
"must": [
{
"terms": {
"brand": [
"brand"
]
}
},
{
"terms": {
"hal.label.raw": [
"donald trump"
]
}
}
],
"should": [
{
"wildcard": {
"url": {
"value": "http://www.anything.com/people/*",
"boost": 1
}
}
}
]
}
}
}

You can use a regexp query in the should clause with a high boost. Try the following query:
{
"fields": [
"url",
"brand"
],
"query": {
"bool": {
"must": [
{
"terms": {
"brand": [
"brand"
]
}
},
{
"terms": {
"hal.label.raw": [
"donald trump"
]
}
}
],
"should": [
{
"regexp": {
"url": "http://www.anything.com/people/.*",
"boost" : 50
}
}
]
}
}
}

Related

Aggregation on query in Elasticsearch

I have an index in Elasticsearch and this is the corresponding mapping:
'''
GET /vid_detect2/_mapping
{
"properties": {
"date":{"type":"date"},
"time":{ "type": "text",
"fielddata": true},
"frame_id": {"type":"integer"},
"camera_id":{"type":"integer"},
"path":{"type":"text"},
"objects" : {"type": "nested",
"properties": {
"class": { "type": "text" ,"fielddata":true },
"confidence": { "type": "float" },
"coordinates":{ "type": "nested" ,
"properties": { "x" :{"type":"float"},
"y" :{"type":"float"},
"w" :{"type":"float"},
"h" :{"type":"float"}
} }
}
}
}
}'''
I want to run the following query first:
"query": {
"bool": {
"must": [
{
"nested": {
"path": "objects",
"query": {
"bool": {
"must": [
{ "match": { "objects.class": "person" }}
]
}}}}
]
}}
and then aggregate the returned results with respect to camera_id and further aggregate those aggregated results with date histogram. Please help.
Good start! You can now simply add an aggregation section to achieve what you want:
{
"query": {
"bool": {
"must": [
{
"nested": {
"path": "objects",
"query": {
"bool": {
"must": [
{
"match": {
"objects.class": "person"
}
}
]
}
}
}
}
]
}
},
"aggs": {
"camera": {
"terms": {
"field": "camera_id"
},
"aggs": {
"histo": {
"date_histogram": {
"field": "date",
"interval": "day"
}
}
}
}
}
}

function_score for query spanning multiple indices / filter not working as expected

I implemented function_score successfully for queries running against one index, but have trouble applying function_score to a query spanning multiple indices with different fields in Elasticsearch 6.2.4.
This query:
POST /web_document,web_part/_search
{
"from": 0,
"size": 100,
"query": {
"function_score": {
"query": {
"bool": {
"should": [
{
"bool": {
"must": [
{
"multi_match": {
"type": "best_fields",
"query": "sampling probe",
"fields": [
"titleD.autocomplete^5",
"titleE.autocomplete^5"
]
}
}
],
"filter": [
{
"term": {
"_index": {
"value": "web_document"
}
}
}
]
}
},
{
"bool": {
"must": [
{
"multi_match": {
"type": "best_fields",
"query": "sampling probe",
"fields": [
"textD.autocomplete^5",
"textE.autocomplete^5"
]
}
}
],
"filter": [
{
"term": {
"_index": {
"value": "web_part"
}
}
}
]
}
}
]
}
},
"functions": [
{
"filter": {
"bool": {
"must_not": [
{ "exists": { "field": "revDestDate" } },
{ "exists": { "field": "mutationDate" } }
]
}
},
"script_score": {
"script": "100"
}
},
{
"filter": {
"term": {
"_index": "web_part"
}
},
"gauss": {
"mutationDate": {
"origin": "now",
"scale": "90d",
"decay": 0.5
}
},
"weight": 20
},
{
"filter": {
"term": {
"_index": "web_document"
}
},
"gauss": {
"revDestDate": {
"origin": "now",
"scale": "90d",
"decay": 0.5
}
},
"weight": 20
}
],
"score_mode" : "first",
"boost_mode": "sum"
}
}
}
Returns the following error:
{
"error": {
…
"failed_shards": [
{
"index": "web_document",
…
"reason": {
"type": "parsing_exception",
"reason": "unknown field [mutationDate]",
…
}
},
{
"index": "web_part",
…
"reason": {
"type": "parsing_exception",
"reason": "unknown field [revDestDate]",
…
}
}
]
},
"status": 400
}
I also tried to incorporate the function_score into the individual queries like this,
which returns the same error:
POST /web_document,web_part/_search
{
"from": 0,
"size": 100,
"query": {
"bool": {
"should": [
{
"function_score": {
"query": {
"bool": {
"must": [
{
"multi_match": {
"type": "best_fields",
"query": "sampling probe",
"fields": [
"titleD.autocomplete^5",
"titleE.autocomplete^5"
]
}
}
],
"filter": [
{
"term": {
"_index": {
"value": "web_document"
}
}
}
]
}
},
"functions": [
{
"gauss": {
"revDestDate": {
"origin": "now",
"scale": "90d",
"decay": 0.5
}
}
}
]
}
},
{
"function_score": {
"query": {
"bool": {
"must": [
{
"multi_match": {
"type": "best_fields",
"query": "sampling probe",
"fields": [
"textD.autocomplete^5",
"textE.autocomplete^5"
]
}
}
],
"filter": [
{
"term": {
"_index": {
"value": "web_part"
}
}
}
]
}
},
"functions": [
{
"gauss": {
"mutationDate": {
"origin": "now",
"scale": "90d",
"decay": 0.5
}
}
}
]
}
}
]
}
}
}
According to the comment from Darshan Pratil on this answer to the same problem, this worked in past versions.
How can this be done in Elasticsearch 6.2.4?

Elasticsearch - Filtered query with weighted types

I have inherited an Elasticsearch query that I am trying to modify. The query I have at the moment is:
{
"fields": [
],
"from": 0,
"size": 51,
"query": {
"filtered": {
"query": {
"query_string": {
"fields": [
"data.*"
],
"default_operator": "AND",
"query": "*Search term*"
}
},
"filter": [
{
"terms": {
"type": [
"typeOne",
"typeTwo",
"typeThree"
]
}
}
]
}
}
}
Now what I have been trying to do is boost one of these terms over the other 2 in the results but have not been able to get it to work. I have tried adding a "boost" value but this has oddly given me the opposite effect - it disables any type that is given a boost.
I tried the following as the "filter" object:
"filter": [
{
"bool": {
"should": [
{
"term": {
"type": "typeOne"
}
},
{
"term": {
"type": "typeTwo"
}
},
{
"term": {
"type": "typeThree",
"boost": 2
}
}
]
}
}
]
But as I said before, instead of boosting "typeThree" it removes all "typeThree" from the results.
Can anyone help me boost a specific term type?
There are multiple ways to structure the query to achieve the above. One approach would be to use function_score. It would look something along these lines:
Example:
"query": {
"function_score": {
"functions": [
{
"filter": {
"term": {
"type": "typeThree"
}
},
"weight": 2
}
],
"score_mode": "sum",
"boost_mode": "sum",
"query": {
"filtered": {
"query": {
"query_string": {
"fields": [
"data.*"
],
"default_operator": "AND",
"query": "*search term*"
}
},
"filter": [
{
"terms": {
"type": [
"typeOne",
"typeTwo",
"typeThree"
]
}
}
]
}
}
}
}
You can enable explain to see how this affects the scoring.
While keety's answer was 98% of the way there, it took a bit of extra googling to get it all together. The problem is that "weight" doesn't work here, instead you must use "boost_factor". The final query looks like this:
{
"fields": [
],
"from": 0,
"size": 51,
"query": {
"function_score": {
"functions": [
{
"filter": {
"term": {
"type": "typeOne"
}
},
"boost_factor": 1.2
},
{
"filter": {
"term": {
"type": "typeTwo"
}
},
"boost_factor": 1.1
},
{
"filter": {
"term": {
"type": "typeThree"
}
},
"boost_factor": 1
}
],
"score_mode": "sum",
"boost_mode": "sum",
"query": {
"filtered": {
"query": {
"query_string": {
"fields": [
"data.*"
],
"default_operator": "AND",
"query": "*search term*"
}
},
"filter": [
{
"terms": {
"type": [
"typeOne",
"typeTwo",
"typeThree"
]
}
}
]
}
}
}
}
}

elasticsearch sort not working with partial_fields

I have this kind of sort:
"sort": [
{
"_script": {
"script": "return doc.score*10 + doc['field2'].value",
"type": "number",
"order": "asc"
}
}
]
partial fields:
"filter": {
"partial_fields": {
"fields": {
"exclude": [
"field5*"
]
}
}
}
The problem is that the sort does not work if partial_fields is set. Is there a reason for this? Or do I have to remove partial_fields in order to get the sort working?
Here's the whole query:
{
"size": 10,
"query": {
"filtered": {
"query": {
"bool": {
"should": [
{
"text": {
"name_en": {
"query": "testing",
"operator": "or",
"boost": 20
}
}
}
]
}
},
"filter": {
"and": [
{
"term": {
"_type": "test"
}
}
]
}
},
"filter": {
"partial_fields": {
"fields": {
"exclude": [
"field2*"
]
}
}
}
},
"sort": [
{
"_script": {
"script": "return doc.score*1000 + doc['field2'].value",
"type": "number",
"order": "asc"
}
}
]
}
Thanks.
According to the documentation for partial fields, I do not see partial_fields being used nested under a filter node in the JSON request. I think this could be your issue; try moving the partial_fields section up to the same level as sort, like the following:
{
"size": 10,
"query": {
"filtered": {
"query": {
"bool": {
"should": [
{
"text": {
"name_en": {
"query": "testing",
"operator": "or",
"boost": 20
}
}
}
]
}
},
"filter": {
"and": [
{
"term": {
"_type": "test"
}
}
]
}
}
},
"partial_fields": {
"fields": {
"exclude": [
"field2*"
]
}
},
"sort": [
{
"_script": {
"script": "return doc.score*1000 + doc['field2'].value",
"type": "number",
"order": "asc"
}
}
]
}

random_score does not work properly with "should"

In the following code I always get "Alexander McQueen" products coming first, no matter what I set the seed to.
How can I change my search query to properly shuffle results?
{
"query": {
"function_score": {
"random_score": {
"seed": 99287
},
"query": {
"bool": {
"should": [
{
"bool": {
"must": [
{
"query_string": {
"query": "(adidas originals)",
"default_operator": "AND",
"fields": [
"name^4",
"description"
]
}
},
{
"terms": {
"category": [
"Fashion",
"Sports",
"Other",
""
]
}
},
{
"term": {
"currency": {
"term": "USD"
}
}
}
]
}
},
{
"bool": {
"must": [
{
"query_string": {
"query": "(alexander mcqueen)",
"default_operator": "AND",
"fields": [
"name^4",
"description"
]
}
},
{
"terms": {
"category": [
"Fashion"
]
}
},
{
"term": {
"currency": {
"term": "USD"
}
}
}
]
}
}
]
}
}
}
},
"size": 40,
"from": 0
}
That's because the random score is being multiplied by the _score from the original query. If you want the results to be purely based on the random score, then set the boost_mode to replace (instead of the default multiply).
See the function_score documentation.

Resources