Elasticsearch Normalised Score with Boost Documents - elasticsearch

I am building a query that takes a set of codes and geo_point locations. The result should be a list of documents ordered by distance to the origin. However, I would like the score to be normalised, with, let's say, a score of 10 for the document at the origin location, decreasing with distance from the origin. I have actually managed to build this search, but I would also like to increase the score of a document if it includes an additional variable in the list of codes.
These are the requirements:
The output should be a list of documents whose score is normalised according to distance from the origin.
Documents returned should contain at least one yvar (i.e. yvar1 OR yvar2 OR yvar3 OR yvar...).
Only documents after a certain date should be returned
Only documents containing all the xvars passed to the query must be returned.
If a document has an additional x variable (e.g. xvar4), the score for this document should be increased by 0.1. This is the bit I am struggling with.
This is my mapping:
{
"mappings": {
"properties": {
"codes": {
"type": "keyword"
},
"date": {
"type": "date",
"format": "dd/MM/yyyy"
},
"coordinates": {"type": "geo_point"}
}
}
}
Some example documents (NB: The distanceToOrigin is for analysing the output only):
{ "create" : { "_index": "my-index", "_id" : "1" } }
{ "id": 1, "coordinates": { "lat": 51.5132, "lon": -0.1362}, "available capacity": 5, "last updated": "01/11/2021", "ResponseCodes": ["xvar1", "xvar2", "xvar3", "yvar1", "yvar2", "yvar3" ] ,"distanceTOorigin": 0 }
{ "create" : { "_index": "my-index", "_id" : "2" } }
{ "id": 2, "coordinates": { "lat": 52.9114, "lon": 0.5580}, "available capacity": 5, "last updated": "01/11/2021", "ResponseCodes": ["xvar1", "xvar2", "xvar3", "xvar4", "yvar1", "yvar2", "yvar3" ] ,"distanceTOorigin": 114 }
{ "create" : { "_index": "my-index", "_id" : "3" } }
{ "id": 3, "coordinates": { "lat": 51.4890, "lon": -0.6029}, "available capacity": 5, "last updated": "01/11/2021", "ResponseCodes": ["xvar1", "xvar2", "xvar3", "yvar1", "yvar2", "yvar3" ] ,"distanceTOorigin": 22 }
{ "create" : { "_index": "my-index", "_id" : "4" } }
{ "id": 4, "coordinates": { "lat": 57.2555, "lon": -3.2692}, "available capacity": 5, "last updated": "01/11/2021", "ResponseCodes": ["xvar1", "xvar2", "xvar3", "yvar1", "yvar2", "yvar3" ] ,"distanceTOorigin": 530 }
My query which produces a normalised list of documents:
{
"query": {
"function_score": {
"query": { "match_all": {} },
"boost": "1",
"functions": [
{
"filter": [
{ "range": { "date":{ "gte": "01/11/2000" }}},
{ "terms_set": { "codes" : { "terms" : ["yvar1", "yvar2", "yvar3" ],
"minimum_should_match_script": { "source": "1" }}}}
],
"random_score": {},
"weight": 1
},
{
"filter": [
{ "terms_set": { "codes" : { "terms" : ["xvar1", "xvar2", "xvar3" ],
"minimum_should_match_script": { "source": "params.num_terms" }}}}
],
"weight": 1
},
{
"exp": {
"coordinates": {
"origin": "51.5132, -0.1362",
"offset": "0km",
"decay": 0.5,
"scale":"350km"}
},
"weight": 10
}
],
"max_boost": 10,
"score_mode": "max",
"boost_mode": "multiply"
}
}
}
This is what I tried as a query (substituting it for the match_all query), but it does not work, as I end up with a non-normalised list:
"query": {
"bool": {
"should": [
{
"terms_set": { "codes" : { "terms" : ["xvar4"],
"minimum_should_match_script": { "source": "0" }, "boost" : 0.1}}
},
{
"match_all": {}
}
]
}
}
Any help for this Elasticsearch beginner will be greatly appreciated.

I found the solution by accessing the _score in a script_score query:
{
"query": {
"script_score": {
"query": {
"match": { "codes": "xvar4" }
},
"script": {
"source": "_score +0.1"
}
}
}
}

Related

elasticsearch Saved Search with Group by

index_name: my_data-2020-12-01
ticket_number: T123
ticket_status: OPEN
ticket_updated_time: 2020-12-01 12:22:12
index_name: my_data-2020-12-01
ticket_number: T124
ticket_status: OPEN
ticket_updated_time: 2020-12-01 12:32:11
index_name: my_data-2020-12-02
ticket_number: T123
ticket_status: INPROGRESS
ticket_updated_time: 2020-12-02 12:33:12
index_name: my_data-2020-12-02
ticket_number: T125
ticket_status: OPEN
ticket_updated_time: 2020-12-02 14:11:45
I want to create a saved search that groups by the ticket_number field and returns one unique document per ticket with its latest status (ticket_status). Is it possible?
You can simply query again; I am assuming you are using Kibana for visualization purposes. In your query, you need to filter on ticket_number and sort by ticket_updated_time.
Working example
Index mapping
{
"mappings": {
"properties": {
"ticket_updated_time": {
"type": "date"
},
"ticket_number" :{
"type" : "text"
},
"ticket_status" : {
"type" : "text"
}
}
}
}
Index sample docs
{
"ticket_number": "T123",
"ticket_status": "OPEN",
"ticket_updated_time": "2020-12-01T12:22:12"
}
{
"ticket_number": "T123",
"ticket_status": "INPROGRESS",
"ticket_updated_time": "2020-12-02T12:33:12"
}
Now as you can see, both the sample documents belong to the same ticket_number with different status and updated time.
Search query
{
"size" : 1, // fetch only the latest status document, if you remove this, will get other ticket with different status.
"query": {
"bool": {
"filter": [
{
"match": {
"ticket_number": "T123"
}
}
]
}
},
"sort": [
{
"ticket_updated_time": {
"order": "desc"
}
}
]
}
And search result
"hits": [
{
"_index": "65180491",
"_type": "_doc",
"_id": "2",
"_score": null,
"_source": {
"ticket_number": "T123",
"ticket_status": "INPROGRESS",
"ticket_updated_time": "2020-12-02T12:33:12"
},
"sort": [
1606912392000
]
}
]
If you need to group by ticket_number field, then you can use aggregation as well
Index Mapping:
{
"mappings": {
"properties": {
"ticket_updated_time": {
"type": "date",
"format": "yyyy-MM-dd HH:mm:ss"
}
}
}
}
Search Query:
{
"size": 0,
"aggs": {
"unique_id": {
"terms": {
"field": "ticket_number.keyword",
"order": {
"latestOrder": "desc"
}
},
"aggs": {
"latestOrder": {
"max": {
"field": "ticket_updated_time"
}
}
}
}
}
}
Search Result:
"buckets": [
{
"key": "T125",
"doc_count": 1,
"latestOrder": {
"value": 1.606918305E12,
"value_as_string": "2020-12-02 14:11:45"
}
},
{
"key": "T123",
"doc_count": 2,
"latestOrder": {
"value": 1.606912392E12,
"value_as_string": "2020-12-02 12:33:12"
}
},
{
"key": "T124",
"doc_count": 1,
"latestOrder": {
"value": 1.606825931E12,
"value_as_string": "2020-12-01 12:32:11"
}
}
]

How to filter query based on a field value

I'm working with elasticsearch Query dsl, and I can't find a way to express the following:
Return results that have has_price=true with "price" > min budget and "price" < max budget, and also return all results that have has_price=false.
In other words, I would like to apply the range filter only to results that have the has_price field set to true; results that have has_price set to false should be returned without taking the filter into consideration.
Here's the mapping:
{
"formations": {
"mappings": {
"properties": {
"code": {
"type": "text"
},
"date": {
"type": "date",
"format": "dd/MM/yyyy"
},
"description": {
"type": "text"
},
"has_price": {
"type": "boolean"
},
"place": {
"type": "text"
},
"price": {
"type": "float"
},
"title": {
"type": "text"
}
}
}
}
}
The following query combines the 2 scenarios as 2 should clauses in a bool-query. And as there are only should clauses, minimum_should_match will be 1, meaning that at least one should-clause has to match:
Abstract Code Snippet
GET formations/_search
{
"query": {
"bool": {
"should": [
{ <1st scenario: has_price = false> },
{ <2nd scenario> has_price = true AND price IN budget_range}
]
}
}
}
Actual Sample Code Snippets
# 1. Create the index and populate it with some sample documents
POST formations/_bulk
{"index": {"_id": 1}}
{"has_price": true, "price": 2.0}
{"index": {"_id": 2}}
{"has_price": true, "price": 3.0}
{"index": {"_id": 3}}
{"has_price": true, "price": 4.0}
{"index": {"_id": 4}}
{"has_price": false, "price": 2.0}
{"index": {"_id": 5}}
{"has_price": false, "price": 3.0}
{"index": {"_id": 6}}
{"has_price": false, "price": 4.0}
# 2. Query assuming min_budget = 2.0 and max_budget = 4.0
GET formations/_search
{
"query": {
"bool": {
"should": [
{
"bool": {
"filter": {
"term": {
"has_price": false
}
}
}
},
{
"bool": {
"filter": [
{
"term": {
"has_price": true
}
},
{
"range": {
"price": {
"gt": 2,
"lt": 4
}
}
}
]
}
}
]
}
}
}
# 3. Result Snippet (4 hits: 3 from 1st scenario & 1 from 2nd scenario)
"hits" : {
"total" : {
"value" : 4,
"relation" : "eq"
},
...
Don't forget to add the clause "minimum_should_match": 1 to your bool-query in case you add another non-should-clause to your bool-query.
Let me know if this answers your question & solves your issue.

elastic search query for aggregating on sub-fields of array

My Elasticsearch index contains data in the following format; the data field is an array of objects, each containing the date, orders, and visits for that term on that date:
{
"term": "ふるさと納税",
"data": [
{
"date": "2018-01-25",
"visits": 17670,
"ranking": 1,
"orders": 154
},
{
"date": "2018-02-14",
"visits": 13758,
"ranking": 1,
"orders": 116
},
{
"date": "2017-12-24",
"visits": 142578,
"ranking": 1,
"orders": 2565
},
{
"date": "2018-03-08",
"visits": 21799,
"ranking": 1,
"orders": 312
}
]
},{
"term": "帯 中古 振袖",
"data": [
{
"date": "2018-01-30",
"ranking": 2966,
"orders": 0,
"visits": 345
}
]
}
I would like to sum all the visits and orders for each term within a defined date range.
I have created this query :-
{
"_source": [],
"query": {
"bool": {
"filter": [
{"range": {"data.date": {"gte" : "2018-03-21"}}},
{"range": {"data.date": {"lte" : "2018-03-21"}}}
]
}
},
"aggs" : {
"by_term": {
"terms": {
"field": "term",
"order":{"sum_ranking":"desc"},
"size":100
},"aggs": {
"sum_ranking": {
"sum": {
"field" : "data.visits"
}
}
}
}
},
"from" : 0,
"size" : 0
}
It seems the filter is not working.
Can anyone help?
The mapping is :-
{
"settings" : {
"number_of_shards" : 1
},
"mappings" : {
"keyword" : {
"properties" : {
"term" : { "type" : "keyword" }
}
}
}
}

Elasticsearch: Intersection of array

Suppose my documents are like this
{
"salary": {
"max": 1572,
"min": 682
},
"skills": [
"Modula-3",
"Max/MSP",
"Arduino",
"SPARK",
"PL/SQL",
"Processing",
"Go",
"Mathematica",
"Modula-2",
"IDL",
"Heron",
"Scheme"
],
"company": "Merck",
"experience": 0,
"role": "Airport Security Screener",
"cities": [
"Ahmedabad",
"Mangaluru",
"Malegaon",
"Bokaro Steel City",
"Vadodara",
"Kollam"
]
}
And I want to run a query in which I provide a set of cities and get the documents ordered according to the cardinality of the intersection. For example, suppose my set of cities is ["Ahmedabad", "Mangaluru"]; then the cardinality of the intersection of this query with the above document is 2. What should my query be?
Sample Response
{"_score": 4.0202227, "cities": ["Ahmedabad","Mangaluru","Visakhapatnam", "Vijayawada"]}
{"_score": 2.27, "cities": ["Ahmedabad","Visakhapatnam", "Vijayawada"]}
{"_score": 1.79, "cities": ["Mangalauru","Vijayawada", "delhi", "bombay"]}
I am using elasticsearch 5.2.2
Maybe something like this will help you?
{
"query": {
"function_score": {
"query": {
"match": {
"cities": "Ahmedabad Mangaluru"
}
},
"functions": [
{
"filter": {
"match": {
"cities": "Ahmedabad"
}
},
"weight": 1
},
{
"filter": {
"match": {
"cities": "Mangaluru"
}
},
"weight": 1
}
],
"score_mode": "sum"
}
}
}

Elastic Search Won't Match For Arrays

I'm trying to search a document with the following structure:
{
"_index": "XXX",
"_type": "business",
"_id": "1252809",
"_score": 1,
"_source": {
"url": "http://Samuraijapanese.com",
"raw_name": "Samurai Restaurant",
"categories": [
{
"name": "Cafe"
},
{
"name": "Cajun Restaurant"
},
{
"name": "Candy Stores"
}
],
"location": {
"lat": "32.9948649",
"lon": "-117.2528171"
},
"address": "979 Lomas Santa Fe Dr",
"zip": "92075",
"phone": "8584810032",
"short_name": "samurai-restaurant",
"name": "Samurai Restaurant",
"apt": "",
"state": "CA",
"stdhours": "",
"city": "Solana Beach",
"hours": "",
"yelp": "",
"twitter": "",
"closed": 0
}
}
Searching it for url, raw_name, address, etc. all works, but searching the categories returns nothing. I'm trying to search like so (if I switch anything else in for categories.name, it works):
"query": {
"filtered" : {
"filter" : {
"geo_distance" : {
"location" : {
"lon" : "-117.15726",
"lat" : "32.71533"
},
"distance" : "5mi"
}
},
"query" : {
"multi_match" : {
"query" : "Cafe",
"fields" : [
"categories.name"
]
}
}
}
},
"sort": [
{
"_score" : {
"order" : "desc"
}
},
{
"_geo_distance": {
"location": {
"lat": 32.71533,
"lon": -117.15726
},
"order": "asc",
"sort_mode": "min"
}
}
],
"script_fields": {
"distance_from_origin": {
"script": "doc['location'].arcDistanceInKm(32.71533,-117.15726)"
}
},
"fields": ["_source"],
"from": 0,
"size": 10
}
If I switch out, for example, categories.name with address, and change the search term to Lomas, it returns the result
Without seeing your type mapping I can't answer definitively, but I would guess you have mapped categories as nested. When querying sub-documents of type nested (as opposed to object) you have to use a nested query.

Resources