ElasticSearch - Boost score for fuzzy words - elasticsearch

I want to perform a fuzzy search on the user's search words (apple iphone 5s). I want to give the highest score to the first word (apple), a little less to the second, and so on.
I started with the query given below, but it is not working as I expected:
{
"query": {
"fuzzy_like_this_field": {
"name": {
"like_text": "appla^4 iphane^2 5^1",
"max_query_terms": 12
}
}
},
"fields": "name",
"sort": {
"_score": {
"order": "desc"
}
}
}
May I know how to write this query??

I found the answer.
{
"query" : {
"bool" : {
"should" : [
{
"fuzzy" : {
"name" : {
"min_similarity" : 0.5,
"boost" : 4,
"value" : "appla",
"prefix_length" : 0
}
}
},
{
"fuzzy" : {
"name" : {
"min_similarity" : 0.1,
"boost" : 2,
"value" : "iphane",
"prefix_length" : 1
}
}
} ,
{
"fuzzy" : {
"name" : {
"min_similarity" : 0.1,
"boost" : 1,
"value" : "5",
"prefix_length" : 1
}
}
}
]
}
}
}

Related

Not able to search elasticsearch document

I am a newbie at Elasticsearch. I am using Elasticsearch 7.8.1 for some custom search in my application.
Here is the sample dataset.
The search that needs to happen is something like this:
select * from maintenance_logs
where vinNumber = "xyz"
and organizationId = 1
and dtcCode like %p101%
or subSystem like %p101%
or description like %p101%;
Here is the document stored:
GET /maintenance_logs/_search
{
"took" : 0,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 2,
"relation" : "eq"
},
"max_score" : 1.0,
"hits" : [
{
"_index" : "maintenance_logs",
"_type" : "_doc",
"_id" : "41a47230-02d1-11ed-a8f8-813988188fd2",
"_score" : 1.0,
"_source" : {
"_class" : "com.domain.search.MaintenanceLog",
"id" : "41a47230-02d1-11ed-a8f8-813988188fd2",
"maintenanceActivity" : "test103",
"vinNumber" : "DH34ASD7SDFF84742",
"organizationId" : 1,
"partitionYear" : "2022",
"dtcCode" : "",
"subSystem" : "",
"description" : "",
"odometer" : 91000,
"statsDate" : "2022-07-13"
}
},
{
"_index" : "maintenance_logs",
"_type" : "_doc",
"_id" : "5fac7720-033d-11ed-97e1-a3441dab3d6a",
"_score" : 1.0,
"_source" : {
"_class" : "com.search.MaintenanceLog",
"id" : "5fac7720-033d-11ed-97e1-a3441dab3d6a",
"maintenanceActivity" : "test103",
"vinNumber" : "DH34ASD7SDFF84742",
"organizationId" : 1,
"partitionYear" : "2022",
"dtcCode" : "D101",
"subSystem" : "ac vent",
"description" : "ac vent replaced",
"odometer" : 91000,
"statsDate" : "2022-07-14"
}
}
]
}
}
This is how my Document looks:
@Document(indexName = "maintenance_logs", createIndex = true)
public class MaintenanceLog {
@Id
private String id;
private String maintenanceActivity;
private String vinNumber;
private Integer organizationId;
private String partitionYear;
private String dtcCode;
private String subSystem;
private String description;
private Integer odometer;
}
Here is my query. The intention is that I have a search bar where, let's say, I typed p101. It should then look through all the documents,
do an exact match on vinNumber and organizationId, and then return whatever partially matches (like a MySQL LIKE query) on any one of these attributes: dtcCode, subSystem, maintenanceActivity or description.
GET /maintenance_logs/_search
{
"query": {
"bool" : {
"must" : [
{ "term" : { "vinNumber" : "DH34ASD7SDFF84742" } },
{ "term" : { "organizationId" : 1 } }
],
"should" : [
{ "term" : { "dtcCode": "p101*" } },
{ "term" : { "subSystem" : "p101*" }},
{ "term" : { "maintenanceActivity" : "p101*" }},
{ "term" : { "description" : "p101*" }}
],
"minimum_should_match" : 1,
"boost" : 1.0
}
}
}
You have two issues with your query:
First, you are using a term query instead of a wildcard query for the pattern match.
Second, you are running the term query against vinNumber, which is a text field.
To resolve this, you need to use a wildcard query instead of a term query, and you need to use vinNumber.keyword instead of vinNumber (assuming vinNumber is mapped as a multi-field with both text and keyword). Please check the query below:
{
"query": {
"bool": {
"must": [
{
"term": {
"vinNumber.keyword": "DH34ASD7SDFF84742"
}
},
{
"term": {
"organizationId": 1
}
}
],
"minimum_should_match": 1,
"should": [
{
"wildcard": {
"dtcCode": {
"value": "d10*"
}
}
},
{
"wildcard": {
"subSystem": {
"value": "p101*"
}
}
},
{
"wildcard": {
"maintenanceActivity": {
"value": "p101*"
}
}
},
{
"wildcard": {
"description": {
"value": "p101*"
}
}
}
]
}
}
}
Also, you need to set "minimum_should_match": 1 because your query has an AND condition on vinNumber and organizationId: when a bool query contains must clauses, the should clauses are optional by default.
Term queries are used for exact text matching. If you need to do a partial search, you can use a Wildcard Query, a Regexp Query or a Query String query.
If you are using the default mapping, then you need to modify your query as follows:
{
"query": {
"bool": {
"must": [
{
"term": {
"vinNumber.keyword": "DH34ASD7SDFF84742"
}
},
{
"term": {
"organizationId": 1
}
}
],
"should": [
{
"query_string": {
"query": "*p101*",
"fields": [
"dtcCode"
]
}
},
{
"query_string": {
"query": "*p101*",
"fields": [
"subSystem"
]
}
},
{
"query_string": {
"query": "*p101*",
"fields": [
"maintenanceActivity"
]
}
},
{
"query_string": {
"query": "*p101*",
"fields": [
"description"
]
}
}
],
"minimum_should_match": 1
}
}
}
Note :
If you need to perform a partial search in such a way that, the text matches from the beginning of the value of the fields dtcCode,subSystem, etc. then you can simply go with Prefix Query as well.

Can Elastic Search do aggregations for within a document?

I have a mapping like this:
mappings: {
"seller": {
"properties" : {
"overallRating": {"type" : byte}
"items": [
{
itemName: {"type": string},
itemRating: {"type" : byte}
}
]
}
}
}
Each item will only have one itemRating. Each seller will only have one overall rating. There can be many items, and at most I'm expecting maybe 50 items with itemRatings. Not all items have to have an itemRating.
I'm trying to get an average rating for each seller that combines all itemRatings and the overallRating. I have looked into aggregations but all I have seen are aggregations for across all documents. The aggregation I'm looking to do is within the document itself, and I am not sure if that is possible. Any tips would be appreciated.
Yes, this is very much possible with Elasticsearch. To produce a combined rating, you simply need to sub-aggregate by the document ID. The only thing present in each bucket would be the individual document, which is what you want.
Here is an example:
Create the index:
PUT /ratings
{
"mappings": {
"properties": {
"overallRating": {"type" : "float"},
"items": {
"type" : "nested",
"properties": {
"itemName" : {"type" : "keyword"},
"itemRating" : {"type" : "float"},
"overallRating": {"type" : "float"}
}
}
}
}
}
Add some data:
POST ratings/_doc/
{
"overallRating" : 1,
"items" : [
{
"itemName" : "labrador",
"itemRating" : 10,
"overallRating" : 1
},
{
"itemName" : "saint bernard",
"itemRating" : 20,
"overallRating" : 1
}
]
}
{
"overallRating" : 1,
"items" : [
{
"itemName" : "cat",
"itemRating" : 5,
"overallRating" : 1
},
{
"itemName" : "rat",
"itemRating" : 10,
"overallRating" : 1
}
]
}
Query the index for a combined rating and sort by the rating:
GET ratings/_search
{
"size": 0,
"query": {
"match_all": {}
},
"aggs": {
"average_rating": {
"composite": {
"sources": [
{
"ids": {
"terms": {
"field": "_id"
}
}
}
]
},
"aggs": {
"average_rating": {
"nested": {
"path": "items"
},
"aggs": {
"avg": {
"avg": {
"field": "items.compound"
}
}
}
}
}
}
},
"runtime_mappings": {
"items.compound": {
"type": "double",
"script": {
"source": "emit(doc['items.overallRating'].value + doc['items.itemRating'].value)"
}
}
}
}
The result (please note that I changed the exact values of the ratings between writing the answer and running it in the console, so the averages are a bit different):
{
"took" : 2,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 2,
"relation" : "eq"
},
"max_score" : null,
"hits" : [ ]
},
"aggregations" : {
"average_rating" : {
"after_key" : {
"ids" : "3vUp44EBbR3hrRYkA8pj"
},
"buckets" : [
{
"key" : {
"ids" : "3_Up44EBbR3hrRYkLsrC"
},
"doc_count" : 1,
"average_rating" : {
"doc_count" : 2,
"avg" : {
"value" : 151.0
}
}
},
{
"key" : {
"ids" : "3vUp44EBbR3hrRYkA8pj"
},
"doc_count" : 1,
"average_rating" : {
"doc_count" : 2,
"avg" : {
"value" : 8.5
}
}
}
]
}
}
}
One change for convenience:
I edited your mappings to add overallRating to each item entry. This simplifies the calculations that come subsequently, simply because you only look in the nested scope and never have to step out.
I also had to use a "runtime mapping" to combine the values of each overallRating and itemRating, to produce a better average. I basically summed every itemRating with the overallRating and averaged those sums across every entry.
I had to use a top level composite "id" aggregation so that we only get results per document (which is what you want).
There is some pretty heavy lifting happening here, but it is very possible and easy to edit this as you require.
HTH.

Elastic how to use the aggregation buckets to update the documents

I'm new to elastic/painless and needed some assistance.
Having this query :
GET index1/_search/
{
"size": 0,
"aggs": {
"attrs_root": {
"nested": {
"path": "business_index_jd_list_agg"
},
"aggs": {
"attrs": {
"terms": {
"field": "jdl_id"
},
"aggs": {
"sumOfQuantity" : {
"sum" : {
"field" : "value"
}
}
}
}
}
}
}
}
and these results from that query :
[...]
"aggregations" : {
"attrs_root" : {
"doc_count" : 5,
"attrs" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : -666,
"doc_count" : 1,
"sumOfQuantity" : {
"value" : 55.0
}
},
{
"key" : 93,
"doc_count" : 1,
"sumOfQuantity" : {
"value" : 25.0
},
[...]
]
}
}
}
}
How can I use that query and navigate through those results using a painless script in order to update each document in the index with that aggregated info? Something like this:
{
"jdl_id" : -666,
"value" : 55.0
}
},
{
"jdl_id" : 93,
"value" : 25.0
}
},
[...]
Thank you.

Aggregation on .keyword to return only the keys that contain a specific string

I am new to aggregations in Elasticsearch, using 7.2. I am trying to write an aggregation on Tree.keyword to only return the count of documents that have a key that contains the word "Branch". I have tried sub-aggregations, bucket_selector (which doesn't work for key strings) and scripts. Does anyone have any ideas or suggestions on how to approach this?
Mapping:
{
"testindex" : {
"mappings" : {
"properties" : {
"Tree" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword"
}
}
}
}
}
}
}
Example Query that returns all the keys but what I need to do is limit to only return keys with "Branch" or better yet just the count of how many "Branch" keys there are:
GET testindex/_search
{
"aggs": {
"bucket": {
"terms": {
"field": "Tree.keyword"
}
}
}
}
Returns:
{
"took" : 1,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 1,
"relation" : "eq"
},
"max_score" : 1.0,
"hits" : [
{
"_index" : "testindex",
"_type" : "_doc",
"_id" : "1",
"_score" : 1.0,
"_source" : {
"Tree" : [
"Car:76",
"Branch:yellow",
"Car:one",
"Branch:blue"
]
}
}
]
},
"aggregations" : {
"bucket" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : "Car:76",
"doc_count" : 1
},
{
"key" : "Branch:yellow",
"doc_count" : 1
},
{
"key" : "Car:one",
"doc_count" : 1
},
{
"key" : "Branch:blue",
"doc_count" : 1
}
]
}
}
}
You have to add includes to limit the result. Here's a code sample; hopefully this helps you.
GET testindex/_search
{
"_source": {
"includes": [
"Branch"
]
},
"aggs": {
"bucket": {
"terms": {
"field": "Tree.keyword"
}
}
}
}
It is possible to filter the values for which buckets will be created. This can be done using the include and exclude parameters, which are based on regular expression strings or arrays of exact values. Additionally, include clauses can filter using partition expressions.
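For your case, it should be like this (include is a regular expression that must match the whole key, so the pattern needs .* to match any suffix after "Branch:"),
GET testindex/_search
{
"aggs": {
"bucket": {
"terms": {
"field": "Tree.keyword",
"include": "Branch:.*"
}
}
}
}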
Thanks for all the help! Unfortunately, none of those solutions worked for me. I ended up using a script to return all the branches and then setting everything else into a new key. Then used a bucket script to subtract 1 in Total_Buckets. Probably a better solution out there but hopefully it helps someone
GET testindex/_search
{
"aggs": {
"bucket": {
"cardinality": {
"field": "Tree.keyword",
"script": {
"lang": "painless",
"source": "if(_value.contains('Branches:')) { return _value} return 1;"
}
}
},
"Total_Branches": {
"bucket_script": {
"buckets_path": {
"my_var1": "bucket.value"
},
"script": "return params.my_var1-1"
}
}
}
}

Elasticsearch decay function score

I'm trying the below
PUT test/foo/1
{
"num": 100
}
GET test/foo/_search
{
"query" : {
"function_score" : {
"query" : {
"match" : {
"num": 100
}
},
"functions" : [
{
"filter" : {
"match_all" : {
}
},
"gauss" : {
"num" : {
"origin": 0,
"scale" : 500,
"offset" : 0,
"decay" : 0.1
},
"multi_value_mode" : "MIN"
}
}
],
"score_mode" : "sum",
"max_boost" : 3.4028235E38
}
}
}
---
{
"hits": {
"total": 1,
"max_score": 0.91201085,
"hits": [
{
"_index": "test",
"_type": "foo",
"_id": "1",
"_score": 0.91201085,
"_source": {
"num": 100
}
}
]
}
}
I'm using sum as score mode. Since the score of the query is 1 and the score of the decay function is 0.91201085 I was expecting the score to be 1.91201085. What am I missing?
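Use "boost_mode" : "sum". By default boost_mode is multiply, so the query score is multiplied by the function score instead of being added to it. You can also use explain in the query to understand how the document was scored.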
POST testindexy/_search
{
"query" : {
"function_score" : {
"query" : {
"match" : {
"num": 100
}
},
"functions" : [
{
"filter" : {
"match_all" : {
}
},
"gauss" : {
"num" : {
"origin": 0,
"scale" : 500,
"offset" : 0,
"decay" : 0.1
},
"multi_value_mode" : "MIN"
}
}
],
"boost_mode": "sum",
"score_mode" : "sum",
"max_boost" : 3.4028235E38
}
}
}

Resources