Elastic search does not return proper result - elasticsearch

I have some tours as listed below.
Memorable Istanbul
Memorable Athens City Escape
Memorable Rome City Escape
Memorable Paris
I have created an index in elasticsearch as described below.
// Build the index name by appending the current tick count to the base name.
string newName = indexName + DateTime.Now.Ticks;
// Describe the new index: shard/replica settings, a custom analysis chain, and the mapping for T.
CreateIndexDescriptor createIndexDescriptor = new CreateIndexDescriptor(newName)
.Settings(s => s
.NumberOfShards(4)
.NumberOfReplicas(2)
.Setting("max_result_window", int.MaxValue)
.Setting("max_ngram_diff", 25)
.Analysis(a => a
.Analyzers(aa => aa
// "my_analyzer" uses the tokenizer registered below under the name "standart",
// followed by the lowercase, ASCII-folding, stemmer and stop token filters.
.Custom("my_analyzer", ca => ca
.Tokenizer("standart")
.Filters("lowercase", "my_ascii_folding", "my_stemm", "stop")
)
)
.TokenFilters(t => t
.AsciiFolding("my_ascii_folding", af => af.PreserveOriginal(true))
// NOTE(review): an English stemmer presumably reduces "memorable" to a shorter stem
// (e.g. "memor"), so partial inputs such as "memorab" would not equal any indexed
// term - confirm with the _analyze API.
.Stemmer("my_stemm", sm => sm.Language("english"))
//.EdgeNGram("key_edgengram", ng => ng.MinGram(4).MaxGram(10))
)
// Registers a standard tokenizer under the custom name "standart" referenced by the analyzer above.
.Tokenizers(t => t.Standard("standart"))
)
)
// Auto-map T, then map SearchingArea as a search-as-you-type field analyzed with "my_analyzer".
.Map<T>(mm => mm.AutoMap().Properties(p => p.SearchAsYouType(t => t.Name(n => n.SearchingArea).Analyzer("my_analyzer"))));
// Serialize the descriptor with the client's request/response serializer and decode it
// as a UTF-8 string (jsonData).
MemoryStream stream = new MemoryStream();
elasticClient.RequestResponseSerializer.Serialize(createIndexDescriptor, stream);
string jsonData = Encoding.UTF8.GetString(stream.ToArray());
// Send the create-index request.
var indexResponse = elasticClient.Indices.Create(createIndexDescriptor);
When I search for "memor" or "memorable", it returns all of them, but when I search for "memorab" or "memorabl", it does not return any results.
Here is my search query:
GET /tourIndex/_search
{
"from": 0,
"highlight": {
"fields": {
"searchingArea": {},
"route": {}
},
"post_tags": [
"</strong>"
],
"pre_tags": [
"<strong style='background: #efefef; padding: 3px;'>"
]
},
"query": {
"bool": {
"minimum_should_match": 1,
"should": [
{
"match": {
"searchingArea": {
"query": "memorab"
}
}
}
]
}
},
"size": 20,
"sort": [
{
"score": {
"order": "desc"
}
}
]
}
The result was the same when I used "match_phrase_prefix" and "match_phrase".
Thanks for any help.

Adding a working example
Index Mapping:
{
"settings": {
"analysis": {
"analyzer": {
"my_analyzer": {
"tokenizer": "my_tokenizer",
"filter": "lowercase"
}
},
"tokenizer": {
"my_tokenizer": {
"type": "edge_ngram",
"min_gram": 4,
"max_gram": 15,
"token_chars": [
"letter",
"digit"
]
}
}
},
"max_ngram_diff": 25
},
"mappings": {
"properties": {
"title": {
"type": "text",
"analyzer": "my_analyzer"
}
}
}
}
Index Data:
{
"title": "Memorable Istanbul"
}
{
"title": "Memorable Athens City Escape"
}
{
"title": "Memorable Rome City Escape"
}
{
"title": "Memorable Paris"
}
Search Query:
{
"query": {
"match": {
"title": "memorab"
}
}
}
Search Result:
"hits": [
{
"_index": "66262920",
"_type": "_doc",
"_id": "4",
"_score": 0.47070155,
"_source": {
"title": "Memorable Paris"
}
},
{
"_index": "66262920",
"_type": "_doc",
"_id": "1",
"_score": 0.41747034,
"_source": {
"title": "Memorable Istanbul"
}
},
{
"_index": "66262920",
"_type": "_doc",
"_id": "3",
"_score": 0.41747034,
"_source": {
"title": "Memorable Rome City Escape"
}
},
{
"_index": "66262920",
"_type": "_doc",
"_id": "2",
"_score": 0.38820273,
"_source": {
"title": "Memorable Athens City Escape"
}
}
]

Related

Elastic search Query - How to pass list of queries

I have created an Index with 10000+ documents. Here is the sample from that:
{
"_index": "index_1",
"_type": "_doc",
"_id": "48a454f9-71d2-41a0-9e62-08c149366f05",
"_score": 13.977877,
"_source": {
"customer_id":10,
"customer_name": Mike,
"customer_phone": 1111111111,
"customer_address": "XYZ"
}
},
{
"_index": "index_1",
"_type": "_doc",
"_id": "48a454f9-71d2-41a0-9e62-08c149366f71",
"_score": 12.977861,
"_source": {
"customer_id":20,
"customer_name": Angie,
"customer_phone": 2222222222,
"customer_address": "ABC"
}
},
{
"_index": "index_1",
"_type": "_doc",
"_id": "48a454f9-71d2-41a0-9e62-08c149366f62",
"_score": 10.978777,
"_source": {
"customer_id":30,
"customer_name": John,
"customer_phone": 3333333333,
"customer_address": "PQR"
}
},
{
"_index": "index_1",
"_type": "_doc",
"_id": "48a454f9-71d2-41a0-9e62-08c149366f54",
"_score": 11.817877,
"_source": {
"customer_id":40,
"customer_name": Andy,
"customer_phone": 4444444444,
"customer_address": "MNO"
}
},
{
"_index": "index_1",
"_type": "_doc",
"_id": "48a454f9-71d2-41a0-9e62-08c149366f32",
"_score": 14.457877,
"_source": {
"customer_id": 50,
"customer_name": Nick,
"customer_phone": 5555555555,
"customer_address": "CDE"
}
},
{
"_index": "index_1",
"_type": "_doc",
"_id": "48a454f9-71d2-41a0-9e62-08c149366f21",
"_score": 16.487877,
"_source": {
"customer_id":60,
"customer_name": Atlas,
"customer_phone": 6666666666,
"customer_address": "DFE"
}
}
I want to pass multiple queries at once as list in json body and get the result also in list format:
For example: -> I want to pass below 3 queries in the search condition at the same time:
1) customer_id = 10, customer_name = Mike, customer_phone = 1111111111
2) customer_id = 40, customer_name = Andy, customer_phone = 4444444444
3) customer_id = 50, customer_name = Nick, customer_phone = 5555555555
Although, I can combine these 3 queries using 'AND' and 'OR' like below:
{
"query": {
"query_string": {
"query": "(customer_id: 10 AND customer_name: Mike AND customer_phone: 1111111111) OR (customer_id: 40 AND customer_name: Andy AND customer_phone: 4444444444) OR (customer_id: 50 AND customer_name: Nick AND customer_phone: 5555555555)"
}
}
}
Other than combining the queries as above, is there a better way to achieve the same result (like passing the queries as a list)?
You can combine should and must clauses in a bool query.
{
"query": {
"bool": {
"should": [
{
"bool": {
"must": [
{
"term": {
"customer_id": {
"value": 10
}
}
},
{
"match": {
"customer_name": "Mike"
}
},
{
"term": {
"customer_phone": {
"value": 1111111111
}
}
}
]
}
},
{
"bool": {
"must": [
{
"term": {
"customer_id": {
"value": 50
}
}
},
{
"match": {
"customer_name": "Nick"
}
},
{
"term": {
"customer_phone": {
"value": 5555555555
}
}
}
]
}
},
{
"bool": {
"must": [
{
"term": {
"customer_id": {
"value": 40
}
}
},
{
"match": {
"customer_name": "Andy"
}
},
{
"term": {
"customer_phone": {
"value": 4444444444
}
}
}
]
}
}
]
}
}
}

ElasticSearch wildcard highlighting with hyphen

I am having trouble with a wildcard query. When the text contains a hyphen (-), nothing after it is highlighted. I played with the highlight settings but have not found a solution yet. Is this normal behavior?
I create an index:
PUT testhighlight
PUT testhighlight/_mapping/_doc
{
"properties": {
"title": {
"type": "text",
"term_vector": "with_positions_offsets"
},
"content": {
"type": "text",
"term_vector": "with_positions_offsets"
}
}
}
Then I create documents:
PUT testhighlight/_doc/1
{
"title": "1",
"content": "test-input"
}
PUT testhighlight/_doc/2
{
"title": "2",
"content": "test input"
}
PUT testhighlight/_doc/3
{
"title": "3",
"content": "testinput"
}
Then I execute this search request:
GET testhighlight/_search
{
"query": {
"bool": {
"must": [
{
"query_string": {
"fields": [
"title",
"content"
],
"query": "test*"
}
}
]
}
},
"highlight": {
"fields": {
"content": {
"boundary_max_scan": 10,
"fragment_offset": 5,
"fragment_size": 250,
"type": "fvh",
"number_of_fragments": 5,
"order": "score",
"boundary_scanner": "word",
"post_tags": [
"</span>"
],
"pre_tags": [
"""<span class="highlight-search">"""
]
}
}
}
}
It returns these hits:
"hits": [
{
"_index": "testhighlight",
"_type": "_doc",
"_id": "2",
"_score": 1.0,
"_source": {
"title": "2",
"content": "test input"
},
"highlight": {
"content": [
"""<span class="highlight-search">test</span> input"""
]
}
},
{
"_index": "testhighlight",
"_type": "_doc",
"_id": "1",
"_score": 1.0,
"_source": {
"title": "1",
"content": "test-input"
},
"highlight": {
"content": [
"""<span class="highlight-search">test</span>-input"""
]
}
},
{
"_index": "testhighlight",
"_type": "_doc",
"_id": "3",
"_score": 1.0,
"_source": {
"title": "3",
"content": "testinput"
},
"highlight": {
"content": [
"""<span class="highlight-search">testinput</span>"""
]
}
}
]
It looks alright, but it didn't highlight the whole "test-input" in the document with ID 1. Is there any way to do so?

Elasticsearch query to return part of words searched for

I would like to know how I can return "thanks" or "thanking" if I search for "thank"
Currently I have a multi-match query which returns only occurrences of "thank" like "thank you" but not "thanksgiving" or "thanks". I am using ElasticSearch 7.9.1
query: {
bool: {
must: [
{match: {accountId}},
{
multi_match: {
query: "thank",
type: "most_fields",
fields: ["text", "address", "description", "notes", "name"],
}
}
],
filter: {match: {type: "personaldetails"}}
}
},
Also, is it possible to combine the multi_match query with a query_string on one of the fields (say description, where I would do a query_string search only on description and a phrase match on the other fields)?
{ "query": {
"query_string": {
"query": "(new york city) OR (big apple)",
"default_field": "content"
}
}
}
Any input is appreciated.
thanks
You can use the edge_ngram tokenizer, which first breaks text down into
words whenever it encounters one of a list of specified characters,
then it emits N-grams of each word where the start of the N-gram is
anchored to the beginning of the word.
Adding a working example with index data, mapping, search query, and search result
Index Mapping:
{
"settings": {
"analysis": {
"analyzer": {
"my_analyzer": {
"tokenizer": "my_tokenizer"
}
},
"tokenizer": {
"my_tokenizer": {
"type": "edge_ngram",
"min_gram": 5,
"max_gram": 20,
"token_chars": [
"letter",
"digit"
]
}
}
},
"max_ngram_diff": 50
},
"mappings": {
"properties": {
"notes": {
"type": "text",
"analyzer": "my_analyzer",
"search_analyzer": "standard" // note this
}
}
}
}
Index Data:
{
"notes":"thank"
}
{
"notes":"thank you"
}
{
"notes":"thanks"
}
{
"notes":"thanksgiving"
}
Search Query:
{
"query": {
"multi_match" : {
"query": "thank",
"fields": [ "notes", "name" ]
}
}
}
Search Result:
"hits": [
{
"_index": "65511630",
"_type": "_doc",
"_id": "1",
"_score": 0.1448707,
"_source": {
"notes": "thank"
}
},
{
"_index": "65511630",
"_type": "_doc",
"_id": "3",
"_score": 0.1448707,
"_source": {
"notes": "thank you"
}
},
{
"_index": "65511630",
"_type": "_doc",
"_id": "2",
"_score": 0.12199639,
"_source": {
"notes": "thanks"
}
},
{
"_index": "65511630",
"_type": "_doc",
"_id": "4",
"_score": 0.06264679,
"_source": {
"notes": "thanksgiving"
}
}
]
To combine multi-match query with query string, use the below query:
{
"query": {
"bool": {
"must": {
"multi_match": {
"query": "thank",
"fields": [
"notes",
"name"
]
}
},
"should": {
"query_string": {
"query": "(new york city) OR (big apple)",
"default_field": "content"
}
}
}
}
}

elasticsearch filter on nested array

Let's say records have a cities field that is an array of city names.
records ex:
record 1:
{
cities : [
{name: city1},
{name : city2},
{name : city3}
]
}
record 2:
{
cities : [
{name: city2},
{name : city3},
{name : city4}
]
}
record 3:
{
cities : [
{name: city3},
{name : city4},
{name : city5}
]
}
requirement:
My filter criterion is to fetch the records that match city1, city2, or city3. Since record 1 matches all three, it should come first; record 2 matches two, so it should come second; and record 3 matches only one, so it should come last.
You don't have to use the nested data type, as you don't have nested properties or complex objects; this is simple to achieve.
Working example
Index mapping
{
"mappings": {
"properties": {
"cities": {
"type": "text"
}
}
}
}
Index sample docs
{
"cities": [
"tel-aviv", "bangalore", "sf"
]
}
{
"cities": [
"tel-aviv"
]
}
{
"cities": [
"sf"
]
}
Search query
{
"query": {
"bool": {
"should": [
{
"match": {
"cities": "tel-aviv"
}
},
{
"match": {
"cities": "bangalore"
}
},
{
"match": {
"cities": "sf"
}
}
]
}
}
}
And search result with proper expected result and score
"hits": [
{
"_index": "cities",
"_type": "_doc",
"_id": "1",
"_score": 1.850198,
"_source": {
"cities": [
"tel-aviv",
"bangalore",
"sf"
]
}
},
{
"_index": "cities",
"_type": "_doc",
"_id": "2",
"_score": 0.9983525,
"_source": {
"cities": [
"tel-aviv"
]
}
},
{
"_index": "cities",
"_type": "_doc",
"_id": "3",
"_score": 0.6133945,
"_source": {
"cities": [
"sf"
]
}
}
]
Adding another answer with nested bool queries:
Index Mapping:
{
"mappings": {
"properties":{
"Cities": {
"type": "nested",
"dynamic": "true"
}
}}
}
Index Data:
{
"Cities": [
{
"id": 1,
"city": "Bangalore"
},
{
"id": 2,
"city": "Hyderabad"
},
{
"id": 3,
"city": "Delhi"
}
]
}
{
"Cities": [
{
"id": 1,
"city": "Bangalore"
},
{
"id": 2,
"city": "abc"
},
{
"id": 3,
"city": "Def"
}
]
}
Search Query:
{
"query": {
"bool": {
"should": [
{
"nested": {
"path": "Cities",
"query": {
"bool": {
"must": [
{
"match": {
"Cities.city": "Bangalore"
}
}
]
}
}
}
},
{
"nested": {
"path": "Cities",
"query": {
"bool": {
"must": [
{
"match": {
"Cities.city": "Hyderabad"
}
}
]
}
}
}
}
]
}
}
}
Search Result:
"hits": [
{
"_index": "nested-63806067",
"_type": "_doc",
"_id": "1",
"_score": 3.297317, <-- note this
"_source": {
"Cities": [
{
"id": 1,
"city": "Bangalore"
},
{
"id": 2,
"city": "Hyderabad"
},
{
"id": 3,
"city": "Delhi"
}
]
}
},
{
"_index": "nested-63806067",
"_type": "_doc",
"_id": "2",
"_score": 1.6486585, <-- note this
"_source": {
"Cities": [
{
"id": 1,
"city": "Bangalore"
},
{
"id": 2,
"city": "abc"
},
{
"id": 3,
"city": "Def"
}
]
}
}
]

Elasticsearch Similar Text Query

Given the following documents in an index (let's call it addresses):
{
ADDRESS: {
ID: 1,
LINE1: "steet 1",
CITY: "kuala lumpur",
COUNTRY: "MALAYSIA",
...
}
}
{
ADDRESS: {
ID: 2,
LINE1: "steet 1",
CITY: "kualalumpur city",
COUNTRY: "MALAYSIA",
...
}
}
{
ADDRESS: {
ID: 3,
LINE1: "steet 1",
CITY: "kualalumpur",
COUNTRY: "MALAYSIA",
...
}
}
{
ADDRESS: {
ID: 4,
LINE1: "steet 1",
CITY: "kuala lumpur city",
COUNTRY: "MALAYSIA",
...
}
}
At this point, I found the query to grab "kualalumpur", "kuala lumpur", "kualalumpur city" with the search text "kualalumpur".
But "kuala lumpur city" is missing from the result despite near similarity with "kualalumpur city".
Here is my query so far:
{
"query": {
"bool": {
"should": [
{"match": {"ADDRESS.STREET": {"query": "street 1", "fuzziness": 1, "operator": "AND"}}},
{
"bool": {
"should": [
{"match": {"ADDRESS.CITY": {"query": "kualalumpur", "fuzziness": 1, "operator": "OR"}}},
{"match": {"ADDRESS.CITY.keyword": {"query": "kualalumpur", "fuzziness": 1, "operator": "OR"}}}
]
}
}
],
"filter": {
"bool": {
"must": [
{"term": {"ADDRESS.COUNTRY.keyword": "MALAYSIA"}}
]
}
},
"minimum_should_match": 2
}
}
}
Given the condition, is it possible at all for Elasticsearch to return all four documents with search text "kualalumpur"?
You can use an edge n-gram tokenizer on the field (named country in the example below) to get all four docs. I tried it locally; a working example is below.
Create custom analyzer and apply it on your field
{
"settings": {
"index": {
"analysis": {
"analyzer": {
"ngram_analyzer": {
"type": "custom",
"filter": [
"lowercase"
],
"tokenizer": "edgeNGramTokenizer"
}
},
"tokenizer": {
"edgeNGramTokenizer": {
"token_chars": [
"letter",
"digit"
],
"min_gram": "1",
"type": "edge_ngram",
"max_gram": "40"
}
}
},
"max_ngram_diff": "50"
}
},
"mappings": {
"properties": {
"country": {
"type": "text",
"analyzer" : "ngram_analyzer"
}
}
}
}
Index all four of your sample docs, like below:
{
"country" : "kuala lumpur"
}
A search query with the term kualalumpur matches all four docs:
{
"query": {
"match" : {
"country" : "kualalumpur"
}
}
}
"hits": [
{
"_index": "fuzzy",
"_type": "_doc",
"_id": "3",
"_score": 5.0003963,
"_source": {
"country": "kualalumpur"
}
},
{
"_index": "fuzzy",
"_type": "_doc",
"_id": "2",
"_score": 4.4082437,
"_source": {
"country": "kualalumpur city"
}
},
{
"_index": "fuzzy",
"_type": "_doc",
"_id": "1",
"_score": 0.5621849,
"_source": {
"country": "kuala lumpur"
}
},
{
"_index": "fuzzy",
"_type": "_doc",
"_id": "4",
"_score": 0.4956103,
"_source": {
"country": "kuala lumpur city"
}
}
]

Resources