elasticsearch match query is not working for numbers - elasticsearch

I have a search query which is used to search in report name.
I have indexed the field name with an autocomplete, edge_ngram analyzer.
Normal field-name search works properly, but when there is a number / year in the field name, the search does not work.
Query :
{
"query": {
"function_score": {
"query": {
"bool": {
"should": [
{
"match": {
"field_name": {
"query": "hybrid seeds india 2017",
"operator": "and"
}
}
}
]
}
}
}
},
"from": 0,
"size": 10
}
Setting and the Mappings
{
"mappings": {
"pages": {
"properties": {
"report_name": {
"fields": {
"autocomplete": {
"search_analyzer": "report_name_search",
"analyzer": "report_name_index",
"type": "string"
},
"report_name": {
"index": "not_analyzed",
"type": "string"
}
},
"type": "multi_field"
}
}
}
},
"settings": {
"analysis": {
"filter": {
"report_name_ngram": {
"max_gram": 150,
"min_gram": 2,
"type": "edge_ngram"
}
},
"analyzer": {
"report_name_index": {
"filter": [
"lowercase",
"report_name_ngram"
],
"tokenizer": "keyword"
},
"report_name_search": {
"filter": [
"lowercase"
],
"tokenizer": "keyword"
}
}
}
}
}
Can you guys help me out in this.
Thanks in advance

Related

Elasticsearch - search for numbers / price with decimal places

How can I search for documents in Elasticsearch that have numeric field with value having decimal places?
My Mapping is as follows:
POST /itemnew/_doc
{
"mappings": {
"_doc": {
"properties": {
"name": {
"type": "string",
"analyzer": "edge_ngram_analyzer",
},
"purchase_price": {
"type": "double"
},
"sale_price": {
"type": "double"
},
"sku": {
"type": "string",
},
"settings": {
"index": {
"analysis": {
"filter": {
"autocomplete_filter": {
"type": "edge_ngram",
}
},
"analyzer": {
"ngram_analyzer": {
"tokenizer": "standard",
}
Sample document is as follows:
PUT itemnew/_doc/3
{
"company_id":"4510339694428161" ,
"item_type": "goods",
"name":"Apple sam" ,
"purchase_price":"45.50" ,
"sale_price":"50",
"sku": "sku 123"
}
I get NumberFormatException when I try the following query: GET itemnew/_search
{
"query": {
"bool": {
"must": [
{
"multi_match": {
"query": "45.5",
"fields": [
"name",
"purchase_price",
"sale_price",
"sku"
],
"type": "most_fields"
```
How can I search for documents in Elasticsearch that have a numeric field with a value having decimal places? Please help me to solve this issue. Thank you.
You can use a lenient top-level parameter for a multi-match query here. Adding lenient just ignore exception that occurs due to format failures.
lenient (Optional, Boolean) If true, format-based errors, such as
providing a text query value for a numeric field, are ignored.
Defaults to false.
Adding a working example
Index Mapping:
PUT testidx
{
"settings": {
"analysis": {
"filter": {
"autocomplete_filter": {
"type": "edge_ngram"
}
},
"analyzer": {
"ngram_analyzer": {
"tokenizer": "standard"
}
}
}
},
"mappings": {
"properties": {
"name": {
"type": "text",
"analyzer": "ngram_analyzer"
},
"purchase_price": {
"type": "double"
},
"sale_price": {
"type": "double"
},
"sku": {
"type": "text"
}
}
}
}
Index Data:
PUT testidx/_doc/1
{
"company_id": "4510339694428161",
"item_type": "goods",
"name": "Apple sam",
"purchase_price": "45.50",
"sale_price": "50",
"sku": "sku 123"
}
Search Query:
POST testidx/_search
{
"query": {
"bool": {
"must": [
{
"multi_match": {
"query": "hello",
"fields": [
"name",
"purchase_price",
"sale_price",
"sku"
],
"lenient": true,
"type": "most_fields"
}
}
]
}
}
}

elastic search for mark character

I have two fields in Vietnamese: "mắt biếc" and "mật mã" in an index call books.
In the books index, I use asciifolding to transform "mắt biếc" to "mat biec" and "mật mã" to "mat ma".
Given the two fields above, I need to query for the term "mắt". But the scores of the two fields are equal, and what I want is for "mắt biếc" to have a greater score than "mật mã".
So, how can I do that in Elasticsearch?
You should use Function Score Query
Try this (base on version 7.x):
GET my_index/_search
{
"query": {
"function_score": {
"query": {
"match": {
"title": "mật"
}
},
"functions": [
{
"filter": {
"term": {
"title.keyword": {
"value": "mắt biếc"
}
}
},
"weight": 30
}
],
"max_boost": 30,
"score_mode": "max",
"boost_mode": "multiply"
}
}
}
Mappings example
PUT my_index
{
"settings": {
"analysis": {
"analyzer": {
"product_analyzer": {
"tokenizer": "standard",
"filter": [
"asciifolding"
]
}
}
}
},
"mappings": {
"properties": {
"title": {
"type": "text",
"analyzer": "product_analyzer",
"fields": {
"keyword": {
"type": "keyword"
}
}
},
"price": {
"type": "keyword"
},
"author": {
"type": "keyword"
},
"publisher": {
"type": "keyword"
}
}
}
}
You have to update your mappings in order to use title.keyword
Update Query
POST my_index/_mapping
{
"properties": {
"title": {
"type": "text",
"analyzer": "product_analyzer",
"fields": {
"keyword": {
"type": "keyword"
}
}
}
}
}
And then
Update all documents
POST my_index/_update_by_query?conflicts=proceed
Hope this helps

Elasticsearch term query to number token

I need to explain some weird behavior of term query to Elasticsearch database which contains number part in the string. Query is pretty simple:
{
"query": {
"bool": {
"should": [
{
"term": {
"address.street": "8 kvetna"
}
}
]
}
}
}
The problem is that the term 8 kvetna returns an empty result. I tried to _analyze it and it makes regular tokens like 8, k, kv, kve, etc. Also, I am pretty sure there is a value 8 kvetna in the database.
Here is the mapping for the field:
{
"settings": {
"index": {
"refresh_interval": "1m",
"number_of_shards": "1",
"number_of_replicas": "1",
"analysis": {
"filter": {
"autocomplete_filter": {
"type": "edge_ngram",
"min_gram": "1",
"max_gram": "20"
}
},
"analyzer": {
"autocomplete": {
"filter": [
"lowercase",
"asciifolding",
"autocomplete_filter"
],
"type": "custom",
"tokenizer": "standard"
}
"default": {
"filter": [
"lowercase",
"asciifolding"
],
"type": "custom",
"tokenizer": "standard"
}
}
}
}
},
"mappings": {
"doc": {
"dynamic": "strict",
"_all": {
"enabled": false
},
"properties": {
"address": {
"properties": {
"city": {
"type": "text",
"analyzer": "autocomplete"
},
"street": {
"type": "text",
"analyzer": "autocomplete"
}
}
}
}
}
}
}
What caused this weird result? I don't understand it. Thanks for any help.
Great start so far! Your only issue is that you're using a term query, while you should use a match one. A term query will try to do an exact match for 8 kvetna and that's not what you want. The following query will work:
{
"query": {
"bool": {
"should": [
{
"match": { <--- change this
"address.street": "8 kvetna"
}
}
]
}
}
}

Highlight with fuzziness and ngram

I guess the title of the topic spoiled you enough :D
I use edge_ngram and highlight to build an autocomplete search. I have added fuzziness in the query to allow users to misspell their search, but it breaks the highlighting a bit.
When i write Sport this is what I get :
<em>Spor</em>t
<em>Spor</em>t mécanique
<em>Spor</em>t nautique
I guess it's because it matches with the token spor generated by the ngram tokenizer.
The query:
{
"query": {
"bool": {
"should": [
{
"match": {
"name": {
"query": "sport",
"operator": "and",
"fuzziness": "AUTO"
}
}
},
{
"match_phrase_prefix": {
"name.raw": {
"query": "sport"
}
}
}
]
}
},
"highlight": {
"fields": {
"name": {
"term_vector": "with_positions_offsets"
}
}
}
}
And the mapping:
{
"settings": {
"analysis": {
"analyzer": {
"partialAnalyzer": {
"type": "custom",
"tokenizer": "ngram_tokenizer",
"filter": ["asciifolding", "lowercase"]
},
"keywordAnalyzer": {
"type": "custom",
"tokenizer": "keyword",
"filter": ["asciifolding", "lowercase"]
},
"searchAnalyzer": {
"type": "custom",
"tokenizer": "standard",
"filter": ["asciifolding", "lowercase"]
}
},
"tokenizer": {
"ngram_tokenizer": {
"type": "edge_ngram",
"min_gram": "1",
"max_gram": "15",
"token_chars": [ "letter", "digit" ]
}
}
}
},
"mappings": {
"place": {
"properties": {
"name": {
"type": "string",
"index_analyzer": "partialAnalyzer",
"search_analyzer": "searchAnalyzer",
"term_vector": "with_positions_offsets",
"fields": {
"raw": {
"type": "string",
"analyzer": "keywordAnalyzer"
}
}
}
}
}
}
}
I tried to add a new match clause without fuzziness in the query to try to match the keyword before the match with fuzziness but it changed nothing.
'match': {
'name': {
'query': 'sport',
'operator': 'and'
}
Any idea how I can handle this?
Regards, Raphaël
You could do that with highlight_query I guess
Try this in your highlighting query.
"highlight": {
"fields": {
"name": {
"term_vector": "with_positions_offsets",
"highlight_query": {
"match": {
"name.raw": {
"query": "spotr",
"fuzziness": 2
}
}
}
}
}
}
I hope it helps.

Elasticsearch layered ordering

I would like to be able to return typeahead items in a certain order. For example, search for Para should return:
Paracetamol
Parafin
LIQUID PARAFFIN
ISOMETHEPTENE WITH PARACETAMOL
1) The suggestions that begin with the search term para should be ordered at the top and in alphabetical order
2) The rest of the items should appear below and also in alphabetical order
Is this possible with Elasticsearch?
Update
What if I wanted the output to be like this:
Paracetamol
Parafin
Amber Paraffin
ISOMETHEPTENE WITH PARACETAMOL
LIQUID PARAFFIN
So all the terms that contain the prefix are at the top and everything else in alphabetical order.
This is my suggestion (also, you need to enable scripting):
PUT /test
{
"settings": {
"analysis": {
"analyzer": {
"autocomplete": {
"type": "custom",
"tokenizer": "standard",
"filter": [
"standard",
"lowercase",
"ngram"
]
},
"search_ngram": {
"type": "custom",
"tokenizer": "keyword",
"filter": "lowercase"
}
},
"filter": {
"ngram": {
"type": "ngram",
"min_gram": 2,
"max_gram": 15
}
}
}
},
"mappings": {
"test": {
"properties": {
"text": {
"type": "string",
"index_analyzer": "autocomplete",
"search_analyzer": "search_ngram",
"index_options": "positions",
"fields": {
"not_analyzed_sorting": {
"type": "string",
"index": "not_analyzed"
}
}
}
}
}
}
}
POST test/test/_bulk
{"index":{"_id":1}}
{"text":"Paracetamol"}
{"index":{"_id":2}}
{"text":"Paracetamol xxx yyy zzz"}
{"index":{"_id":3}}
{"text":"Parafin"}
{"index":{"_id":4}}
{"text":"LIQUID PARAFFIN"}
{"index":{"_id":5}}
{"text":"ISOMETHEPTENE WITH PARACETAMOL"}
GET /test/test/_search
{
"query": {
"match": {
"text": "Para"
}
},
"sort": [
{
"_script": {
"type": "number",
"script": "termInfo=_index[field_to_search].get(term_to_search.toLowerCase(),_POSITIONS);if (termInfo) {for(pos in termInfo){return pos.position}};return 0;",
"params": {
"term_to_search": "Para",
"field_to_search": "text"
},
"order": "asc"
}
},
{
"text.not_analyzed_sorting": {
"order": "asc"
}
}
]
}
UPDATE
For your updated question, even if I would have preferred to have another post, use the following query:
{
"query": {
"match": {
"text": "Para"
}
},
"sort": [
{
"_script": {
"type": "number",
"script": "termInfo=_index[field_to_search].get(term_to_search.toLowerCase(),_POSITIONS);if (termInfo) {for(pos in termInfo){if (pos.position==0) return pos.position; else return java.lang.Integer.MAX_VALUE}};return java.lang.Integer.MAX_VALUE;",
"params": {
"term_to_search": "Para",
"field_to_search": "text"
},
"order": "asc"
}
},
{
"text.not_analyzed_sorting": {
"order": "asc"
}
}
]
}

Resources