I'm using Elasticsearch 1.5.2 and I'm trying to implement an edge_ngram autocomplete search. I have the following mapping:
curl -XPUT 'localhost:8080/users' -d '{
  "settings": {
    "analysis": {
      "filter": {
        "edge_ngram_filter": {
          "type": "edge_ngram",
          "min_gram": 2,
          "max_gram": 10
        }
      },
      "analyzer": {
        "edge_ngram_analyzer": {
          "type": "custom",
          "tokenizer": "standard",
          "filter": [
            "lowercase",
            "asciifolding",
            "edge_ngram_filter"
          ]
        },
        "whitespace_analyzer": {
          "type": "custom",
          "tokenizer": "whitespace",
          "filter": [
            "lowercase",
            "asciifolding"
          ]
        }
      }
    },
    "mappings": {
      "user": {
        "_all": {
          "type": "string",
          "index_analyzer": "edge_ngram_analyzer",
          "search_analyzer": "whitespace_analyzer"
        },
        "properties": {
          "id": {
            "type": "integer",
            "index": "no",
            "include_in_all": false
          },
          "email": {
            "type": "string"
          },
          "firstName": {
            "type": "string"
          },
          "lastName": {
            "type": "string"
          }
        }
      }
    }
  }
}'
I then index a "user" document:
curl -XPUT 'localhost:8080/users/user/1' -d '{
  "email": "a.smith@gmail.com",
  "firstName": "Alexander",
  "lastName": "Smith"
}'
When I run the following query, nothing is returned:
curl -XGET 'localhost:8080/users/_search' -d '{
  "query": {
    "match": {
      "_all": {
        "query": "ale",
        "operator": "and"
      }
    }
  }
}'
Why is the _all match query not matching on the user document?
You can get edge_ngram autocomplete behavior without overriding the _all field's analysis. This is done by renaming the analyzers you have defined to default_index and default_search (you can alias them to reflect your original names, "edge_ngram_analyzer" and "whitespace_analyzer", if you want). Here is your configuration with the relevant changes:
curl -XPUT 'localhost:8080/users' -d '{
  "settings": {
    "analysis": {
      "filter": {
        "edge_ngram_filter": {
          "type": "edge_ngram",
          "min_gram": 2,
          "max_gram": 10
        }
      },
      "analyzer": {
        "default_index": {
          "type": "custom",
          "tokenizer": "standard",
          "filter": [
            "lowercase",
            "asciifolding",
            "edge_ngram_filter"
          ]
        },
        "default_search": {
          "type": "custom",
          "tokenizer": "whitespace",
          "filter": [
            "lowercase",
            "asciifolding"
          ]
        }
      }
    }
  },
  "mappings": {
    "user": {
      "properties": {
        "id": {
          "type": "integer",
          "index": "no",
          "include_in_all": false
        },
        "email": {
          "type": "string"
        },
        "firstName": {
          "type": "string"
        },
        "lastName": {
          "type": "string"
        }
      }
    }
  }
}'
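With the default analyzers in place, the index-time edge_ngram analysis also applies to _all, so your original query should now return the document (a quick check, assuming the same local setup as above):
curl -XGET 'localhost:8080/users/_search' -d '{
  "query": {
    "match": {
      "_all": {
        "query": "ale",
        "operator": "and"
      }
    }
  }
}'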
Hope I have managed to help :)
I have an index with a Name field.
I want to use a soundex analyzer and a synonym analyzer on that field.
I want to achieve both in a single index. Is it even possible?
Please help me, experts out there.
Index 1
{
  "settings": {
    "index": {
      "number_of_shards": "1",
      "provided_name": "phonetic_sample",
      "creation_date": "1603097131476",
      "analysis": {
        "filter": {
          "my_soundex": {
            "replace": "false",
            "type": "phonetic",
            "encoder": "soundex"
          }
        },
        "analyzer": {
          "my_analyzer": {
            "filter": [
              "lowercase",
              "my_soundex"
            ],
            "tokenizer": "standard"
          }
        }
      }
    }
  }
}
I query for Catherine and match Catherine, Katherine, and Kathryn.
Index 2
{
  "settings": {
    "index": {
      "number_of_shards": "1",
      "provided_name": "phonetic_synonym",
      "creation_date": "1603121439096",
      "analysis": {
        "filter": {
          "synonym": {
            "format": "wordnet",
            "type": "synonym",
            "synonyms": [
              "s(100000001,1,'Bill',v,1,0).",
              "s(100000001,2,'William',v,1,0).",
              "s(100000001,3,'Wilhelm',v,1,0)."
            ]
          }
        },
        "analyzer": {
          "synonym": {
            "filter": [
              "synonym"
            ],
            "tokenizer": "whitespace"
          }
        }
      }
    }
  }
}
I query for Bill and match Bill, William, and Wilhelm.
You can use a multi-field with multiple analyzers: declare sub-fields for the name field, each with a different analyzer. Below is the modified index mapping.
Index Mapping:
{
  "settings": {
    "index": {
      "analysis": {
        "filter": {
          "my_soundex": {
            "type": "phonetic",
            "encoder": "metaphone",
            "replace": false
          },
          "synonym": {
            "format": "wordnet",
            "type": "synonym",
            "synonyms": [
              "s(100000001,1,'Bill',v,1,0).",
              "s(100000001,2,'William',v,1,0).",
              "s(100000001,3,'Wilhelm',v,1,0)."
            ]
          }
        },
        "analyzer": {
          "synonym": {
            "filter": [
              "synonym"
            ],
            "tokenizer": "whitespace"
          },
          "my_analyzer": {
            "filter": [
              "lowercase",
              "my_soundex"
            ],
            "tokenizer": "standard"
          }
        }
      }
    }
  },
  "mappings": {
    "properties": {
      "name": {
        "type": "text",
        "analyzer": "synonym",
        "search_analyzer": "synonym",
        "fields": {
          "content": {
            "type": "text",
            "analyzer": "my_analyzer",
            "search_analyzer": "my_analyzer"
          }
        }
      }
    }
  }
}
Then you can refer to name and name.content in your queries. Your search query will look like this:
{
  "query": {
    "multi_match": {
      "query": "Bill",
      "fields": [
        "name",
        "name.content"
      ],
      "type": "most_fields"
    }
  }
}
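To sanity-check each sub-field, you can run the _analyze API against it. For example (using a hypothetical my_index as a stand-in for whatever you name the combined index), this shows the phonetic tokens the name.content sub-field produces:
GET my_index/_analyze
{
  "field": "name.content",
  "text": "Catherine"
}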
I'm using Elasticsearch 6.4 as an AWS managed service. Here is my mapping:
{
  "settings": {
    "analysis": {
      "analyzer": {
        "my_analyzer": {
          "type": "custom",
          "tokenizer": "my_tokenizer"
        }
      }
    },
    "tokenizer": {
      "my_tokenizer": {
        "type": "edge_ngram",
        "min_gram": 3,
        "max_gram": 20,
        "token_chars": [
          "letter"
        ]
      }
    }
  },
  "mappings": {
    "tsetse": {
      "properties": {
        "id": {
          "type": "integer"
        },
        "user_id": {
          "type": "integer"
        },
        "description": {
          "type": "text",
          "analyzer": "my_analyzer"
        },
        "type": {
          "type": "integer"
        }
      }
    }
  }
}
The index has a record with description = "greatest performance on earth". When I search, it only works on complete words - earth or performance. It does not return results for great or perf. What am I missing?
Here is the updated mapping with an edge_ngram filter:
{
  "settings": {
    "analysis": {
      "filter": {
        "autocomplete_filter": {
          "type": "edge_ngram",
          "min_gram": 1,
          "max_gram": 20
        }
      },
      "analyzer": {
        "my_analyzer": {
          "type": "custom",
          "tokenizer": "standard",
          "filter": [
            "lowercase",
            "autocomplete_filter"
          ]
        }
      }
    }
  },
  "mappings": {
    "tsetse": {
      "properties": {
        "id": {
          "type": "integer"
        },
        "user_id": {
          "type": "integer"
        },
        "description": {
          "type": "text",
          "analyzer": "my_analyzer"
        },
        "type": {
          "type": "integer"
        }
      }
    }
  }
}
Gist script - https://gist.github.com/swati-patil/0b1cea74fc52b1b96d44ad239ad2580d
Thanks,
Thanks for the Gist. I can see you're not creating your index correctly:
you're using POST instead of PUT
you're specifying a type where you shouldn't
there are two closing curly braces that you need to remove at the end
Do it like this instead:
# first delete your index
curl -XDELETE 'my-instance-us-east1.amazonaws.com/my_index'
# then create it correctly
curl -XPUT "my-instance-us-east1.amazonaws.com/my_index" -H 'Content-Type: application/json' -d '{
"settings": {
"analysis": {
"filter": {
"autocomplete_filter": {
"type": "edge_ngram",
"min_gram": 1,
"max_gram": 20
}
},
"analyzer": {
"my_analyzer": {
"type": "custom",
"tokenizer": "standard",
"filter": [
"lowercase",
"autocomplete_filter"
]
}
}
}
},
"mappings": {
"my_type": {
"properties": {
"text": {
"type": "text",
"analyzer": "my_analyzer"
}
}
}
}
}'
# then analyze works
curl -XPOST my-instance-us-east1.amazonaws.com/my_index/_analyze -H 'Content-Type: application/json' -d '{
  "analyzer": "my_analyzer",
  "text": "Greatest performance on earth"
}'
Then index your documents and run your queries; both will work.
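For instance (a quick check, reusing the my_index/my_type names from above and the sample description from the question), indexing the text and searching for a prefix like great should now return the document:
curl -XPUT 'my-instance-us-east1.amazonaws.com/my_index/my_type/1' -H 'Content-Type: application/json' -d '{
  "text": "greatest performance on earth"
}'

curl -XGET 'my-instance-us-east1.amazonaws.com/my_index/_search' -H 'Content-Type: application/json' -d '{
  "query": {
    "match": {
      "text": "great"
    }
  }
}'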
I use Elasticsearch for autocomplete search with an ngram filter. I need to boost a result if it starts with the search keyword.
My query is simple:
"query": {
"match": {
"query": "re",
"operator": "and"
}
}
And these are my results:
Restaurants
Couture et retouches
Restauration rapide
But I want them like this:
Restaurants
Restauration rapide
Couture et retouches
How can I boost a result starting with the keyword?
In case it helps, here is my mapping:
{
  "settings": {
    "analysis": {
      "analyzer": {
        "partialAnalyzer": {
          "type": "custom",
          "tokenizer": "ngram_tokenizer",
          "filter": ["asciifolding", "lowercase"]
        },
        "searchAnalyzer": {
          "type": "custom",
          "tokenizer": "standard",
          "filter": ["asciifolding", "lowercase"]
        }
      },
      "tokenizer": {
        "ngram_tokenizer": {
          "type": "edge_ngram",
          "min_gram": "1",
          "max_gram": "15",
          "token_chars": [ "letter", "digit" ]
        }
      }
    }
  },
  "mappings": {
    "place": {
      "properties": {
        "name": {
          "type": "string",
          "index_analyzer": "partialAnalyzer",
          "search_analyzer": "searchAnalyzer",
          "term_vector": "with_positions_offsets"
        }
      }
    }
  }
}
Regards,
How about this idea? I'm not 100% sure of it, as I think it depends on the data:
create a sub-field of your name field that is analyzed with a keyword analyzer (so the value pretty much stays as is)
change the query to a bool with two should clauses:
one should is the query you have now
the other should is a match with phrase_prefix on the sub-field.
The mapping:
{
  "settings": {
    "analysis": {
      "analyzer": {
        "partialAnalyzer": {
          "type": "custom",
          "tokenizer": "ngram_tokenizer",
          "filter": [
            "asciifolding",
            "lowercase"
          ]
        },
        "searchAnalyzer": {
          "type": "custom",
          "tokenizer": "standard",
          "filter": [
            "asciifolding",
            "lowercase"
          ]
        },
        "keyword_lowercase": {
          "type": "custom",
          "tokenizer": "keyword",
          "filter": [
            "asciifolding",
            "lowercase"
          ]
        }
      },
      "tokenizer": {
        "ngram_tokenizer": {
          "type": "edge_ngram",
          "min_gram": "1",
          "max_gram": "15",
          "token_chars": [
            "letter",
            "digit"
          ]
        }
      }
    }
  },
  "mappings": {
    "place": {
      "properties": {
        "name": {
          "type": "string",
          "index_analyzer": "partialAnalyzer",
          "search_analyzer": "searchAnalyzer",
          "term_vector": "with_positions_offsets",
          "fields": {
            "as_is": {
              "type": "string",
              "analyzer": "keyword_lowercase"
            }
          }
        }
      }
    }
  }
}
The query:
{
  "query": {
    "bool": {
      "should": [
        {
          "match": {
            "name": {
              "query": "re",
              "operator": "and"
            }
          }
        },
        {
          "match": {
            "name.as_is": {
              "query": "re",
              "type": "phrase_prefix"
            }
          }
        }
      ]
    }
  }
}
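The idea is that documents satisfying both should clauses score higher: the first clause matches anything the edge_ngram analysis matches, while the phrase_prefix clause on the keyword-analyzed sub-field only matches names that actually begin with "re", so those get the extra score and rank first.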
I made a _mapping request to Elasticsearch and saw that a custom analyzer is used for one field. The output for the field looks like this:
"myFieldName": {
"type": "string",
"analyzer": "someCustomAnalyzer"
}
So is there a way to get the definition of that someCustomAnalyzer? I tried the request curl -XGET localhost:9200/_analyze?analyzer=someCustomAnalyzer
and got:
{
  "error": "ElasticsearchIllegalArgumentException[text is missing]",
  "status": 400
}
If I add a text argument to the query string, I get the analysis result, but what I need is the analyzer definition.
You can see it in the index settings. It's more readable now in 1.5 than it used to be.
So if I create an index with a non-trivial analyzer:
PUT /test_index
{
  "settings": {
    "number_of_shards": 1,
    "analysis": {
      "filter": {
        "edge_ngram_filter": {
          "type": "edge_ngram",
          "min_gram": 2,
          "max_gram": 20
        }
      },
      "analyzer": {
        "edge_ngram_analyzer": {
          "type": "custom",
          "tokenizer": "standard",
          "filter": [
            "lowercase",
            "edge_ngram_filter"
          ]
        }
      }
    }
  },
  "mappings": {
    "doc": {
      "_all": {
        "enabled": true,
        "index_analyzer": "edge_ngram_analyzer",
        "search_analyzer": "standard"
      },
      "properties": {
        "first_name": {
          "type": "string",
          "include_in_all": true
        },
        "last_name": {
          "type": "string",
          "include_in_all": true
        },
        "ssn": {
          "type": "string",
          "index": "not_analyzed",
          "include_in_all": true
        }
      }
    }
  }
}
I can get the index settings with:
GET /test_index/_settings
...
{
  "test_index": {
    "settings": {
      "index": {
        "creation_date": "1430394627755",
        "uuid": "78oYlYU9RS6LZ5YFyeaMRQ",
        "analysis": {
          "filter": {
            "edge_ngram_filter": {
              "min_gram": "2",
              "type": "edge_ngram",
              "max_gram": "20"
            }
          },
          "analyzer": {
            "edge_ngram_analyzer": {
              "type": "custom",
              "filter": [
                "lowercase",
                "edge_ngram_filter"
              ],
              "tokenizer": "standard"
            }
          }
        },
        "number_of_replicas": "1",
        "number_of_shards": "1",
        "version": {
          "created": "1050099"
        }
      }
    }
  }
}
Here is the code I used:
http://sense.qbox.io/gist/4a38bdb0cb7d381caa29b9ce2c3c154b63cdc1f8
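And once you have the definition, you can confirm what the analyzer actually does with the _analyze API scoped to the index; this is the same call you tried, it just needs the text parameter as well:
GET /test_index/_analyze?analyzer=edge_ngram_analyzer&text=Smith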
I really thought I had this working, but I'm actually having issues. I have a dynamic template set up to match nested documents. I set up my mappings like so:
curl -XPUT 'http://localhost:9200/test/' -d '{
  "mappings": {
    "Item": {
      "dynamic_templates": [
        {
          "metadata_template": {
            "match_mapping_type": "string",
            "path_match": "metadata.*",
            "mapping": {
              "type": "multi_field",
              "fields": {
                "{name}": {
                  "type": "{dynamic_type}",
                  "index": "analyzed"
                },
                "standard": {
                  "type": "{dynamic_type}",
                  "index": "analyzed",
                  "analyzer": "standard"
                }
              }
            }
          }
        }
      ]
    }
  },
  "settings": {
    "analysis": {
      "filter": {
        "my_ngram": {
          "max_gram": 10,
          "min_gram": 1,
          "type": "nGram"
        },
        "lb_stemmer": {
          "type": "stemmer",
          "name": "english"
        }
      },
      "analyzer": {
        "default_index": {
          "filter": [
            "standard",
            "lowercase",
            "asciifolding",
            "my_ngram"
          ],
          "type": "custom",
          "tokenizer": "keyword"
        },
        "default_search": {
          "filter": [
            "standard",
            "lowercase"
          ],
          "type": "custom",
          "tokenizer": "standard"
        }
      }
    }
  }
}'
My expectation is that all fields that start with "metadata." should be stored in an analyzed field and in an unanalyzed field with the suffix ".standard". Am I completely misunderstanding this?
I add an item:
curl -XPUT localhost:9200/test/Item/1 -d '{
  "name": "test",
  "metadata": {
    "strange_tag": "CLEAN_2C_abcdefghij_07MAY2005_AB"
  }
}'
This query works great:
{
  "query": {
    "match": {
      "metadata.strange_tag": {
        "query": "CLEAN_2C_abcdefghij_07MAY2005_AB",
        "type": "boolean"
      }
    }
  }
}
But searching for the word CLEAN, or clean, doesn't return any results. I expected that field to have gone through the ngram tokenizer. Does anyone have a suggestion for what I'm doing wrong?
Looks like I was incorrectly creating my nGram analyzer. Here is a working example:
curl -XDELETE 'localhost:9200/test'
curl -XPUT 'localhost:9200/test' -d '{
  "settings": {
    "analysis": {
      "analyzer": {
        "my_ngram_analyzer": {
          "tokenizer": "my_ngram_tokenizer",
          "filter": [
            "standard",
            "lowercase",
            "asciifolding"
          ]
        }
      },
      "tokenizer": {
        "my_ngram_tokenizer": {
          "type": "nGram",
          "min_gram": "2",
          "max_gram": "3",
          "token_chars": [
            "letter",
            "digit"
          ]
        }
      }
    }
  },
  "mappings": {
    "Item": {
      "dynamic_templates": [
        {
          "metadata_template": {
            "match_mapping_type": "string",
            "path_match": "*",
            "mapping": {
              "type": "multi_field",
              "fields": {
                "{name}": {
                  "type": "{dynamic_type}",
                  "index": "analyzed",
                  "analyzer": "my_ngram_analyzer"
                },
                "standard": {
                  "type": "{dynamic_type}",
                  "index": "analyzed",
                  "analyzer": "standard"
                }
              }
            }
          }
        }
      ]
    }
  }
}'
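To verify (a quick sketch, reusing the sample document from the question), re-index the item and search for a fragment like clean; the ngram-analyzed {name} sub-field should now match it:
curl -XPUT 'localhost:9200/test/Item/1' -d '{
  "name": "test",
  "metadata": {
    "strange_tag": "CLEAN_2C_abcdefghij_07MAY2005_AB"
  }
}'

curl -XGET 'localhost:9200/test/_search' -d '{
  "query": {
    "match": {
      "metadata.strange_tag": "clean"
    }
  }
}'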