Elasticsearch error while mapping - unknown setting

I'm trying to get this code to work, but I'm getting the error below:
Reference : https://www.youtube.com/watch?v=PQGlhbf7o7c
Please let me know how this can be fixed. Thank You.
Code:
PUT test
{
  "settings": {
    "index": {
      "analysis": {
        "filter": {},
        "analyzer": {
          "keyword_analyzer": {
            "filter": [
              "lowercase",
              "asciifolding",
              "trim"
            ],
            "char_filter": [],
            "type": "custom",
            "tokenizer": "keywords"
          },
          "edge_ngram_analyzer": {
            "filter": [
              "lowercase"
            ],
            "tokenizer": "edge_ngram_tokenizer"
          },
          "edge_ngram_search_analyzer": {
            "tokenizer": "lowercase"
          },
          "tokenizer": {
            "edge_ngram_tokenizer": {
              "type": "edge_ngram",
              "min_gram": 2,
              "max_gram": 5,
              "token_chars": [
                "letter"
              ]
            }
          }
        }
      }
    },
    "mappings": {
      "properties": {
        "name": {
          "type": "text",
          "fields": {
            "keywordstring": {
              "type": "text",
              "analyzer": "keyword_analyzer"
            },
            "edgengram": {
              "type": "text",
              "analyzer": "edge_ngram_analyzer",
              "search_analyzer": "edge_ngram_search_analyzer"
            },
            "completion": {
              "type": "completion"
            }
          },
          "analyzer": "standard"
        }
      }
    }
  }
}
Error
{
  "error" : {
    "root_cause" : [
      {
        "type" : "illegal_argument_exception",
        "reason" : "unknown setting [index.mappings.properties.name.analyzer] please check that any required plugins are installed, or check the breaking changes documentation for removed settings"
      }
    ],
    "type" : "illegal_argument_exception",
    "reason" : "unknown setting [index.mappings.properties.name.analyzer] please check that any required plugins are installed, or check the breaking changes documentation for removed settings",
    "suppressed" : [
      {
        "type" : "illegal_argument_exception",
        "reason" : "unknown setting [index.mappings.properties.name.fields.completion.type] please check that any required plugins are installed, or check the breaking changes documentation for removed settings"
      },
      {
        "type" : "illegal_argument_exception",
        "reason" : "unknown setting [index.mappings.properties.name.fields.edgengram.analyzer] please check that any required plugins are installed, or check the breaking changes documentation for removed settings"
      },
      {
        "type" : "illegal_argument_exception",
        "reason" : "unknown setting [index.mappings.properties.name.fields.edgengram.search_analyzer] please check that any required plugins are installed, or check the breaking changes documentation for removed settings"
      },
      {
        "type" : "illegal_argument_exception",
        "reason" : "unknown setting [index.mappings.properties.name.fields.edgengram.type] please check that any required plugins are installed, or check the breaking changes documentation for removed settings"
      },
      {
        "type" : "illegal_argument_exception",
        "reason" : "unknown setting [index.mappings.properties.name.fields.keywordstring.analyzer] please check that any required plugins are installed, or check the breaking changes documentation for removed settings"
      },
      {
        "type" : "illegal_argument_exception",
        "reason" : "unknown setting [index.mappings.properties.name.fields.keywordstring.type] please check that any required plugins are installed, or check the breaking changes documentation for removed settings"
      },
      {
        "type" : "illegal_argument_exception",
        "reason" : "unknown setting [index.mappings.properties.name.type] please check that any required plugins are installed, or check the breaking changes documentation for removed settings"
      }
    ]
  },
  "status" : 400
}

You were almost there, but mappings must sit at the top level, next to settings, not inside it. The payload structure when creating an index should look like this:
PUT test
{
  "settings": {
    "analysis": {
      ...
    }
  },
  "mappings": {
    "properties": {
      ...
    }
  }
}
In your case this would mean the following (note two further fixes: the tokenizer section must be a sibling of analyzer, not nested inside it, and the built-in tokenizer is called keyword, not keywords):
PUT test
{
  "settings": {
    "analysis": {
      "filter": {},
      "analyzer": {
        "keyword_analyzer": {
          "filter": [
            "lowercase",
            "asciifolding",
            "trim"
          ],
          "char_filter": [],
          "type": "custom",
          "tokenizer": "keyword"
        },
        "edge_ngram_analyzer": {
          "filter": [
            "lowercase"
          ],
          "tokenizer": "edge_ngram_tokenizer"
        },
        "edge_ngram_search_analyzer": {
          "tokenizer": "lowercase"
        }
      },
      "tokenizer": {
        "edge_ngram_tokenizer": {
          "type": "edge_ngram",
          "min_gram": 2,
          "max_gram": 5,
          "token_chars": [
            "letter"
          ]
        }
      }
    }
  },
  "mappings": {
    "properties": {
      "name": {
        "type": "text",
        "fields": {
          "keywordstring": {
            "type": "text",
            "analyzer": "keyword_analyzer"
          },
          "edgengram": {
            "type": "text",
            "analyzer": "edge_ngram_analyzer",
            "search_analyzer": "edge_ngram_search_analyzer"
          },
          "completion": {
            "type": "completion"
          }
        },
        "analyzer": "standard"
      }
    }
  }
}
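Once the index is created, you can sanity-check the analyzers with the _analyze API (a quick verification sketch against the test index above; the sample text is arbitrary):
POST test/_analyze
{
  "analyzer": "edge_ngram_analyzer",
  "text": "Kings"
}
With min_gram 2 and max_gram 5 this should return the tokens ki, kin, king and kings.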

Related

cannot create custom analyzer in elasticsearch

I'm trying to create a custom analyzer in Elasticsearch. Here is the analyzer:
{
  "settings": {
    "analysis": {
      "analyzer": {
        "my_analyzer": {
          "tokenizer" : "standard",
          "filter" : ["custom_stopper", "custom_stems", "custom_synonyms"]
        },
        "filter" : {
          "custom_stopper" : {
            "type" : "stop",
            "stopwords_path" : "analyze/stopwords.txt"
          },
          "custom_stems" : {
            "type" : "stemmer_override",
            "rules_path" : "analyze/stem.txt"
          },
          "custom_synonyms" : {
            "type" : "synonyms",
            "synonyms_path" : "analyze/synonym.txt"
          }
        }
      }
    }
  }
}
but it is throwing this error:
{
  "error": {
    "root_cause": [
      {
        "type": "illegal_argument_exception",
        "reason": "analyzer [filter] must specify either an analyzer type, or a tokenizer"
      }
    ],
    "type": "illegal_argument_exception",
    "reason": "analyzer [filter] must specify either an analyzer type, or a tokenizer"
  },
  "status": 400
}
What am I doing wrong here?
The filter section must be at the same level as analyzer, not nested inside it. Note also that these are token filters, so the analyzer references them via filter (not char_filter), and the filter type is synonym, not synonyms.
The structure looks like this:
{
  "settings": {
    "analysis": {
      "analyzer": {
        "my_analyzer": {
          "tokenizer": "standard",
          "filter": [
            "custom_stopper",
            "custom_stems",
            "custom_synonyms"
          ]
        }
      },
      "filter": {
        "custom_stopper": {
          "type": "stop",
          "stopwords_path": "analyze/stopwords.txt"
        },
        "custom_stems": {
          "type": "stemmer_override",
          "rules_path": "analyze/stem.txt"
        },
        "custom_synonyms": {
          "type": "synonym",
          "synonyms_path": "analyze/synonym.txt"
        }
      }
    }
  }
}
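If you want to verify the corrected structure without first putting the three files on disk, the same filters can be declared with inline rules (a minimal sketch; the index name test_inline and the sample rules are hypothetical stand-ins for the file contents):
PUT test_inline
{
  "settings": {
    "analysis": {
      "analyzer": {
        "my_analyzer": {
          "tokenizer": "standard",
          "filter": ["custom_stopper", "custom_stems", "custom_synonyms"]
        }
      },
      "filter": {
        "custom_stopper": {
          "type": "stop",
          "stopwords": ["the", "a"]
        },
        "custom_stems": {
          "type": "stemmer_override",
          "rules": ["running => run"]
        },
        "custom_synonyms": {
          "type": "synonym",
          "synonyms": ["car, automobile"]
        }
      }
    }
  }
}
Running POST test_inline/_analyze with this analyzer on "a car running" should drop "a", expand "car" to car/automobile, and stem "running" to run.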

Using different language analyzers with ngram Analyzer in one mapping in Elasticsearch

I want to use English and German custom analyzers together with other analyzers, for example ngram. Is the following mapping correct? I am getting an error for the German analyzer: unknown setting [index.filter.german_stop.type]. I searched but did not find any information about using multiple language analyzers in a custom type. Is it possible to use a language-specific ngram filter?
PUT test
{
  "settings": {
    "analysis": {
      "analyzer": {
        "english_analyzer": {
          "type": "custom",
          "filter": [
            "lowercase",
            "english_stop",
            "ngram_filter_en"
          ],
          "tokenizer": "whitespace"
        }
      },
      "filter": {
        "english_stop": {
          "type": "stop"
        },
        "ngram_filter_en": {
          "type": "edge_ngram",
          "min_gram": 1,
          "max_gram": 25
        }
      },
      "german_analyzer" : {
        "type" : "custom",
        "filter" : [
          "lowercase",
          "german_stop",
          "ngram_filter_de"
        ],
        "tokenizer" : "whitespace"
      }
    },
    "filter" : {
      "german_stop" : {
        "type" : "stop"
      },
      "ngram_filter_de" : {
        "type" : "edge_ngram",
        "min_ngram" : "1",
        "max_gram" : 25
      }
    }
  },
  "mappings" : {
    "dynamic" : true,
    "properties": {
      "content" : {
        "tye" : "text",
        "properties" : {
          "en" : {
            "type" : "text",
            "analyzer" : "english_analyzer"
          },
          "de" : {
            "type" : "text",
            "analyzer" : "german_analyzer"
          }
        }
      }
    }
  }
}
There are small syntax errors.
You have your last filter object outside the analysis context, and you cannot have the same key multiple times in a JSON object.
There are two further slips: german_analyzer must live inside the analyzer section, and the edge_ngram parameter is min_gram, not min_ngram. (In the mapping, "tye" should also be "type".)
So, the settings below should help:
{
  "analysis": {
    "analyzer": {
      "english_analyzer": {
        "type": "custom",
        "filter": [
          "lowercase",
          "english_stop",
          "ngram_filter_en"
        ],
        "tokenizer": "whitespace"
      },
      "german_analyzer": {
        "type": "custom",
        "filter": [
          "lowercase",
          "german_stop",
          "ngram_filter_de"
        ],
        "tokenizer": "whitespace"
      }
    },
    "filter": {
      "english_stop": {
        "type": "stop"
      },
      "ngram_filter_en": {
        "type": "edge_ngram",
        "min_gram": 1,
        "max_gram": 25
      },
      "german_stop": {
        "type": "stop"
      },
      "ngram_filter_de": {
        "type": "edge_ngram",
        "min_gram": "1",
        "max_gram": 25
      }
    }
  }
}
To understand the error in your mapping:
{
  "analysis": {
    "analyzer": {
      "filter": {
        "english_stop": {
          "type": "stop"
        },
        "ngram_filter_en": {
          "type": "edge_ngram",
          "min_gram": 1,
          "max_gram": 25
        }
      },
      "german_analyzer" : {
        "type" : "custom",
        "filter" : [
          "lowercase",
          "german_stop",
          "ngram_filter_de"
        ],
        "tokenizer" : "whitespace"
      }
    }
  },
  "filter" : { // this filter sits outside "analysis"; you cannot simply add another "filter" key inside "analysis", so merge both filter blocks as shown above
    "german_stop" : {
      "type" : "stop"
    },
    "ngram_filter_de" : {
      "type" : "edge_ngram",
      "min_ngram" : "1",
      "max_gram" : 25
    }
  }
}
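Once the corrected settings are applied, you can verify both analyzers with _analyze (a sketch, assuming the index was created as test; note that "type": "stop" defaults to English stopwords unless you set "stopwords": "_german_" on the German filter):
POST test/_analyze
{
  "analyzer": "german_analyzer",
  "text": "Hallo Welt"
}
This should return edge ngrams such as h, ha, hal, ... for each token.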

Getting "Failed to build synonyms" message when trying to build synonyms filter

I'm using Elasticsearch 6.8 with Python 3.7.
I'm trying to create my own synonyms that map emoticons to text.
For example: ":-)" should map to "happy-smiley".
I'm trying to build the synonyms and create the index with the following code:
def create_analyzer(es_api, index_name, doc_type):
    body = {
        "settings": {
            "index": {
                "analysis": {
                    "filter": {
                        "synonym_filter": {
                            "type": "synonym",
                            "synonyms": [
                                ":-), happy-smiley",
                                ":-(, sad-smiley"
                            ]
                        }
                    },
                    "analyzer": {
                        "synonym_analyzer": {
                            "tokenizer": "standard",
                            "filter": ["lowercase", "synonym_filter"]
                        }
                    }
                }
            }
        },
        "mappings": {
            doc_type: {
                "properties": {
                    "tweet": {"type": "text", "fielddata": "true"},
                    "existence": {"type": "text"},
                    "confidence": {"type": "float"}
                }
            }
        }
    }
    res = es_api.indices.create(index=index_name, body=body)
But I'm getting this error:
elasticsearch.exceptions.RequestError: RequestError(400, 'illegal_argument_exception', 'failed to build synonyms')
What is wrong and how can I fix it ?
I can tell you what's wrong and (updated) how to fix it.
If you run this query in Dev Tools or via cURL you will see the reason for the error; the Python client seems to truncate the error details, so you cannot see the cause.
PUT st_t3
{
  "settings": {
    "index": {
      "analysis": {
        "filter": {
          "synonym_filter": {
            "type": "synonym",
            "synonyms": [
              ":-), happy-smiley",
              ":-(, sad-smiley"
            ]
          }
        },
        "analyzer": {
          "synonym_analyzer": {
            "tokenizer": "standard",
            "filter": [
              "lowercase",
              "synonym_filter"
            ]
          }
        }
      }
    }
  },
  "mappings": {
    "properties": {
      "tweet": {
        "type": "text",
        "fielddata": "true"
      },
      "existence": {
        "type": "text"
      },
      "confidence": {
        "type": "float"
      }
    }
  }
}
Response:
{
  "error": {
    "root_cause": [
      {
        "type": "remote_transport_exception",
        "reason": "[127.0.0.1:9301][indices:admin/create]"
      }
    ],
    "type": "illegal_argument_exception",
    "reason": "failed to build synonyms",
    "caused_by": {
      "type": "parse_exception",
      "reason": "parse_exception: Invalid synonym rule at line 1",
      "caused_by": {
        "type": "illegal_argument_exception",
        "reason": "term: :-) was completely eliminated by analyzer"
      }
    }
  },
  "status": 400
}
So the reason "term: :-) was completely eliminated by analyzer" means that :-) never survives tokenization: the standard tokenizer drops punctuation-only terms, so the synonym filter has nothing left to build the rule from.
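You can reproduce the root cause directly with the tokenizer on its own (a quick check you can run as-is):
POST _analyze
{
  "tokenizer": "standard",
  "text": ":-)"
}
This returns an empty token list, which is exactly why the synonym rule at line 1 cannot be built.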
UPDATE
It can be done with a mapping char_filter instead, which rewrites the emoticon before tokenization.
Example:
PUT st_t3
{
  "settings": {
    "index": {
      "analysis": {
        "char_filter": {
          "happy_filter": {
            "type": "mapping",
            "mappings": [
              ":-) => happy-smiley",
              ":-( => sad-smiley"
            ]
          }
        },
        "analyzer": {
          "smile_analyzer": {
            "type": "custom",
            "char_filter": [
              "happy_filter"
            ],
            "tokenizer": "standard",
            "filter": [
              "lowercase"
            ]
          }
        }
      }
    }
  },
  "mappings": {
    "properties": {
      "tweet": {
        "type": "text",
        "fielddata": "true"
      },
      "existence": {
        "type": "text"
      },
      "confidence": {
        "type": "float"
      }
    }
  }
}
Test
POST st_t3/_analyze
{
  "text": ":-) test",
  "analyzer": "smile_analyzer"
}
Answer
{
  "tokens" : [
    {
      "token" : "happy",
      "start_offset" : 0,
      "end_offset" : 2,
      "type" : "<ALPHANUM>",
      "position" : 0
    },
    {
      "token" : "smiley",
      "start_offset" : 2,
      "end_offset" : 3,
      "type" : "<ALPHANUM>",
      "position" : 1
    },
    {
      "token" : "test",
      "start_offset" : 4,
      "end_offset" : 8,
      "type" : "<ALPHANUM>",
      "position" : 2
    }
  ]
}
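Note that in the index above the tweet field still uses the standard analyzer at index time, so the emoticon mapping is only applied when you call _analyze explicitly. To apply it when documents are indexed, the field would need to reference the analyzer, along these lines (a sketch of the relevant mapping fragment):
"tweet": {
  "type": "text",
  "analyzer": "smile_analyzer",
  "fielddata": "true"
}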

Must specify either an analyzer type, or a tokenizer

I am basically new to Elasticsearch. I am trying to implement fuzzy search, synonym search, edge ngram and autocomplete on the "name_auto" field, but it seems like my index creation is failing.
Another question: can I implement all the analyzers on the "name" field, and if so, how can I do it?
{
  "settings": {
    "index": {
      "analysis": {
        "filter": {
          "synonym": {
            "ignore_case": "true",
            "type": "synonym",
            "format": "wordnet",
            "synonyms_path": "analysis/wn_s.pl"
          }
        },
        "analyzer": {
          "synonym": {
            "tokenizer": "whitespace",
            "filter": [
              "synonym"
            ]
          },
          "keyword_analyzer": {
            "filter": [
              "lowercase",
              "asciifolding",
              "trim"
            ],
            "char_filter": [],
            "type": "custom",
            "tokenizer": "keyword"
          },
          "edge_ngram_analyzer": {
            "filter": [
              "lowercase"
            ],
            "tokenizer": "edge_ngram_tokenizer"
          },
          "edge_ngram_search_analyzer": {
            "tokenizer": "lowercase"
          },
          "tokenizer": {
            "edge_ngram_tokenizer": {
              "type": "edge_ngram",
              "min_gram": 1,
              "max_gram": 25,
              "token_chars": [
                "letter"
              ]
            }
          }
        },
        "mappings": {
          "properties": {
            "firebaseId": {
              "type": "text"
            },
            "name": {
              "fielddata": true,
              "type": "text",
              "analyzer": "standard"
            },
            "name_auto": {
              "type": "text",
              "fields": {
                "keywordstring": {
                  "type": "text",
                  "analyzer": "keyword_analyzer"
                },
                "edgengram": {
                  "type": "text",
                  "analyzer": "edge_ngram_analyzer",
                  "search_analyzer": "edge_ngram_search_analyzer"
                },
                "completion": {
                  "type": "completion"
                },
                "synonym_analyzer": {
                  "type": "synonym",
                  "analyzer": "synonym"
                }
              }
            }
          }
        }
      }
    }
  }
}
This is the output:
{
  "error": {
    "root_cause": [
      {
        "type": "illegal_argument_exception",
        "reason": "analyzer [tokenizer] must specify either an analyzer type, or a tokenizer"
      }
    ],
    "type": "illegal_argument_exception",
    "reason": "analyzer [tokenizer] must specify either an analyzer type, or a tokenizer"
  },
  "status": 400
}
Where am I going wrong? Please guide me in the right direction.
Your tokenizer section is located inside the analyzer section, which is not correct; it must be a sibling of analyzer. The mappings section also needs to sit at the top level, next to settings. Finally, a multi-field cannot have "type": "synonym"; to apply your synonym analyzer, declare that sub-field as text with "analyzer": "synonym". Try this instead, it should work:
{
  "settings": {
    "index": {
      "analysis": {
        "filter": {
          "synonym": {
            "ignore_case": "true",
            "type": "synonym",
            "format": "wordnet",
            "synonyms_path": "analysis/wn_s.pl"
          }
        },
        "analyzer": {
          "synonym": {
            "tokenizer": "whitespace",
            "filter": [
              "synonym"
            ]
          },
          "keyword_analyzer": {
            "filter": [
              "lowercase",
              "asciifolding",
              "trim"
            ],
            "char_filter": [],
            "type": "custom",
            "tokenizer": "keyword"
          },
          "edge_ngram_analyzer": {
            "filter": [
              "lowercase"
            ],
            "tokenizer": "edge_ngram_tokenizer"
          },
          "edge_ngram_search_analyzer": {
            "tokenizer": "lowercase"
          }
        },
        "tokenizer": {
          "edge_ngram_tokenizer": {
            "type": "edge_ngram",
            "min_gram": 1,
            "max_gram": 25,
            "token_chars": [
              "letter"
            ]
          }
        }
      }
    }
  },
  "mappings": {
    "properties": {
      "firebaseId": {
        "type": "text"
      },
      "name": {
        "fielddata": true,
        "type": "text",
        "analyzer": "standard"
      },
      "name_auto": {
        "type": "text",
        "fields": {
          "keywordstring": {
            "type": "text",
            "analyzer": "keyword_analyzer"
          },
          "edgengram": {
            "type": "text",
            "analyzer": "edge_ngram_analyzer",
            "search_analyzer": "edge_ngram_search_analyzer"
          },
          "completion": {
            "type": "completion"
          },
          "synonym_analyzer": {
            "type": "text",
            "analyzer": "synonym"
          }
        }
      }
    }
  }
}
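Once documents are indexed, the completion sub-field can be exercised with a suggest query along these lines (a sketch; the index name test, the suggestion label name-suggest and the prefix are hypothetical):
POST test/_search
{
  "suggest": {
    "name-suggest": {
      "prefix": "kin",
      "completion": {
        "field": "name_auto.completion"
      }
    }
  }
}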

How to add custom analyzer to mapping ElasticSearch-2.3.5 for partial searching?

I use ElasticSearch 2.3.5. I want to add my custom analyzer to the mapping while creating the index.
PUT /library
{
  "settings": {
    "analysis": {
      "tokenizer": {
        "ngram_tokenizer": {
          "type": "nGram",
          "min_gram": "1",
          "max_gram": "15",
          "token_chars": [
            "letter",
            "digit"
          ]
        }
      },
      "analyzer": {
        "index_ngram_analyzer": {
          "type": "custom",
          "tokenizer": "ngram_tokenizer",
          "filter": [
            "lowercase"
          ]
        }
      },
      "search_term_analyzer": {
        "type": "custom",
        "tokenizer": "keyword",
        "filter": "lowercase"
      }
    }
  },
  "mappings": {
    "book": {
      "properties": {
        "Id": {
          "type": "long",
          "search_analyzer": "search_term_analyzer",
          "index_analyzer": "index_ngram_analyzer",
          "term_vector": "with_positions_offsets"
        },
        "Title": {
          "type": "string",
          "search_analyzer": "search_term_analyzer",
          "index_analyzer": "index_ngram_analyzer",
          "term_vector": "with_positions_offsets"
        }
      }
    }
  }
}
I took this template example from the official guide:
{
  "settings" : {
    "number_of_shards" : 1
  },
  "mappings" : {
    "type1" : {
      "properties" : {
        "field1" : { "type" : "string", "index" : "not_analyzed" }
      }
    }
  }
}
But I get an error when trying to execute the first part of the code. Here is my error:
{
  "error": {
    "root_cause": [
      {
        "type": "mapper_parsing_exception",
        "reason": "analyzer [search_term_analyzer] not found for field [Title]"
      }
    ],
    "type": "mapper_parsing_exception",
    "reason": "Failed to parse mapping [book]: analyzer [search_term_analyzer] not found for field [Title]",
    "caused_by": {
      "type": "mapper_parsing_exception",
      "reason": "analyzer [search_term_analyzer] not found for field [Title]"
    }
  },
  "status": 400
}
I can make it work if I put my mappings inside settings, but I think that is the wrong way. I am trying to find my book by using part of the title. I have the "King Arthur" book, for example. My query looks like this:
POST /library/book/_search
{
  "query": {
    "match": {
      "Title": "kin"
    }
  }
}
Nothing is found. What am I doing wrong? Could you help me? It seems my analyzer and tokenizer don't work. How can I get the terms "k", "i", "ki", "king", etc.? Because I think I only have two terms right now: 'king' and 'arthur'.
You have misplaced the search_term_analyzer analyzer; it should be inside the analyzer section:
PUT /library
{
  "settings": {
    "analysis": {
      "tokenizer": {
        "ngram_tokenizer": {
          "type": "nGram",
          "min_gram": "1",
          "max_gram": "15",
          "token_chars": [
            "letter",
            "digit"
          ]
        }
      },
      "analyzer": {
        "index_ngram_analyzer": {
          "type": "custom",
          "tokenizer": "ngram_tokenizer",
          "filter": [
            "lowercase"
          ]
        },
        "search_term_analyzer": {
          "type": "custom",
          "tokenizer": "keyword",
          "filter": "lowercase"
        }
      }
    }
  },
  "mappings": {
    "book": {
      "properties": {
        "Id": {
          "type": "long", <---- you probably need to make this a string or remove the analyzers
          "search_analyzer": "search_term_analyzer",
          "analyzer": "index_ngram_analyzer",
          "term_vector": "with_positions_offsets"
        },
        "Title": {
          "type": "string",
          "search_analyzer": "search_term_analyzer",
          "analyzer": "index_ngram_analyzer",
          "term_vector": "with_positions_offsets"
        }
      }
    }
  }
}
Also make sure to use analyzer instead of index_analyzer; the latter has been deprecated in ES 2.x.
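You can check the terms the index analyzer produces with the _analyze API (a quick sketch against the library index above):
POST /library/_analyze
{
  "analyzer": "index_ngram_analyzer",
  "text": "King"
}
With min_gram 1 and max_gram 15 this should return k, i, n, g, ki, in, ng, kin, ing and king, so a match query for "kin" on Title will now find the "King Arthur" book.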
