I'm trying to get this code to work, but I'm getting the error below.
Reference: https://www.youtube.com/watch?v=PQGlhbf7o7c
Please let me know how this can be fixed. Thank you.
Code:
PUT test
{
"settings": {
"index": {
"analysis": {
"filter": {},
"analyzer": {
"keyword_analyzer": {
"filter": [
"lowercase",
"asciifolding",
"trim"
],
"char_filter": [],
"type": "custom",
"tokenizer": "keywords"
},
"edge_ngram_analyzer": {
"filter": [
"lowercase"
],
"tokenizer": "edge_ngram_tokenizer"
},
"edge_ngram_search_analyzer": {
"tokenizer": "lowercase"
},
"tokenizer": {
"edge_ngram_tokenizer": {
"type": "edge_ngram",
"min_gram": 2,
"max_gram": 5,
"token_chars": [
"letter"
]
}
}
}
}
},
"mappings": {
"properties": {
"name": {
"type": "text",
"fields": {
"keywordstring": {
"type": "text",
"analyzer": "keyword_analyzer"
},
"edgengram": {
"type": "text",
"analyzer": "edge_ngram_analyzer",
"search_analyzer": "edge_ngram_search_analyzer"
},
"completion": {
"type": "completion"
}
},
"analyzer": "standard"
}
}
}
}
}
Error
{
"error" : {
"root_cause" : [
{
"type" : "illegal_argument_exception",
"reason" : "unknown setting [index.mappings.properties.name.analyzer] please check that any required plugins are installed, or check the breaking changes documentation for removed settings"
}
],
"type" : "illegal_argument_exception",
"reason" : "unknown setting [index.mappings.properties.name.analyzer] please check that any required plugins are installed, or check the breaking changes documentation for removed settings",
"suppressed" : [
{
"type" : "illegal_argument_exception",
"reason" : "unknown setting [index.mappings.properties.name.fields.completion.type] please check that any required plugins are installed, or check the breaking changes documentation for removed settings"
},
{
"type" : "illegal_argument_exception",
"reason" : "unknown setting [index.mappings.properties.name.fields.edgengram.analyzer] please check that any required plugins are installed, or check the breaking changes documentation for removed settings"
},
{
"type" : "illegal_argument_exception",
"reason" : "unknown setting [index.mappings.properties.name.fields.edgengram.search_analyzer] please check that any required plugins are installed, or check the breaking changes documentation for removed settings"
},
{
"type" : "illegal_argument_exception",
"reason" : "unknown setting [index.mappings.properties.name.fields.edgengram.type] please check that any required plugins are installed, or check the breaking changes documentation for removed settings"
},
{
"type" : "illegal_argument_exception",
"reason" : "unknown setting [index.mappings.properties.name.fields.keywordstring.analyzer] please check that any required plugins are installed, or check the breaking changes documentation for removed settings"
},
{
"type" : "illegal_argument_exception",
"reason" : "unknown setting [index.mappings.properties.name.fields.keywordstring.type] please check that any required plugins are installed, or check the breaking changes documentation for removed settings"
},
{
"type" : "illegal_argument_exception",
"reason" : "unknown setting [index.mappings.properties.name.type] please check that any required plugins are installed, or check the breaking changes documentation for removed settings"
}
]
},
"status" : 400
}
You were almost there, but the payload structure when creating an index should look like this:
PUT test
{
"settings": {
"analysis": {
...
}
},
"mappings": {
"properties": {
...
}
}
}
In your case this would mean the following (note that the tokenizer section must sit beside analyzer rather than inside it, and that the built-in tokenizer is called keyword, not keywords):
PUT test
{
"settings": {
"analysis": {
"filter": {},
"analyzer": {
"keyword_analyzer": {
"filter": [
"lowercase",
"asciifolding",
"trim"
],
"char_filter": [],
"type": "custom",
"tokenizer": "keywords"
},
"edge_ngram_analyzer": {
"filter": [
"lowercase"
],
"tokenizer": "edge_ngram_tokenizer"
},
"edge_ngram_search_analyzer": {
"tokenizer": "lowercase"
}
},
"tokenizer": {
"edge_ngram_tokenizer": {
"type": "edge_ngram",
"min_gram": 2,
"max_gram": 5,
"token_chars": [
"letter"
]
}
}
}
},
"mappings": {
"properties": {
"name": {
"type": "text",
"fields": {
"keywordstring": {
"type": "text",
"analyzer": "keyword_analyzer"
},
"edgengram": {
"type": "text",
"analyzer": "edge_ngram_analyzer",
"search_analyzer": "edge_ngram_search_analyzer"
},
"completion": {
"type": "completion"
}
},
"analyzer": "standard"
}
}
}
}
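Once the index is accepted, you can sanity-check the new analyzers with the _analyze API. A quick sketch (the index name test comes from the request above):

POST test/_analyze
{
  "analyzer": "edge_ngram_analyzer",
  "text": "elastic"
}

With min_gram 2 and max_gram 5 this should return the tokens el, ela, elas and elast.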
Related
I'm trying to create a custom analyzer in Elasticsearch. Here is the analyzer:
{
"settings": {
"analysis": {
"analyzer": {
"my_analyzer": {
"tokenizer" : "standard",
"filter" : ["custom_stopper", "custom_stems", "custom_synonyms"]
},
"filter" : {
"custom_stopper" : {
"type" : "stop",
"stopwords_path" : "analyze/stopwords.txt"
},
"custom_stems" : {
"type" : "stemmer_override",
"rules_path" : "analyze/stem.txt"
},
"custom_synonyms" : {
"type" : "synonyms",
"synonyms_path" : "analyze/synonym.txt"
}
}
}
}
}
}
but it is throwing this error:
{
"error": {
"root_cause": [
{
"type": "illegal_argument_exception",
"reason": "analyzer [filter] must specify either an analyzer type, or a tokenizer"
}
],
"type": "illegal_argument_exception",
"reason": "analyzer [filter] must specify either an analyzer type, or a tokenizer"
},
"status": 400
}
What am I doing wrong here?
The filter section must be at the same level as the analyzer section, not inside it.
The structure looks something like this:
{
"settings": {
"analysis": {
"analyzer": {
"my_analyzer": {
"tokenizer": "standard",
"char_filter": [
"custom_stopper",
"custom_stems",
"custom_synonyms"
]
}
},
"filter": {
"custom_stopper": {
"type": "stop",
"stopwords_path": "analyze/stopwords.txt"
},
"custom_stems": {
"type": "stemmer_override",
"rules_path": "analyze/stem.txt"
},
"custom_synonyms": {
"type": "synonyms",
"synonyms_path": "analyze/synonym.txt"
}
}
}
}
}
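Note that stopwords_path, rules_path and synonyms_path all point to files under the Elasticsearch config directory, so index creation will still fail if those files don't exist. To verify the structure first without any files, here is a minimal sketch with inline rules (hypothetical index name and word list):

PUT my_index
{
  "settings": {
    "analysis": {
      "analyzer": {
        "my_analyzer": {
          "tokenizer": "standard",
          "filter": ["custom_stopper"]
        }
      },
      "filter": {
        "custom_stopper": {
          "type": "stop",
          "stopwords": ["a", "an", "the"]
        }
      }
    }
  }
}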
I want to use English and German custom analyzers together with other analyzers, for example ngram. Is the following mapping correct? I am getting an error for the German analyzer: [unknown setting [index.filter.german_stop.type]]. I searched but did not find any information about using multiple language analyzers in a custom type. Is it possible to use a language-specific ngram filter?
PUT test
{
"settings": {
"analysis": {
"analyzer": {
"english_analyzer": {
"type": "custom",
"filter": [
"lowercase",
"english_stop",
"ngram_filter_en"
],
"tokenizer": "whitespace"
}
},
"filter": {
"english_stop": {
"type": "stop"
},
"ngram_filter_en": {
"type": "edge_ngram",
"min_gram": 1,
"max_gram": 25
}
},
"german_analyzer" : {
"type" : "custom",
"filter" : [
"lowercase",
"german_stop",
"ngram_filter_de"
],
"tokenizer" : "whitespace"
}
},
"filter" : {
"german_stop" : {
"type" : "stop"
},
"ngram_filter_de" : {
"type" : "edge_ngram",
"min_ngram" : "1",
"max_gram" : 25
}
}
},
"mappings" : {
"dynamic" : true,
"properties": {
"content" : {
"tye" : "text",
"properties" : {
"en" : {
"type" : "text",
"analyzer" : "english_analyzer"
},
"de" : {
"type" : "text",
"analyzer" : "german_analyzer"
}
}
}
}
}
}
There are a few small syntax errors.
You have your last filter object outside the analysis context, and german_analyzer sits directly under analysis instead of inside the analyzer object.
You cannot have the same key multiple times in a JSON object, so the duplicate analyzer and filter entries have to be merged.
So, the settings below should help:
{
  "analysis": {
    "analyzer": {
      "english_analyzer": {
        "type": "custom",
        "filter": [
          "lowercase",
          "english_stop",
          "ngram_filter_en"
        ],
        "tokenizer": "whitespace"
      },
      "german_analyzer": {
        "type": "custom",
        "filter": [
          "lowercase",
          "german_stop",
          "ngram_filter_de"
        ],
        "tokenizer": "whitespace"
      }
    },
    "filter": {
      "english_stop": {
        "type": "stop"
      },
      "ngram_filter_en": {
        "type": "edge_ngram",
        "min_gram": 1,
        "max_gram": 25
      },
      "german_stop": {
        "type": "stop"
      },
      "ngram_filter_de": {
        "type": "edge_ngram",
        "min_gram": 1,
        "max_gram": 25
      }
    }
  }
}
To understand the errors in your settings, here is your original structure annotated:
{
  "analysis": {
    "analyzer": {
      "english_analyzer": {
        ...
      }
    },
    "filter": {
      "english_stop": {
        "type": "stop"
      },
      "ngram_filter_en": {
        "type": "edge_ngram",
        "min_gram": 1,
        "max_gram": 25
      }
    },
    "german_analyzer": { // this sits under "analysis" but outside "analyzer"; it belongs next to "english_analyzer"
      "type": "custom",
      "filter": [
        "lowercase",
        "german_stop",
        "ngram_filter_de"
      ],
      "tokenizer": "whitespace"
    }
  },
  "filter": { // this is outside "analysis"; you cannot simply add another "filter" key inside "analysis", so both filter objects are merged above
    "german_stop": {
      "type": "stop"
    },
    "ngram_filter_de": {
      "type": "edge_ngram",
      "min_ngram": "1", // note: this should be "min_gram"
      "max_gram": 25
    }
  }
}
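The mappings block from the question has problems of its own as well: "tye" should be "type", and mappings must be a sibling of settings rather than nested inside it. A sketch of the corrected mapping, assuming en and de are meant to be sub-fields of content:

"mappings": {
  "dynamic": true,
  "properties": {
    "content": {
      "properties": {
        "en": {
          "type": "text",
          "analyzer": "english_analyzer"
        },
        "de": {
          "type": "text",
          "analyzer": "german_analyzer"
        }
      }
    }
  }
}

And yes, language-specific ngram filters are possible exactly like this: each custom analyzer references its own edge_ngram filter.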
I'm using Elasticsearch 6.8 with Python 3.7.
I'm trying to create my own synonyms that map emoticons to text.
For example: ":-)" will map to "happy-smiley".
I'm trying to build the synonyms and create the index with the following code:
def create_analyzer(es_api, index_name, doc_type):
    body = {
        "settings": {
            "index": {
                "analysis": {
                    "filter": {
                        "synonym_filter": {
                            "type": "synonym",
                            "synonyms": [
                                ":-), happy-smiley",
                                ":-(, sad-smiley"
                            ]
                        }
                    },
                    "analyzer": {
                        "synonym_analyzer": {
                            "tokenizer": "standard",
                            "filter": ["lowercase", "synonym_filter"]
                        }
                    }
                }
            }
        },
        "mappings": {
            doc_type: {
                "properties": {
                    "tweet": {"type": "text", "fielddata": "true"},
                    "existence": {"type": "text"},
                    "confidence": {"type": "float"}
                }
            }
        }
    }
    res = es_api.indices.create(index=index_name, body=body)
But I'm getting errors:
elasticsearch.exceptions.RequestError: RequestError(400, 'illegal_argument_exception', 'failed to build synonyms')
What is wrong and how can I fix it?
I can tell you what's wrong and (updated) how to fix it.
If you run this query in Dev Tools or via cURL, you will see the reason for the error; it seems Python is cutting off the error details, so you cannot see it.
PUT st_t3
{
"settings": {
"index": {
"analysis": {
"filter": {
"synonym_filter": {
"type": "synonym",
"synonyms": [
":-), happy-smiley",
":-(, sad-smiley"
]
}
},
"analyzer": {
"synonym_analyzer": {
"tokenizer": "standard",
"filter": [
"lowercase",
"synonym_filter"
]
}
}
}
}
},
"mappings": {
"properties": {
"tweet": {
"type": "text",
"fielddata": "true"
},
"existence": {
"type": "text"
},
"confidence": {
"type": "float"
}
}
}
}
Response:
{
"error": {
"root_cause": [
{
"type": "remote_transport_exception",
"reason": "[127.0.0.1:9301][indices:admin/create]"
}
],
"type": "illegal_argument_exception",
"reason": "failed to build synonyms",
"caused_by": {
"type": "parse_exception",
"reason": "parse_exception: Invalid synonym rule at line 1",
"caused_by": {
"type": "illegal_argument_exception",
"reason": "term: :-) was completely eliminated by analyzer"
}
}
},
"status": 400
}
So the reason "term: :-) was completely eliminated by analyzer" means that the standard tokenizer strips these punctuation-only characters before the synonym filter ever sees them, so the left-hand side of the rule ends up empty.
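You can reproduce the elimination directly: the standard tokenizer drops tokens that consist only of punctuation, so this returns an empty token list:

POST _analyze
{
  "tokenizer": "standard",
  "text": ":-)"
}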
UPDATE
It can be done with a mapping char_filter, which rewrites the characters before tokenization.
Example:
PUT st_t3
{
"settings": {
"index": {
"analysis": {
"char_filter": {
"happy_filter": {
"type": "mapping",
"mappings": [
":-) => happy-smiley",
":-( => sad-smiley"
]
}
},
"analyzer": {
"smile_analyzer": {
"type": "custom",
"char_filter": [
"happy_filter"
],
"tokenizer": "standard",
"filter": [
"lowercase"
]
}
}
}
}
},
"mappings": {
"properties": {
"tweet": {
"type": "text",
"fielddata": "true"
},
"existence": {
"type": "text"
},
"confidence": {
"type": "float"
}
}
}
}
Test
POST st_t3/_analyze
{
"text": ":-) test",
"analyzer": "smile_analyzer"
}
Answer
{
"tokens" : [
{
"token" : "happy",
"start_offset" : 0,
"end_offset" : 2,
"type" : "<ALPHANUM>",
"position" : 0
},
{
"token" : "smiley",
"start_offset" : 2,
"end_offset" : 3,
"type" : "<ALPHANUM>",
"position" : 1
},
{
"token" : "test",
"start_offset" : 4,
"end_offset" : 8,
"type" : "<ALPHANUM>",
"position" : 2
}
]
}
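Note that the mapping above never attaches the new analyzer to a field, so documents are still indexed with the standard analyzer. To analyze tweets this way at index time, you would also set it on the field; a sketch of the tweet property:

"tweet": {
  "type": "text",
  "analyzer": "smile_analyzer",
  "fielddata": "true"
}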
I am basically new to Elasticsearch. I am trying to implement fuzzy search, synonym search, edge ngram and autocomplete on the "name_auto" field, but it seems like my index creation is failing.
Another question: can I implement all the analyzers for the "name" field? If so, how can I do it?
{
"settings": {
"index": {
"analysis": {
"filter": {
"synonym": {
"ignore_case": "true",
"type": "synonym",
"format": "wordnet",
"synonyms_path": "analysis/wn_s.pl"
}
},
"analyzer": {
"synonym": {
"tokenizer": "whitespace",
"filter": [
"synonym"
]
},
"keyword_analyzer": {
"filter": [
"lowercase",
"asciifolding",
"trim"
],
"char_filter": [],
"type": "custom",
"tokenizer": "keyword"
},
"edge_ngram_analyzer": {
"filter": [
"lowercase"
],
"tokenizer": "edge_ngram_tokenizer"
},
"edge_ngram_search_analyzer": {
"tokenizer": "lowercase"
},
"tokenizer": {
"edge_ngram_tokenizer": {
"type": "edge_ngram",
"min_gram": 1,
"max_gram": 25,
"token_chars": [
"letter"
]
}
}
},
"mappings": {
"properties": {
"firebaseId": {
"type": "text"
},
"name": {
"fielddata": true,
"type": "text",
"analyzer": "standard"
},
"name_auto": {
"type": "text",
"fields": {
"keywordstring": {
"type": "text",
"analyzer": "keyword_analyzer"
},
"edgengram": {
"type": "text",
"analyzer": "edge_ngram_analyzer",
"search_analyzer": "edge_ngram_search_analyzer"
},
"completion": {
"type": "completion"
},
"synonym_analyzer": {
"type": "synonym",
"analyzer": "synonym"
}
}
}
}
}
}
}
}
}
This is the output:
{
  "error": {
    "root_cause": [
      {
        "type": "illegal_argument_exception",
        "reason": "analyzer [tokenizer] must specify either an analyzer type, or a tokenizer"
      }
    ],
    "type": "illegal_argument_exception",
    "reason": "analyzer [tokenizer] must specify either an analyzer type, or a tokenizer"
  },
  "status": 400
}
Where am I going wrong? Please guide me in the right direction.
Your tokenizer section is located inside the analyzer section, which is not correct. The mappings section also needs to sit outside settings, and there is no synonym field type, so the synonym_analyzer sub-field must be a text field that uses the synonym analyzer. Try this instead; it should work:
{
"settings": {
"index": {
"analysis": {
"filter": {
"synonym": {
"ignore_case": "true",
"type": "synonym",
"format": "wordnet",
"synonyms_path": "analysis/wn_s.pl"
}
},
"analyzer": {
"synonym": {
"tokenizer": "whitespace",
"filter": [
"synonym"
]
},
"keyword_analyzer": {
"filter": [
"lowercase",
"asciifolding",
"trim"
],
"char_filter": [],
"type": "custom",
"tokenizer": "keyword"
},
"edge_ngram_analyzer": {
"filter": [
"lowercase"
],
"tokenizer": "edge_ngram_tokenizer"
},
"edge_ngram_search_analyzer": {
"tokenizer": "lowercase"
}
},
"tokenizer": {
"edge_ngram_tokenizer": {
"type": "edge_ngram",
"min_gram": 1,
"max_gram": 25,
"token_chars": [
"letter"
]
}
}
}
}
},
"mappings": {
"properties": {
"firebaseId": {
"type": "text"
},
"name": {
"fielddata": true,
"type": "text",
"analyzer": "standard"
},
"name_auto": {
"type": "text",
"fields": {
"keywordstring": {
"type": "text",
"analyzer": "keyword_analyzer"
},
"edgengram": {
"type": "text",
"analyzer": "edge_ngram_analyzer",
"search_analyzer": "edge_ngram_search_analyzer"
},
"completion": {
"type": "completion"
},
"synonym_analyzer": {
"type": "synonym",
"analyzer": "synonym"
}
}
}
}
}
}
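You can then check the edge n-gram output with the _analyze API; a sketch, assuming the index was created under the name test (the original request doesn't show a name):

POST test/_analyze
{
  "field": "name_auto.edgengram",
  "text": "arthur"
}

With min_gram 1 and max_gram 25 this should return a, ar, art, arth, arthu and arthur. Also note that the synonym filter needs the analysis/wn_s.pl file to exist in the config directory, otherwise index creation will fail for that reason instead.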
I use Elasticsearch 2.3.5. I want to add my custom analyzer to the mapping while creating the index.
PUT /library
{
"settings": {
"analysis": {
"tokenizer": {
"ngram_tokenizer": {
"type": "nGram",
"min_gram": "1",
"max_gram": "15",
"token_chars": [
"letter",
"digit"
]
}
},
"analyzer": {
"index_ngram_analyzer": {
"type": "custom",
"tokenizer": "ngram_tokenizer",
"filter": [
"lowercase"
]
}
},
"search_term_analyzer": {
"type": "custom",
"tokenizer": "keyword",
"filter": "lowercase"
}
}
},
"mappings": {
"book": {
"properties": {
"Id": {
"type": "long",
"search_analyzer": "search_term_analyzer",
"index_analyzer": "index_ngram_analyzer",
"term_vector":"with_positions_offsets"
},
"Title": {
"type": "string",
"search_analyzer": "search_term_analyzer",
"index_analyzer": "index_ngram_analyzer",
"term_vector":"with_positions_offsets"
}
}
}
}
}
I took this template example from the official guide:
{
"settings" : {
"number_of_shards" : 1
},
"mappings" : {
"type1" : {
"properties" : {
"field1" : { "type" : "string", "index" : "not_analyzed" }
}
}
}
}
But I get an error trying to execute the first part of the code. Here is my error:
{
"error": {
"root_cause": [
{
"type": "mapper_parsing_exception",
"reason": "analyzer [search_term_analyzer] not found for field [Title]"
}
],
"type": "mapper_parsing_exception",
"reason": "Failed to parse mapping [book]: analyzer [search_term_analyzer] not found for field [Title]",
"caused_by": {
"type": "mapper_parsing_exception",
"reason": "analyzer [search_term_analyzer] not found for field [Title]"
}
},
"status": 400
}
I can make it work if I put my mappings inside of settings, but I think that is the wrong way. Then I try to find my book using part of its title. I have the "King Arthur" book, for example. My query looks like this:
POST /library/book/_search
{
"query": {
"match": {
"Title": "kin"
}
}
}
Nothing is found. What am I doing wrong? Could you help me? It seems my analyzer and tokenizer don't work. How can I get the terms "k", "i", "ki", "king", etc.? I think I only have two terms right now: 'king' and 'arthur'.
You have misplaced the search_term_analyzer analyzer; it should be inside the analyzer section:
PUT /library
{
"settings": {
"analysis": {
"tokenizer": {
"ngram_tokenizer": {
"type": "nGram",
"min_gram": "1",
"max_gram": "15",
"token_chars": [
"letter",
"digit"
]
}
},
"analyzer": {
"index_ngram_analyzer": {
"type": "custom",
"tokenizer": "ngram_tokenizer",
"filter": [
"lowercase"
]
},
"search_term_analyzer": {
"type": "custom",
"tokenizer": "keyword",
"filter": "lowercase"
}
}
}
},
"mappings": {
"book": {
"properties": {
"Id": {
"type": "long", <---- you probably need to make this a string or remove the analyzers
"search_analyzer": "search_term_analyzer",
"analyzer": "index_ngram_analyzer",
"term_vector":"with_positions_offsets"
},
"Title": {
"type": "string",
"search_analyzer": "search_term_analyzer",
"analyzer": "index_ngram_analyzer",
"term_vector":"with_positions_offsets"
}
}
}
}
}
Also make sure to use analyzer instead of index_analyzer; the latter has been deprecated in ES 2.x.
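With the corrected settings, the nGram tokenizer emits exactly the partial terms you were asking about; you can verify with a sketch like:

POST /library/_analyze
{
  "analyzer": "index_ngram_analyzer",
  "text": "King"
}

This should return "k", "i", "n", "g", "ki", "in", "ng", "kin", "ing" and "king" (lowercased 1- to 15-character grams), after which the match query for "kin" will find "King Arthur".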