illegal_argument_exception in elasticsearch - elasticsearch

I am trying to implement fuzzy,synonym,autocomplete,ngram search on "name" and " name_auto" field. but i am getting an illegal exception error.
If i remove the synonym search only then my search works fine.but below i cant even create index.what might be the issue here please help.
{
"settings": {
"index": {
"analysis": {
"filter": {
"synonym": {
"type": "synonym",
"format": "wordnet",
"synonyms_path": "analysis/wn_s.pl"
},
"english_stop": {
"type": "stop",
"stopwords": "_english_"
},
"english_stemmer": {
"type": "stemmer",
"language": "english"
}
},
"analyzer": {
"synonym": {
"tokenizer": "standard",
"filter": ["english_stop", "english_stemmer", "synonym"]
},
"keyword_analyzer": {
"filter": [
"lowercase",
"asciifolding",
"trim"
],
"char_filter": [],
"type": "custom",
"tokenizer": "keyword"
},
"edge_ngram_analyzer": {
"filter": [
"lowercase"
],
"tokenizer": "edge_ngram_tokenizer"
},
"edge_ngram_search_analyzer": {
"tokenizer": "lowercase"
}
},
"tokenizer": {
"edge_ngram_tokenizer": {
"type": "edge_ngram",
"min_gram": 1,
"max_gram": 25,
"token_chars": [
"letter"
]
}
},
"mappings": {
"properties": {
"firebaseId": {
"type": "text"
},
"name": {
"fielddata": true,
"type": "text",
"analyzer": "standard"
},
"name_auto": {
"type": "text",
"fields": {
"keywordstring": {
"type": "text",
"analyzer": "keyword_analyzer"
},
"edgengram": {
"type": "text",
"analyzer": "edge_ngram_analyzer",
"search_analyzer": "edge_ngram_search_analyzer"
},
"completion": {
"type": "completion"
},
"synonym_analyzer": {
"type": "string",
"analyzer": "synonym"
}
}
}
}
}
}
}
}
}
Below is the response i am getting
{
"error": {
"root_cause": [
{
"type": "illegal_argument_exception",
"reason": "failed to build synonyms"
}
],
"type": "illegal_argument_exception",
"reason": "failed to build synonyms",
"caused_by": {
"type": "parse_exception",
"reason": "Invalid synonym rule at line 109",
"caused_by": {
"type": "illegal_argument_exception",
"reason": "term: course of action analyzed to a token (action) with position increment != 1 (got: 2)"
}
}
},
"statu
s": 400
}

The solution was pretty simple i had to remove the "english_stop", "english_stemmer" filter.it seems like it wasnt supporting .
{
"settings": {
"index": {
"analysis": {
"filter": {
"synonym": {
"type": "synonym",
"format": "wordnet",
"synonyms_path": "analysis/wn_s.pl"
}
},
"analyzer": {
"synonym": {
"tokenizer": "standard",
"filter": ["synonym"]
},
"keyword_analyzer": {
"filter": [
"lowercase",
"asciifolding",
"trim"
],
"char_filter": [],
"type": "custom",
"tokenizer": "keyword"
},
"edge_ngram_analyzer": {
"filter": [
"lowercase"
],
"tokenizer": "edge_ngram_tokenizer"
},
"edge_ngram_search_analyzer": {
"tokenizer": "lowercase"
}
},
"tokenizer": {
"edge_ngram_tokenizer": {
"type": "edge_ngram",
"min_gram": 1,
"max_gram": 25,
"token_chars": [
"letter"
]
}
},
"mappings": {
"properties": {
"firebaseId": {
"type": "text"
},
"name": {
"fielddata": true,
"type": "text",
"analyzer": "standard"
},
"name_auto": {
"type": "text",
"fields": {
"keywordstring": {
"type": "text",
"analyzer": "keyword_analyzer"
},
"edgengram": {
"type": "text",
"analyzer": "edge_ngram_analyzer",
"search_analyzer": "edge_ngram_search_analyzer"
},
"completion": {
"type": "completion"
},
"synonym_analyzer": {
"type": "string",
"analyzer": "synonym"
}
}
}
}
}
}
}
}
}

Related

Elastic Search analyzer and synonym not working

Here is my mapping and properties
POST hr-profile/employee-type
{
"settings": {
"index": {
"number_of_shards": 1,
"number_of_replicas": 1
},
"analysis": {
"filter": {
"my_metaphone": {
"replace": "false",
"type": "phonetic",
"encoder": "metaphone"
},
"synonym": {
"type": "synonym",
"synonyms_path": "analysis/names.txt"
}
},
"analyzer": {
"my_analyzer": {
"filter": [
"lowercase",
"my_metaphone"
],
"char_filter": [
"my_pattern"
],
"tokenizer": "standard"
},
"synonym": {
"filter": [
"synonym"
],
"char_filter": [
"my_pattern"
],
"tokenizer": "whitespace"
}
},
"char_filter": {
"my_pattern": {
"pattern": "\\.|\\;|\\,",
"type": "pattern_replace",
"replacement": " "
}
}
}
},
"mappings": {
"properties": {
"companyid": {
"type": "integer"
},
"emailaddress": {
"type": "text"
},
"employeeid": {
"type": "text"
},
"firstname": {
"type": "text",
"analyzer": "my_analyzer"
},
"lastname": {
"type": "text",
"analyzer": "my_analyzer"
},
"phonenumber": {
"type": "text"
},
"profileid": {
"type": "text"
}
}
}
}
I have data in the index but getting error
[match] analyzer [synonym] not found"
Help needed pls.

Must specify either an analyzer type, or a tokenizer

I am basically new to elastic search .I am trying to implement fuzzy search , synonym search ,edge ngram and autocomplete on "name_auto" field , but it seems like my index creation is failing.
another question can i implement all the analyzer for "name" field if so how can i do it.
{
"settings": {
"index": {
"analysis": {
"filter": {
"synonym": {
"ignore_case": "true",
"type": "synonym",
"format": "wordnet",
"synonyms_path": "analysis/wn_s.pl"
}
},
"analyzer": {
"synonym": {
"tokenizer": "whitespace",
"filter": [
"synonym"
]
},
"keyword_analyzer": {
"filter": [
"lowercase",
"asciifolding",
"trim"
],
"char_filter": [],
"type": "custom",
"tokenizer": "keyword"
},
"edge_ngram_analyzer": {
"filter": [
"lowercase"
],
"tokenizer": "edge_ngram_tokenizer"
},
"edge_ngram_search_analyzer": {
"tokenizer": "lowercase"
},
"tokenizer": {
"edge_ngram_tokenizer": {
"type": "edge_ngram",
"min_gram": 1,
"max_gram": 25,
"token_chars": [
"letter"
]
}
}
},
"mappings": {
"properties": {
"firebaseId": {
"type": "text"
},
"name": {
"fielddata": true,
"type": "text",
"analyzer": "standard"
},
"name_auto": {
"type": "text",
"fields": {
"keywordstring": {
"type": "text",
"analyzer": "keyword_analyzer"
},
"edgengram": {
"type": "text",
"analyzer": "edge_ngram_analyzer",
"search_analyzer": "edge_ngram_search_analyzer"
},
"completion": {
"type": "completion"
},
"synonym_analyzer": {
"type": "synonym",
"analyzer": "synonym"
}
}
}
}
}
}
}
}
}
This is the output :
> {
> "error": {
> "root_cause": [
> {
> "type": "illegal_argument_exception",
> "reason": "analyzer [tokenizer] must specify either an analyzer type, or a tokenizer"
> }
> ],
> "type": "illegal_argument_exception",
> "reason": "analyzer [tokenizer] must specify either an analyzer type, or a tokenizer"
> },
> "status": 400
> }
where am i doing wrong please guide me through right direction.
Your tokenizer section is located inside the analyzer section, which is not correct. Try with this instead, it should work:
{
"settings": {
"index": {
"analysis": {
"filter": {
"synonym": {
"ignore_case": "true",
"type": "synonym",
"format": "wordnet",
"synonyms_path": "analysis/wn_s.pl"
}
},
"analyzer": {
"synonym": {
"tokenizer": "whitespace",
"filter": [
"synonym"
]
},
"keyword_analyzer": {
"filter": [
"lowercase",
"asciifolding",
"trim"
],
"char_filter": [],
"type": "custom",
"tokenizer": "keyword"
},
"edge_ngram_analyzer": {
"filter": [
"lowercase"
],
"tokenizer": "edge_ngram_tokenizer"
},
"edge_ngram_search_analyzer": {
"tokenizer": "lowercase"
}
},
"tokenizer": {
"edge_ngram_tokenizer": {
"type": "edge_ngram",
"min_gram": 1,
"max_gram": 25,
"token_chars": [
"letter"
]
}
}
},
"mappings": {
"properties": {
"firebaseId": {
"type": "text"
},
"name": {
"fielddata": true,
"type": "text",
"analyzer": "standard"
},
"name_auto": {
"type": "text",
"fields": {
"keywordstring": {
"type": "text",
"analyzer": "keyword_analyzer"
},
"edgengram": {
"type": "text",
"analyzer": "edge_ngram_analyzer",
"search_analyzer": "edge_ngram_search_analyzer"
},
"completion": {
"type": "completion"
},
"synonym_analyzer": {
"type": "synonym",
"analyzer": "synonym"
}
}
}
}
}
}
}
}

Not able to search a phrase in elasticsearch 5.4

I am searching for a phrase in a email body. Need to get the exact data filtered like, if I search for 'Avenue New', it should return only results which has the phrase 'Avenue New' not 'Avenue Street', 'Park Avenue'etc
My mapping is like:
{
"exchangemailssql": {
"aliases": {},
"mappings": {
"email": {
"dynamic_templates": [
{
"_default": {
"match": "*",
"match_mapping_type": "string",
"mapping": {
"doc_values": true,
"type": "keyword"
}
}
}
],
"properties": {
"attachments": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"body": {
"type": "text",
"analyzer": "keylower",
"fielddata": true
},
"count": {
"type": "short"
},
"emailId": {
"type": "long"
}
}
}
},
"settings": {
"index": {
"refresh_interval": "3s",
"number_of_shards": "1",
"provided_name": "exchangemailssql",
"creation_date": "1500527793230",
"analysis": {
"filter": {
"nGram": {
"min_gram": "4",
"side": "front",
"type": "edge_ngram",
"max_gram": "100"
}
},
"analyzer": {
"keylower": {
"filter": [
"lowercase"
],
"type": "custom",
"tokenizer": "keyword"
},
"email": {
"filter": [
"lowercase",
"unique",
"nGram"
],
"type": "custom",
"tokenizer": "uax_url_email"
},
"full": {
"filter": [
"lowercase",
"snowball",
"nGram"
],
"type": "custom",
"tokenizer": "standard"
}
}
},
"number_of_replicas": "0",
"uuid": "2XTpHmwaQF65PNkCQCmcVQ",
"version": {
"created": "5040099"
}
}
}
}
}
I have given the search query like:
{
"query": {
"match_phrase": {
"body": "Avenue New"
}
},
"highlight": {
"fields" : {
"body" : {}
}
}
}
The problem here is that you're tokenizing the full body content using the keyword tokenizer, i.e. it will be one big lowercase string and you cannot search inside of it.
If you simply change the analyzer of your body field to standard instead of keylower, you'll find what you need using the match_phrase query.
"body": {
"type": "text",
"analyzer": "standard", <---change this
"fielddata": true
},

ElasticSearch : Can we apply both n-gram and language analyzers during indexing

Thanks a lot #Random , I have modified the mapping as follows. For testing I have used "movie" as my type for indexing.
Note: I have added search_analyzer also. I was not getting proper results without that.
However I have following doubts for using search_analyzer.
1] Can we use custom search_analyzer in case of language analyzers ?
2] am I getting all the results due to n-gram analyzer I have used and not due to english analyzer?
{
"settings": {
"analysis": {
"analyzer": {
"english_ngram": {
"type": "custom",
"filter": [
"english_possessive_stemmer",
"lowercase",
"english_stop",
"english_stemmer",
"ngram_filter"
],
"tokenizer": "whitespace"
},
"search_analyzer":{
"type": "custom",
"tokenizer": "whitespace",
"filter": "lowercase"
}
},
"filter": {
"english_stop": {
"type": "stop"
},
"english_stemmer": {
"type": "stemmer",
"language": "english"
},
"english_possessive_stemmer": {
"type": "stemmer",
"language": "possessive_english"
},
"ngram_filter": {
"type": "ngram",
"min_gram": 1,
"max_gram": 25
}
}
}
},
"mappings": {
"movie": {
"properties": {
"title": {
"type": "string",
"fields": {
"en": {
"type": "string",
"analyzer": "english_ngram",
"search_analyzer": "search_analyzer"
}
}
}
}
}
}
}
Update :
Using search analyzer also is not working consistently.and need more help with this.Updating question with my findings.
I used following mapping as suggested (Note: This mapping does not use search analyzer), for simplicity lets consider only English analyzer.
{
"settings": {
"analysis": {
"analyzer": {
"english_ngram": {
"type": "custom",
"filter": [
"english_possessive_stemmer",
"lowercase",
"english_stop",
"english_stemmer",
"ngram_filter"
],
"tokenizer": "standard"
}
},
"filter": {
"english_stop": {
"type": "stop"
},
"english_stemmer": {
"type": "stemmer",
"language": "english"
},
"english_possessive_stemmer": {
"type": "stemmer",
"language": "possessive_english"
},
"ngram_filter": {
"type": "edge_ngram",
"min_gram": 1,
"max_gram": 25
}
}
}
}
}
Created index:
PUT http://localhost:9200/movies/movie/1
{"title":"$peci#l movie"}
Tried following query:
GET http://localhost:9200/movies/movie/_search
{
"query": {
"multi_match": {
"query": "$peci mov",
"fields": ["title"],
"operator": "and"
}
}
}
}
I got no results for this, am I doing anything wrong ?
I am trying to get results for:
1] Special characters
2] Partial matches
3] Space separated partial and full words
Thanks again !
You can create a custom analyzer based on language analyzers. The only difference is that you add your ngram_filter token filter to the end of the chain. In this case you first get language-stemmed tokens (default chain) that converted to edge ngrams in the end (your filter). You can find the implementation of language analyzers here https://www.elastic.co/guide/en/elasticsearch/reference/current/analysis-lang-analyzer.html#english-analyzer in order to override them. Here is an example of this change for english language:
{
"settings": {
"analysis": {
"analyzer": {
"english_ngram": {
"type": "custom",
"filter": [
"english_possessive_stemmer",
"lowercase",
"english_stop",
"english_stemmer",
"ngram_filter"
],
"tokenizer": "standard"
}
},
"filter": {
"english_stop": {
"type": "stop"
},
"english_stemmer": {
"type": "stemmer",
"language": "english"
},
"english_possessive_stemmer": {
"type": "stemmer",
"language": "possessive_english"
},
"ngram_filter": {
"type": "edge_ngram",
"min_gram": 1,
"max_gram": 25
}
}
}
}
}
UPDATE
To support special characters you can try to use whitespace tokenizer instead of standard. In this case these characters will be part of your tokens:
{
"settings": {
"analysis": {
"analyzer": {
"english_ngram": {
"type": "custom",
"filter": [
"english_possessive_stemmer",
"lowercase",
"english_stop",
"english_stemmer",
"ngram_filter"
],
"tokenizer": "whitespace"
}
},
"filter": {
"english_stop": {
"type": "stop"
},
"english_stemmer": {
"type": "stemmer",
"language": "english"
},
"english_possessive_stemmer": {
"type": "stemmer",
"language": "possessive_english"
},
"ngram_filter": {
"type": "edge_ngram",
"min_gram": 1,
"max_gram": 25
}
}
}
}
}

Some Multi word synonyms are not working in elasticsearch for nested fields

I am trying to use synonym analyzer at query time and not getting expected results. Can someone throw some light on this?
Here is my mapping for the index:
{
"jobs_user_profile_v2": {
"mappings": {
"profile": {
"_all": {
"enabled": false
},
"_ttl": {
"enabled": true
},
"properties": {
"rsa": {
"type": "nested",
"properties": {
"answer": {
"type": "string",
"index_analyzer": "autocomplete",
"search_analyzer": "synonym",
"position_offset_gap": 100
},
"answerId": {
"type": "long"
},
"answerOriginal": {
"type": "string",
"index": "not_analyzed"
},
"createdAt": {
"type": "long"
},
"label": {
"type": "string",
"index": "not_analyzed"
},
"labelOriginal": {
"type": "string",
"index": "not_analyzed"
},
"question": {
"type": "string",
"index": "not_analyzed"
},
"questionId": {
"type": "long"
},
"questionOriginal": {
"type": "string"
},
"source": {
"type": "integer"
},
"updatedAt": {
"type": "long"
}
}
}
}
}
}
}
}
The field to focus on is rsa.answer, which is the field I am querying.
My synonym mapping:
Beautician,Stylist,Make up artist,Massage therapist,Therapist,Spa,Hair Dresser,Salon,Beauty Parlour,Parlor => Beautician
Carpenter,Wood Worker,Furniture Carpenter => Carpenter
Cashier,Store Manager,Store Incharge,Purchase Executive,Billing Executive,Billing Boy => Cashier
Content Writer,Writer,Translator,Writing,Copywriter,Content Creation,Script Writer,Freelance Writer,Freelance Content Writer => Content Writer
My Search Query:
http://{{domain}}/jobs_user_profile_v2/_search
{
"query": {
"nested":{
"path": "rsa",
"query":{
"query_string": {
"query": "hair dresser",
"fields": ["answer"],
"analyzer" :"synonym"
}
},
"inner_hits": {
"explain": true
}
}
},
"explain" : true,
"sort" : [ {
"_score" : { }
} ]
}
It is showing proper Beautician and 'Cashierprofiles for search queryHair Dresserandbilling executivebut not showing anything forwood worker => carpenter` case.
My analyzer results:
http://{{domain}}/jobs_user_profile_v2/_analyze?analyzer=synonym&text=hair dresser
{
"tokens": [
{
"token": "beautician",
"start_offset": 0,
"end_offset": 12,
"type": "SYNONYM",
"position": 1
}
]
}
and for wood worker case
http://{{domain}}/jobs_user_profile_v2/_analyze?analyzer=synonym&text=wood worker
{
"tokens": [
{
"token": "carpenter",
"start_offset": 0,
"end_offset": 11,
"type": "SYNONYM",
"position": 1
}
]
}
It is also not working a few other cases.
My analyzer setting for index:
"analysis": {
"filter": {
"synonym": {
"ignore_case": "true",
"type": "synonym",
"synonyms_path": "synonym.txt"
},
"autocomplete_filter": {
"type": "edge_ngram",
"min_gram": "3",
"max_gram": "10"
}
},
"analyzer": {
"text_en_splitting_search": {
"type": "custom",
"filter": [
"stop",
"lowercase",
"porter_stem",
"word_delimiter"
],
"tokenizer": "whitespace"
},
"synonym": {
"filter": [
"stop",
"lowercase",
"synonym"
],
"type": "custom",
"tokenizer": "standard"
},
"autocomplete": {
"filter": [
"lowercase",
"autocomplete_filter"
],
"type": "custom",
"tokenizer": "standard"
},
"text_en_splitting": {
"filter": [
"lowercase",
"porter_stem",
"word_delimiter"
],
"type": "custom",
"tokenizer": "whitespace"
},
"text_general": {
"filter": [
"lowercase"
],
"type": "custom",
"tokenizer": "standard"
},
"edge_ngram_analyzer": {
"filter": [
"lowercase"
],
"type": "custom",
"tokenizer": "edge_ngram_tokenizer"
},
"autocomplete_analyzer": {
"filter": [
"lowercase"
],
"tokenizer": "whitespace"
}
},
"tokenizer": {
"edge_ngram_tokenizer": {
"token_chars": [
"letter",
"digit"
],
"min_gram": "2",
"type": "edgeNGram",
"max_gram": "10"
}
}
}
For the above case one multi-match is more ideal than query-string.
Multi-Match unlike query string does not tokenize the query terms before analyzing it . As a result multi-word synonyms may not work as expected.
Example:
{
"query": {
"nested": {
"path": "rsa",
"query": {
"multi_match": {
"query": "wood worker",
"fields": [
"rsa.answer"
],
"type" : "cross_fields",
"analyzer": "synonym"
}
}
}
}
}
If for some reason you prefer query-string then you would need to pass the entire query in double quotes to ensure it is not tokenized:
example :
post test/_search
{
"query": {
"nested": {
"path": "rsa",
"query": {
"query_string": {
"query": "\"wood worker\"",
"fields": [
"rsa.answer"
],
"analyzer": "synonym"
}
}
}
}
}

Resources