Elasticsearch analyzer and synonym not working

Here are my settings and mappings:
POST hr-profile/employee-type
{
  "settings": {
    "index": {
      "number_of_shards": 1,
      "number_of_replicas": 1
    },
    "analysis": {
      "filter": {
        "my_metaphone": {
          "replace": "false",
          "type": "phonetic",
          "encoder": "metaphone"
        },
        "synonym": {
          "type": "synonym",
          "synonyms_path": "analysis/names.txt"
        }
      },
      "analyzer": {
        "my_analyzer": {
          "filter": ["lowercase", "my_metaphone"],
          "char_filter": ["my_pattern"],
          "tokenizer": "standard"
        },
        "synonym": {
          "filter": ["synonym"],
          "char_filter": ["my_pattern"],
          "tokenizer": "whitespace"
        }
      },
      "char_filter": {
        "my_pattern": {
          "pattern": "\\.|\\;|\\,",
          "type": "pattern_replace",
          "replacement": " "
        }
      }
    }
  },
  "mappings": {
    "properties": {
      "companyid": { "type": "integer" },
      "emailaddress": { "type": "text" },
      "employeeid": { "type": "text" },
      "firstname": { "type": "text", "analyzer": "my_analyzer" },
      "lastname": { "type": "text", "analyzer": "my_analyzer" },
      "phonenumber": { "type": "text" },
      "profileid": { "type": "text" }
    }
  }
}
I have data in the index, but I am getting this error when I search:
[match] analyzer [synonym] not found
Help needed, please.
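For what it's worth, this error is raised at query time when a match query names a search analyzer that is not registered on the index. One likely cause here (an assumption, since the index-creation call isn't shown separately): POST hr-profile/employee-type indexes a document, so the settings/mappings body above is never applied as index configuration. Analysis settings have to be supplied when the index is created, along the lines of:
PUT hr-profile
{
  "settings": { ...analysis settings as above... },
  "mappings": { ...properties as above... }
}
After that, a query that names the analyzer explicitly (a hypothetical reconstruction from the error message, since the actual query isn't shown) should be able to resolve it:
GET hr-profile/_search
{
  "query": {
    "match": {
      "firstname": {
        "query": "smith",
        "analyzer": "synonym"
      }
    }
  }
}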

Related

illegal_argument_exception in Elasticsearch

I am trying to implement fuzzy, synonym, autocomplete, and ngram search on the "name" and "name_auto" fields, but I am getting an illegal_argument_exception.
If I remove the synonym search, everything works fine, but as it stands I can't even create the index. What might be the issue here? Please help.
{
  "settings": {
    "index": {
      "analysis": {
        "filter": {
          "synonym": {
            "type": "synonym",
            "format": "wordnet",
            "synonyms_path": "analysis/wn_s.pl"
          },
          "english_stop": {
            "type": "stop",
            "stopwords": "_english_"
          },
          "english_stemmer": {
            "type": "stemmer",
            "language": "english"
          }
        },
        "analyzer": {
          "synonym": {
            "tokenizer": "standard",
            "filter": ["english_stop", "english_stemmer", "synonym"]
          },
          "keyword_analyzer": {
            "filter": ["lowercase", "asciifolding", "trim"],
            "char_filter": [],
            "type": "custom",
            "tokenizer": "keyword"
          },
          "edge_ngram_analyzer": {
            "filter": ["lowercase"],
            "tokenizer": "edge_ngram_tokenizer"
          },
          "edge_ngram_search_analyzer": {
            "tokenizer": "lowercase"
          }
        },
        "tokenizer": {
          "edge_ngram_tokenizer": {
            "type": "edge_ngram",
            "min_gram": 1,
            "max_gram": 25,
            "token_chars": ["letter"]
          }
        },
        "mappings": {
          "properties": {
            "firebaseId": { "type": "text" },
            "name": {
              "fielddata": true,
              "type": "text",
              "analyzer": "standard"
            },
            "name_auto": {
              "type": "text",
              "fields": {
                "keywordstring": {
                  "type": "text",
                  "analyzer": "keyword_analyzer"
                },
                "edgengram": {
                  "type": "text",
                  "analyzer": "edge_ngram_analyzer",
                  "search_analyzer": "edge_ngram_search_analyzer"
                },
                "completion": { "type": "completion" },
                "synonym_analyzer": {
                  "type": "string",
                  "analyzer": "synonym"
                }
              }
            }
          }
        }
      }
    }
  }
}
Below is the response I am getting:
{
  "error": {
    "root_cause": [
      {
        "type": "illegal_argument_exception",
        "reason": "failed to build synonyms"
      }
    ],
    "type": "illegal_argument_exception",
    "reason": "failed to build synonyms",
    "caused_by": {
      "type": "parse_exception",
      "reason": "Invalid synonym rule at line 109",
      "caused_by": {
        "type": "illegal_argument_exception",
        "reason": "term: course of action analyzed to a token (action) with position increment != 1 (got: 2)"
      }
    }
  },
  "status": 400
}
The solution was pretty simple: I had to remove the "english_stop" and "english_stemmer" filters from the synonym analyzer; it seems the synonym filter wasn't supporting them. (The stop filter drops words such as "of" from multi-word WordNet entries like "course of action", leaving a position gap that the synonym filter cannot parse, which is exactly what the error message reports.)
{
  "settings": {
    "index": {
      "analysis": {
        "filter": {
          "synonym": {
            "type": "synonym",
            "format": "wordnet",
            "synonyms_path": "analysis/wn_s.pl"
          }
        },
        "analyzer": {
          "synonym": {
            "tokenizer": "standard",
            "filter": ["synonym"]
          },
          "keyword_analyzer": {
            "filter": ["lowercase", "asciifolding", "trim"],
            "char_filter": [],
            "type": "custom",
            "tokenizer": "keyword"
          },
          "edge_ngram_analyzer": {
            "filter": ["lowercase"],
            "tokenizer": "edge_ngram_tokenizer"
          },
          "edge_ngram_search_analyzer": {
            "tokenizer": "lowercase"
          }
        },
        "tokenizer": {
          "edge_ngram_tokenizer": {
            "type": "edge_ngram",
            "min_gram": 1,
            "max_gram": 25,
            "token_chars": ["letter"]
          }
        },
        "mappings": {
          "properties": {
            "firebaseId": { "type": "text" },
            "name": {
              "fielddata": true,
              "type": "text",
              "analyzer": "standard"
            },
            "name_auto": {
              "type": "text",
              "fields": {
                "keywordstring": {
                  "type": "text",
                  "analyzer": "keyword_analyzer"
                },
                "edgengram": {
                  "type": "text",
                  "analyzer": "edge_ngram_analyzer",
                  "search_analyzer": "edge_ngram_search_analyzer"
                },
                "completion": { "type": "completion" },
                "synonym_analyzer": {
                  "type": "string",
                  "analyzer": "synonym"
                }
              }
            }
          }
        }
      }
    }
  }
}
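You can see the position gap that broke the WordNet parsing by running the offending phrase through the _analyze API (a standalone sketch using the built-in stop filter, so no index is required):
GET _analyze
{
  "tokenizer": "standard",
  "filter": [ { "type": "stop", "stopwords": "_english_" } ],
  "text": "course of action"
}
The stop filter removes "of", so "course" comes back at position 0 and "action" at position 2, i.e. a position increment of 2 between the surviving tokens. That is exactly the "position increment != 1 (got: 2)" the error complains about when building the synonym rule.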

Must specify either an analyzer type, or a tokenizer

I am basically new to Elasticsearch. I am trying to implement fuzzy search, synonym search, edge ngram and autocomplete on the "name_auto" field, but it seems my index creation is failing.
Another question: can I apply all of these analyzers to the "name" field as well, and if so, how?
{
  "settings": {
    "index": {
      "analysis": {
        "filter": {
          "synonym": {
            "ignore_case": "true",
            "type": "synonym",
            "format": "wordnet",
            "synonyms_path": "analysis/wn_s.pl"
          }
        },
        "analyzer": {
          "synonym": {
            "tokenizer": "whitespace",
            "filter": ["synonym"]
          },
          "keyword_analyzer": {
            "filter": ["lowercase", "asciifolding", "trim"],
            "char_filter": [],
            "type": "custom",
            "tokenizer": "keyword"
          },
          "edge_ngram_analyzer": {
            "filter": ["lowercase"],
            "tokenizer": "edge_ngram_tokenizer"
          },
          "edge_ngram_search_analyzer": {
            "tokenizer": "lowercase"
          },
          "tokenizer": {
            "edge_ngram_tokenizer": {
              "type": "edge_ngram",
              "min_gram": 1,
              "max_gram": 25,
              "token_chars": ["letter"]
            }
          }
        },
        "mappings": {
          "properties": {
            "firebaseId": { "type": "text" },
            "name": {
              "fielddata": true,
              "type": "text",
              "analyzer": "standard"
            },
            "name_auto": {
              "type": "text",
              "fields": {
                "keywordstring": {
                  "type": "text",
                  "analyzer": "keyword_analyzer"
                },
                "edgengram": {
                  "type": "text",
                  "analyzer": "edge_ngram_analyzer",
                  "search_analyzer": "edge_ngram_search_analyzer"
                },
                "completion": { "type": "completion" },
                "synonym_analyzer": {
                  "type": "synonym",
                  "analyzer": "synonym"
                }
              }
            }
          }
        }
      }
    }
  }
}
This is the output:
{
  "error": {
    "root_cause": [
      {
        "type": "illegal_argument_exception",
        "reason": "analyzer [tokenizer] must specify either an analyzer type, or a tokenizer"
      }
    ],
    "type": "illegal_argument_exception",
    "reason": "analyzer [tokenizer] must specify either an analyzer type, or a tokenizer"
  },
  "status": 400
}
Where am I going wrong? Please guide me in the right direction.
Your tokenizer section is located inside the analyzer section, which is not correct. Try this instead; it should work:
{
  "settings": {
    "index": {
      "analysis": {
        "filter": {
          "synonym": {
            "ignore_case": "true",
            "type": "synonym",
            "format": "wordnet",
            "synonyms_path": "analysis/wn_s.pl"
          }
        },
        "analyzer": {
          "synonym": {
            "tokenizer": "whitespace",
            "filter": ["synonym"]
          },
          "keyword_analyzer": {
            "filter": ["lowercase", "asciifolding", "trim"],
            "char_filter": [],
            "type": "custom",
            "tokenizer": "keyword"
          },
          "edge_ngram_analyzer": {
            "filter": ["lowercase"],
            "tokenizer": "edge_ngram_tokenizer"
          },
          "edge_ngram_search_analyzer": {
            "tokenizer": "lowercase"
          }
        },
        "tokenizer": {
          "edge_ngram_tokenizer": {
            "type": "edge_ngram",
            "min_gram": 1,
            "max_gram": 25,
            "token_chars": ["letter"]
          }
        }
      },
      "mappings": {
        "properties": {
          "firebaseId": { "type": "text" },
          "name": {
            "fielddata": true,
            "type": "text",
            "analyzer": "standard"
          },
          "name_auto": {
            "type": "text",
            "fields": {
              "keywordstring": {
                "type": "text",
                "analyzer": "keyword_analyzer"
              },
              "edgengram": {
                "type": "text",
                "analyzer": "edge_ngram_analyzer",
                "search_analyzer": "edge_ngram_search_analyzer"
              },
              "completion": { "type": "completion" },
              "synonym_analyzer": {
                "type": "synonym",
                "analyzer": "synonym"
              }
            }
          }
        }
      }
    }
  }
}
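As for the second question (applying several analyzers to the "name" field): the usual pattern is multi-fields, exactly as already done for "name_auto". One caveat: a sub-field needs a real mapping type such as text; "type": "synonym" as written above is not a valid field type and will likely surface as the next error once the tokenizer placement is fixed. A sketch reusing the analyzers defined in this thread (an illustration, not tested against this exact index):
"name": {
  "type": "text",
  "analyzer": "standard",
  "fields": {
    "synonym": { "type": "text", "analyzer": "synonym" },
    "edgengram": {
      "type": "text",
      "analyzer": "edge_ngram_analyzer",
      "search_analyzer": "edge_ngram_search_analyzer"
    },
    "keywordstring": { "type": "text", "analyzer": "keyword_analyzer" }
  }
}
Queries can then target name for standard matching, name.synonym for synonym expansion, and name.edgengram for autocomplete-style prefix matching.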

How to add filters and mappings to an Elasticsearch schema while creating the index?

I want to use filters like synonyms and stopwords, along with mapping types, in the Elasticsearch schema while indexing. Below is the JSON I am using. But with this JSON I get the mappings while the filters are lost. What could be the reason? (I am using Elasticsearch 6.2.)
nlp_settings = {
    "settings": {
        "index": {
            "number_of_shards": 1,
            "analysis": {
                "analyzer": {
                    "synonym": {
                        "tokenizer": "standard",
                        "filter": ["synonym", "stop_words", "lowercase",
                                   "stop_words_user", "synonym_user"]
                    }
                },
                "filter": {
                    "synonym": {
                        "type": "synonym",
                        "synonyms_path": "synonyms.txt"
                    },
                    "stop_words": {
                        "type": "stop",
                        "stopwords_path": "stopwords.txt"
                    },
                    "stop_words_user": {
                        "type": "stop",
                        "stopwords": "_none_"
                    },
                    "synonym_user": {
                        "type": "synonym",
                        "synonyms": default_synonym
                    }
                }
            }
        }
    },
    "mappings": {
        "doc": {
            "properties": {
                "section": {"type": "text"},
                "document_name": {"type": "text"},
                "dir_path_info": {"type": "text"},
                "nlu_raw": {
                    "noun_list": {"type": "nested"},
                    "verb_list": {"type": "nested"}
                },
                "nlu": {
                    "noun": {"type": "nested"},
                    "verb": {"type": "nested"}
                }
            }
        }
    }
}
When I use the mappings along with the filters, I get the following JSON from a GET on http://localhost:9233/test/_settings:
{
  "test": {
    "settings": {
      "index": {
        "creation_date": "1523962921677",
        "number_of_shards": "5",
        "number_of_replicas": "1",
        "uuid": "FevdHGZjQm6ke2FgeNdnMQ",
        "version": {
          "created": "6020199"
        },
        "provided_name": "test"
      }
    }
  }
}
However, what I actually want is:
{
  "test": {
    "settings": {
      "index": {
        "number_of_shards": "1",
        "provided_name": "test",
        "creation_date": "1523963029203",
        "analysis": {
          "filter": {
            "synonym": {
              "type": "synonym",
              "synonyms_path": "synonyms.txt"
            },
            "synonym_user": {
              "type": "synonym",
              "synonyms": ["a, a"]
            },
            "stop_words_user": {
              "type": "stop",
              "stopwords": ["please", "help"]
            },
            "stop_words": {
              "type": "stop",
              "stopwords_path": "stopwords.txt"
            }
          },
          "analyzer": {
            "synonym": {
              "filter": ["synonym", "stop_words", "lowercase", "stop_words_user", "synonym_user"],
              "tokenizer": "standard"
            }
          }
        },
        "number_of_replicas": "1",
        "uuid": "CiBBgngdR_aNHkY1m0EtXw",
        "version": {
          "created": "6020199"
        }
      }
    }
  }
}
I get this when I remove the mappings from the schema.
The settings and mappings sections should be on the same level. So:
{
  "settings": {
    "number_of_shards": 1,
    "analysis": {
      "analyzer": {
        "synonym": {
          "tokenizer": "standard",
          "filter": ["synonym", "stop_words", "lowercase", "stop_words_user", "synonym_user"]
        }
      },
      "filter": {
        "synonym": {
          "type": "synonym",
          "synonyms_path": "synonyms.txt"
        },
        "stop_words": {
          "type": "stop",
          "stopwords_path": "stopwords.txt"
        },
        "stop_words_user": {
          "type": "stop",
          "stopwords": "_none_"
        },
        "synonym_user": {
          "type": "synonym",
          "synonyms": "default_synonym"
        }
      }
    }
  },
  "mappings": {
    "doc": {
      "properties": {
        "section": { "type": "text" },
        "document_name": { "type": "text" },
        "dir_path_info": { "type": "text" },
        "nlu_raw": {
          "properties": {
            "noun_list": { "type": "nested" },
            "verb_list": { "type": "nested" }
          }
        },
        "nlu": {
          "properties": {
            "noun": { "type": "nested" },
            "verb": { "type": "nested" }
          }
        }
      }
    }
  }
}
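Once the index has been created with the corrected body, a quick way to confirm the filters were actually registered (a sketch, assuming the index is named test as in the output above) is to run the custom analyzer directly:
GET test/_analyze
{
  "analyzer": "synonym",
  "text": "please help with the documents"
}
If the analysis section was applied, the stop words from stopwords.txt are removed and any synonym expansions show up in the token list; if the settings were silently dropped, the request fails because the synonym analyzer does not exist.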

Not able to search a phrase in Elasticsearch 5.4

I am searching for a phrase in an email body and need the results filtered exactly: if I search for 'Avenue New', it should return only results that contain the phrase 'Avenue New', not 'Avenue Street', 'Park Avenue', etc.
My mapping is:
{
  "exchangemailssql": {
    "aliases": {},
    "mappings": {
      "email": {
        "dynamic_templates": [
          {
            "_default": {
              "match": "*",
              "match_mapping_type": "string",
              "mapping": {
                "doc_values": true,
                "type": "keyword"
              }
            }
          }
        ],
        "properties": {
          "attachments": {
            "type": "text",
            "fields": {
              "keyword": {
                "type": "keyword",
                "ignore_above": 256
              }
            }
          },
          "body": {
            "type": "text",
            "analyzer": "keylower",
            "fielddata": true
          },
          "count": {
            "type": "short"
          },
          "emailId": {
            "type": "long"
          }
        }
      }
    },
    "settings": {
      "index": {
        "refresh_interval": "3s",
        "number_of_shards": "1",
        "provided_name": "exchangemailssql",
        "creation_date": "1500527793230",
        "analysis": {
          "filter": {
            "nGram": {
              "min_gram": "4",
              "side": "front",
              "type": "edge_ngram",
              "max_gram": "100"
            }
          },
          "analyzer": {
            "keylower": {
              "filter": ["lowercase"],
              "type": "custom",
              "tokenizer": "keyword"
            },
            "email": {
              "filter": ["lowercase", "unique", "nGram"],
              "type": "custom",
              "tokenizer": "uax_url_email"
            },
            "full": {
              "filter": ["lowercase", "snowball", "nGram"],
              "type": "custom",
              "tokenizer": "standard"
            }
          }
        },
        "number_of_replicas": "0",
        "uuid": "2XTpHmwaQF65PNkCQCmcVQ",
        "version": {
          "created": "5040099"
        }
      }
    }
  }
}
I have given the search query like this:
{
  "query": {
    "match_phrase": {
      "body": "Avenue New"
    }
  },
  "highlight": {
    "fields": {
      "body": {}
    }
  }
}
The problem here is that you're tokenizing the full body content using the keyword tokenizer, i.e. it will be one big lowercase string and you cannot search inside of it.
If you simply change the analyzer of your body field to standard instead of keylower, you'll find what you need using the match_phrase query.
"body": {
"type": "text",
"analyzer": "standard", <---change this
"fielddata": true
},
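To see the difference, compare what the two tokenizers emit with the _analyze API (a standalone sketch; the lowercase filter stands in for the keylower analyzer, and the sample text is made up):
GET _analyze
{
  "tokenizer": "keyword",
  "filter": ["lowercase"],
  "text": "125 Avenue New, Springfield"
}
This returns a single token, "125 avenue new, springfield", which a match_phrase for "Avenue New" can never match. Swap "tokenizer": "keyword" for "tokenizer": "standard" and you get separate tokens (125, avenue, new, springfield) with consecutive positions, which is exactly what match_phrase uses to check that "avenue" and "new" are adjacent.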

Elasticsearch Analyzer - Stemmer Not Working

The settings for one of my indexes are as follows; however, the stemmer isn't being applied. For example, a search for fox will not pick up articles that include the term foxes. I can't see why, as the order of the filters is correct (lowercase precedes the stemmer).
{
  "articles": {
    "settings": {
      "index": {
        "creation_date": "1436255268907",
        "analysis": {
          "filter": {
            "filter_stemmer": {
              "type": "stemmer",
              "language": "english"
            },
            "kill_filters": {
              "pattern": ".*_.*",
              "type": "pattern_replace",
              "replacement": ""
            },
            "filter_stop": {
              "type": "stop"
            },
            "filter_shingle": {
              "min_shingle_size": "2",
              "max_shingle_size": "5",
              "type": "shingle",
              "output_unigrams": "true"
            },
            "filter_stemmerposs": {
              "type": "stemmer",
              "language": "possessive_english"
            }
          },
          "analyzer": {
            "tags_analyzer": {
              "type": "custom",
              "filter": ["standard", "lowercase", "filter_stemmerposs", "filter_stemmer"],
              "tokenizer": "patterntoke"
            },
            "shingles_analyzer": {
              "filter": ["standard", "lowercase", "filter_stop", "filter_shingle", "kill_filters", "filter_stemmerposs", "filter_stemmer"],
              "char_filter": ["html_strip"],
              "type": "custom",
              "tokenizer": "standard"
            }
          },
          "tokenizer": {
            "patterntoke": {
              "type": "pattern",
              "pattern": ","
            }
          }
        },
        "number_of_shards": "5",
        "number_of_replicas": "1",
        "version": {
          "created": "1060099"
        },
        "uuid": "H2NsE3eKT1y_ArPOPbjT6w"
      }
    }
  }
}
And below is the mapping:
{
  "articles": {
    "mappings": {
      "article": {
        "properties": {
          "accountid": {
            "type": "double",
            "include_in_all": false
          },
          "article": {
            "type": "string",
            "index_analyzer": "shingles_analyzer"
          },
          "articleid": {
            "type": "double",
            "include_in_all": false
          },
          "categoryid": {
            "type": "double",
            "include_in_all": false
          },
          "draftflag": {
            "type": "double",
            "include_in_all": false
          },
          "files": {
            "type": "string",
            "index_analyzer": "tags_analyzer"
          },
          "tags": {
            "type": "string",
            "index_analyzer": "tags_analyzer"
          },
          "title": {
            "type": "string",
            "index_analyzer": "shingles_analyzer"
          },
          "topicid": {
            "type": "double",
            "include_in_all": false
          }
        }
      }
    }
  }
}
The sample documents are varied, but, for example, one contains the token fox and another foxes (both derived from the article field). Yet each document is only found when the search term matches it exactly (fox or foxes), not when either term is used, which is what I'd expect. The search is a fuzzy_like_this query (I'm using NEST for .NET to execute it).
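A useful first check (a debugging sketch, not a confirmed fix) is to ask the index what tokens the analyzer actually produces; if stemming is applied at index time, foxes should come back as fox:
GET articles/_analyze
{
  "analyzer": "shingles_analyzer",
  "text": "foxes"
}
(The version string above suggests a 1.x cluster, where _analyze took query-string parameters instead of a JSON body, e.g. GET articles/_analyze?analyzer=shingles_analyzer&text=foxes.) If the token does come back stemmed, the index side is fine and the mismatch is likely on the query side: index_analyzer only applies at index time, so the query terms may be analyzed with a different, non-stemming analyzer.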
