I have created a synonym analyser on an index:
curl http://localhost:9200/test_index/_settings?pretty
{
"test_index" : {
"settings" : {
"index" : {
"creation_date" : "1429175067557",
"analyzer" : {
"search_synonyms" : {
"filter" : [ "lowercase", "search_synonym_filter" ],
"tokenizer" : "standard"
}
},
"uuid" : "Zq6Id8xsRWGofJrNCb7M8w",
"number_of_replicas" : "1",
"analysis" : {
"filter" : {
"search_synonym_filter" : {
"type" : "synonym",
"synonyms" : [ "sneakers,pumps" ]
}
}
},
"number_of_shards" : "5",
"version" : {
"created" : "1050099"
}
}
}
}
}
But when I try to use it with the mapping:
curl -XPUT 'http://localhost:9200/test_index/_mapping/product_catalog?pretty' -H "Content-Type: application/json" \
-d '{"product_catalog": {"properties" : {"name": {"type": "string", "include_in_all": true, "analyzer":"search_synonyms"} }}}'
I get the error:
{
"error" : "MapperParsingException[Analyzer [search_synonyms] not found for field [name]]",
"status" : 400
}
I have also tried to just check the analyser with:
curl 'http://localhost:9200/test_index/_analyze?analyzer=search_synonyms&pretty=1&text=pumps'
but still get an error:
ElasticsearchIllegalArgumentException[failed to find analyzer [search_synonyms]]
Any ideas? I may be missing something, but I can't think what.
The analyzer element has to be inside your analysis component. Change your index creator as follows:
{
"settings": {
"index": {
"creation_date": "1429175067557",
"uuid": "Zq6Id8xsRWGofJrNCb7M8w",
"number_of_replicas": "0",
"analysis": {
"filter": {
"search_synonym_filter": {
"type": "synonym",
"synonyms": [
"sneakers,pumps"
]
}
},
"analyzer": {
"search_synonyms": {
"filter": [
"lowercase",
"search_synonym_filter"
],
"tokenizer": "standard"
}
}
},
"number_of_shards": "5",
"version": {
"created": "1050099"
}
}
}
}
Related
I made a very simple test to figure out my mistake, but did not find it. I created two indexes and I'm trying to search documents in the ppa index that are similar to a given document in the ods index (like the second example here https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-mlt-query.html).
These are my settings, mappings and documents for the ppa index:
PUT /ppa
{
"settings": {
"number_of_shards": 1,
"number_of_replicas": 0,
"analysis": {
"filter": {
"brazilian_stop": {
"type": "stop",
"stopwords": "_brazilian_"
},
"brazilian_stemmer": {
"type": "stemmer",
"language": "brazilian"
}
},
"analyzer": {
"brazilian": {
"tokenizer": "standard",
"filter": [
"lowercase",
"brazilian_stop",
"brazilian_stemmer"
]
}
}
}
}
}
PUT /ppa/_mapping/ppa
{"properties": {"descricao": {"type": "text", "analyzer": "brazilian"}}}
POST /_bulk
{"index":{"_index":"ppa","_type":"ppa"}}
{"descricao": "erradicar a pobreza"}
{"index":{"_index":"ppa","_type":"ppa"}}
{"descricao": "erradicar a pobreza"}
These are my settings, mappings and documents for the ods index:
PUT /ods
{
"settings": {
"number_of_shards": 1,
"number_of_replicas": 0,
"analysis": {
"filter": {
"brazilian_stop": {
"type": "stop",
"stopwords": "_brazilian_"
},
"brazilian_stemmer": {
"type": "stemmer",
"language": "brazilian"
}
},
"analyzer": {
"brazilian": {
"tokenizer": "standard",
"filter": [
"lowercase",
"brazilian_stop",
"brazilian_stemmer"
]
}
}
}
}
}
PUT /ods/_mapping/ods
{"properties": {"metaodsdescricao": {"type": "text", "analyzer": "brazilian"},"metaodsid": {"type": "integer"}}}
POST /_bulk
{"index":{"_index":"ods","_type":"ods", "_id" : "1" }}
{ "metaodsdescricao": "erradicar a pobreza","metaodsid": 1}
{"index":{"_index":"ods","_type":"ods", "_id" : "2" }}
{"metaodsdescricao": "crianças que vivem na pobreza", "metaodsid": 2}
Now, this search doesn't work:
GET /ppa/ppa/_search
{
"query": {
"more_like_this" : {
"fields" : ["descricao"],
"like" : [
{
"_index" : "ods",
"_type" : "ods",
"_id" : "1"
}
],
"min_term_freq" : 1,
"min_doc_freq" : 1,
"max_query_terms" : 20
}
}
}
But this one does work:
GET /ppa/ppa/_search
{
"query": {
"more_like_this" : {
"fields" : ["descricao"],
"like" : ["erradicar a pobreza"],
"min_term_freq" : 1,
"min_doc_freq" : 1,
"max_query_terms" : 20
}
}
}
What is happening?
Please, help me make this return something other than empty.
The "more like this" query works well when you have indexed a lot of data. An empty result can be a symptom of having very few documents in the Elasticsearch index.
I tried to use a synonym analyzer for my already working elastic search type. Here's the mapping of my serviceEntity:
{
"serviceentity" : {
"properties":{
"ServiceLangProps" : {
"type" : "nested",
"properties" : {
"NAME" : {"type" : "string", "search_analyzer": "synonym"},
"LONG_TEXT" : {"type" : "string", "search_analyzer": "synonym"},
"DESCRIPTION" : {"type" : "string", "search_analyzer": "synonym"},
"MATERIAL" : {"type" : "string", "search_analyzer": "synonym"},
"LANGUAGE_ID" : {"type" : "string", "include_in_all": false}
}
},
"LinkProps" : {
"type" : "nested",
"properties" : {
"TITLE" : {"type" : "string", "search_analyzer": "synonym"},
"LINK" : {"type" : "string"},
"LANGUAGE_ID" : {"type" : "string", "include_in_all": false}
}
},
"MediaProps" : {
"type" : "nested",
"properties" : {
"TITLE" : {"type" : "string", "search_analyzer": "synonym"},
"FILENAME" : {"type" : "string"},
"LANGUAGE_ID" : {"type" : "string", "include_in_all": false}
}
}
}
}
}
And these are my settings:
{
"analysis": {
"filter": {
"synonym": {
"ignore_case": "true",
"type": "synonym",
"synonyms": [
"lorep, spaceship",
"ipsum, planet"
]
}
},
"analyzer": {
"synonym": {
"filter": [
"lowercase",
"synonym"
],
"tokenizer": "whitespace"
}
}
}
}
When I try to search for anything, I get this error:
Caused by: org.elasticsearch.index.query.QueryParsingException: [nested] nested object under path [ServiceLangProps] is not of nested type
And I don't understand why. If I don't add any analyzer to my setting, everything works fine.
I'm using the java API to communicate with the elasticsearch instance. Therefore my code looks something like this for the multi match query:
MultiMatchQueryBuilder multiMatchBuilder = QueryBuilders.multiMatchQuery(fulltextSearchString, QUERY_FIELDS).analyzer("synonym");
The query string created by the java API looks like this:
{
"query" : {
"bool" : {
"must" : {
"bool" : {
"should" : [ {
"nested" : {
"query" : {
"bool" : {
"must" : [ {
"match" : {
"ServiceLangProps.LANGUAGE_ID" : {
"query" : "DE",
"type" : "boolean"
}
}
}, {
"multi_match" : {
"query" : "lorem",
"fields" : [ "ServiceLangProps.NAME", "ServiceLangProps.DESCRIPTION", "ServiceLangProps.MATERIALKURZTEXT", "ServiceLangProps.DESCRIPTION_RICHTEXT" ],
"analyzer" : "synonym"
}
} ]
}
},
"path" : "ServiceLangProps"
}
}, {
"nested" : {
"query" : {
"bool" : {
"must" : [ {
"match" : {
"LinkProps.LANGUAGE_ID" : {
"query" : "DE",
"type" : "boolean"
}
}
}, {
"match" : {
"LinkProps.TITLE" : {
"query" : "lorem",
"type" : "boolean"
}
}
} ]
}
},
"path" : "LinkProps"
}
}, {
"nested" : {
"query" : {
"bool" : {
"must" : [ {
"match" : {
"MediaProps.LANGUAGE_ID" : {
"query" : "DE",
"type" : "boolean"
}
}
}, {
"match" : {
"MediaProps.TITLE" : {
"query" : "lorem",
"type" : "boolean"
}
}
} ]
}
},
"path" : "MediaProps"
}
} ]
}
},
"filter" : {
"bool" : { }
}
}
}
}
If I try it on the LinkProps or MediaProps, I get the same error for the respective nested object.
Edit: I'm using version 2.4.6 of elasticsearch
It would be helpful to see the query string as well, and to know what version of ES is being used.
I couldn't see the synonyms_path; also, the fact that you are using nested types can cause that error.
You have probably seen this already, but in case you haven't:
https://www.elastic.co/guide/en/elasticsearch/reference/5.5/analysis-synonym-tokenfilter.html
I created a minimal example of what I'm trying to do.
My mapping looks like this:
{
"serviceentity" : {
"properties":{
"LinkProps" : {
"type" : "nested",
"properties" : {
"TITLE" : {"type" : "string", "search_analyzer": "synonym"},
"LINK" : {"type" : "string"},
"LANGUAGE_ID" : {"type" : "string", "include_in_all": false}
}
}
}
}
}
And my settings for the synonym analyzer in JAVA code:
XContentBuilder builder = jsonBuilder()
.startObject()
.startObject("analysis")
.startObject("filter")
.startObject("synonym") // The name of the filter
.field("type", "synonym") // The type (derivate)
.field("ignore_case", "true")
.array("synonyms", synonyms) // The synonym list
.endObject()
.endObject()
.startObject("analyzer")
.startObject("synonym")
.field("tokenizer", "whitespace")
.array("filter", "lowercase", "synonym")
.endObject()
.endObject()
.endObject()
.endObject();
The metadata which the ElasticSearch Head Chrome plugin spits out looks like this:
{
"analysis": {
"filter": {
"synonym": {
"ignore_case": "true",
"type": "synonym",
"synonyms": [
"Test, foo",
"Title, bar"
]
}
},
"analyzer": {
"synonym": {
"filter": [
"lowercase",
"synonym"
],
"tokenizer": "whitespace"
}
}
}
}
When I now use a search query to look for "Test" I get the same error as mentioned in my first post. Here's the query
{
"query": {
"bool": {
"must": {
"nested": {
"path": "LinkProps",
"query": {
"multi_match": {
"query": "Test",
"fields": [
"LinkProps.TITLE",
"LinkProps.LINK"
],
"analyzer": "synonym"
}
}
}
}
}
}
}
which leads to this error
{
"error": {
"root_cause": [
{
"type": "query_parsing_exception",
"reason": "[nested] nested object under path [LinkProps] is not of nested type",
"index": "minimal",
"line": 1,
"col": 44
}
],
"type": "search_phase_execution_exception",
"reason": "all shards failed",
"phase": "query",
"grouped": true,
"failed_shards": [
{
"shard": 0,
"index": "minimal",
"node": "6AhE4RCIQwywl49h0Q2-yw",
"reason": {
"type": "query_parsing_exception",
"reason": "[nested] nested object under path [LinkProps] is not of nested type",
"index": "minimal",
"line": 1,
"col": 44
}
}
]
},
"status": 400
}
When I check the analyzer with
GET http://localhost:9200/minimal/_analyze?text=foo&analyzer=synonym&pretty=true
I get the correct answer
{
"tokens": [
{
"token": "foo",
"start_offset": 0,
"end_offset": 3,
"type": "word",
"position": 0
},
{
"token": "test",
"start_offset": 0,
"end_offset": 3,
"type": "SYNONYM",
"position": 0
}
]
}
So the analyzer seems to be set up correctly. Did I mess up the mappings? I guess the problem is not caused by having nested objects, or is it?
I just tried this
{
"query": {
"bool": {
"must": {
"query": {
"multi_match": {
"query": "foo",
"fields": [
"LinkProps.TITLE",
"LinkProps.LINK"
],
"analyzer": "synonym"
}
}
}
}
}
}
As you can see, I removed the "nested" wrapper
"nested": {
"path": "LinkProps",
...
}
which now at least returns some results (I'm not sure yet whether these will turn out to be the correct results). I'm trying to apply this to the original project and will keep you posted on whether it works there too.
I tried some examples from the Elasticsearch documentation and from Google, but nothing helped me figure this out.
The sample data I have is just a few blog posts. I am trying to find all posts with a given email address. When I use "email":"someone" I see all the posts matching someone, but when I change it to someone#gmail.com nothing shows up!
"hits": [
{
"_index": "blog",
"_type": "post",
"_id": "2",
"_score": 1,
"_source": {
"user": "sreenath",
"email": "someone#gmail.com",
"postDate": "2011-12-12",
"body": "Trying to figure out this",
"title": "Elastic search testing"
}
}
]
When I use the GET query shown below, I see all posts matching someone#anything.com. But I want to change this:
{ "term" : { "email" : "someone" }} to { "term" : { "email" : "someone#gmail.com" }}
GET blog/post/_search
{
"query" : {
"filtered" : {
"filter" : {
"and" : [
{ "term" :
{ "email" : "someone" }
}
]
}
}
}
}
I ran the following curl -XPUT, but it did not help:
curl -XPUT localhost:9200/test/ -d '
{
"settings" : {
"analysis" : {
"filter" : {
"email" : {
"type" : "pattern_capture",
"preserve_original" : 1,
"patterns" : [
"([^#]+)",
"(\\p{L}+)",
"(\\d+)",
"#(.+)"
]
}
},
"analyzer" : {
"email" : {
"tokenizer" : "uax_url_email",
"filter" : [ "email", "lowercase", "unique" ]
}
}
}
}
}
'
You have created a custom analyzer for email addresses but you are not using it. You need to declare the email field in your mapping type to actually use that analyzer, like below. Also make sure to create the right index with that analyzer, i.e. blog and not test
change this
|
v
curl -XPUT localhost:9200/blog/ -d '{
"settings" : {
"analysis" : {
"filter" : {
"email" : {
"type" : "pattern_capture",
"preserve_original" : 1,
"patterns" : [
"([^#]+)",
"(\\p{L}+)",
"(\\d+)",
"#(.+)"
]
}
},
"analyzer" : {
"email" : {
"tokenizer" : "uax_url_email",
"filter" : [ "email", "lowercase", "unique" ]
}
}
}
},
"mappings": { <--- add this
"post": {
"properties": {
"email": {
"type": "string",
"analyzer": "email"
}
}
}
}
}
'
I have a batch of "smartphones" products in my ES and I need to query them using the text "smart phone". So I'm looking into the compound word token filter. Specifically, I'm planning to use a custom filter like this:
curl -XPUT 'localhost:9200/_all/_settings -d '
{
"analysis" : {
"analyzer":{
"second":{
"type":"custom",
"tokenizer":"standard",
"filter":["myFilter"]
}
"filter": {
"myFilter" :{
"type" : "dictionary_decompounder"
"word_list": ["smart", "phone"]
}
}
}
}
}
'
Is this the correct approach? Also, I'd like to ask how I can create and add the custom analyser to ES. I looked into several links but couldn't figure out how to do it. I guess I'm looking for the correct syntax.
Thank you
EDIT
I'm running version 1.4.5,
and I verified that the custom analyser was added successfully:
{
"test_index" : {
"settings" : {
"index" : {
"creation_date" : "1453761455612",
"analysis" : {
"filter" : {
"myFilter" : {
"type" : "dictionary_decompounder",
"word_list" : [ "smart", "phone" ]
}
},
"analyzer" : {
"second" : {
"type" : "custom",
"filter" : [ "lowercase", "myFilter" ],
"tokenizer" : "standard"
}
}
},
"number_of_shards" : "5",
"number_of_replicas" : "1",
"version" : {
"created" : "1040599"
},
"uuid" : "xooKEdMBR260dnWYGN_ZQA"
}
}
}
}
Your approach looks good. I would also consider adding the lowercase token filter, so that even Smartphone (notice the uppercase 'S') will be split into smart and phone.
Then you could create the index with the analyzer like this:
curl -XPUT 'localhost:9200/your_index' -d '
{
"settings": {
"analysis": {
"analyzer": {
"second": {
"type": "custom",
"tokenizer": "standard",
"filter": [
"lowercase",
"myFilter"
]
}
},
"filter": {
"myFilter": {
"type": "dictionary_decompounder",
"word_list": [
"smart",
"phone"
]
}
}
}
},
"mappings": {
"my_type": {
"properties": {
"name": {
"type": "string",
"analyzer": "second"
}
}
}
}
}
'
Here you are creating an index named your_index and a custom analyzer named second, and applying that analyzer to the name field.
You can check whether the analyzer is working as expected with the analyze API, like this:
curl -XGET 'localhost:9200/your_index/_analyze' -d '
{
"analyzer" : "second",
"text" : "LG Android smartphone"
}'
Hope this helps!!
I just installed Elasticsearch and am testing it. It looks great, but I need to know something. I have a configuration file,
elasticsearch.json in config directory
{
"network" : {
"host" : "127.0.0.1"
},
"index" : {
"number_of_shards": 3,
"number_of_replicas": 1,
"refresh_interval" : "2s",
"analysis" : {
"analyzer" : {
"index_analyzer" : {
"tokenizer" : "nGram",
"filter" : ["lowercase"]
},
"search_analyzer" : {
"tokenizer" : "nGram",
"filter" : ["lowercase"]
}
},
"// you'll need lucene dep for this: filter" : {
"snowball": {
"type" : "snowball",
"language" : "English"
}
}
}
}
}
and I have inserted a doc that contains the word "searching". If I search for the keyword
"search", it says nothing was found...
Won't it stem before indexing, or have I missed something in the config?
What does your query look like?
Your config does not look right. Try:
...
"index_analyzer" : {
"tokenizer" : "nGram",
"filter" : ["lowercase", "snowball"]
},
"search_analyzer" : {
"tokenizer" : "nGram",
"filter" : ["lowercase", "snowball"]
}
},
"filter" : {
"snowball": {
"type" : "snowball",
"language" : "English"
}
}
I've had trouble overriding the "default_search" and "default_index" analyzer as well.
This works though.
You can add "index_analyzer" to default all string fields with unspecified analyzers within a type, if need be.
curl -XDELETE localhost:9200/twitter
curl -XPOST localhost:9200/twitter -d '
{"index":
{ "number_of_shards": 1,
"analysis": {
"filter": {
"snowball": {
"type" : "snowball",
"language" : "English"
}
},
"analyzer": { "a2" : {
"type":"custom",
"tokenizer": "standard",
"filter": ["lowercase", "snowball"]
}
}
}
}
}'
curl -XPUT localhost:9200/twitter/tweet/_mapping -d '{
"tweet" : {
"date_formats" : ["yyyy-MM-dd", "dd-MM-yyyy"],
"properties" : {
"user": {"type":"string"},
"message" : {"type" : "string", "analyzer":"a2"}
}
}}'
curl -XPUT http://localhost:9200/twitter/tweet/1 -d '{ "user": "kimchy", "post_date": "2009-11-15T13:12:00", "message": "Trying out searching teaching, so far so good?" }'
curl -XGET localhost:9200/twitter/tweet/_search?q=message:search
curl -XGET localhost:9200/twitter/tweet/_search?q=message:try