How to query fields with path_hierarchy analyzer in elasticsearch? - elasticsearch

I have configured path_analyzer in elasticsearch using below configuration.
PUT /elastic_course
{
"settings": {
"analysis": {
"analyzer": {
"path_analyzer": {
"tokenizer": "path_tokenizer"
},
"reverse_path_analyzer": {
"tokenizer": "path_tokenizer"
}
},
"tokenizer": {
"path_tokenizer": {
"type": "path_hierarchy",
"delimiter": "/",
"replacement": "-"
},
"reverse_path_tokenizer": {
"type": "path_hierarchy",
"delimiter": "/",
"replacement": "-"
}
}
}
},
"mappings": {
"book" : {
"properties": {
"author": {
"type": "string",
"index": "not_analyzed"
},
"genre": {
"type": "string",
"index": "not_analyzed"
},
"score": {
"type": "double"
},
"synopsis": {
"type": "string",
"index":"analyzed",
"analyzer":"english"
},
"title": {
"type": "string"
},
"path":{
"type":"string",
"analyzer":"path_analyzer",
"fields": {
"reverse": {
"type":"string",
"analyzer":"reverse_path_analyzer"
}
}
}
}
}
}
}
Now I have inserted four documents where path values are :
/angular/structural
/angular/structural/directives
/angular/structural/components
/angular/logistics
Now I want to query my index such as :
It should retrieve only the children of structural.
It should return all the leaf nodes, i.e. components, directives and logistics.
I tried running below query but it retrieves all the records.
POST elastic_course/book/_search
{
"query": {
"regexp": {
"path.":"/structural"
}
}
}
any help?
Thanks.

Related

Elasticsearch Synonym search using wordnet not working

I tried to debug my synonym search. It seems that when I use the wordnet format with the wn_s.pl file it doesn't work, but when I use a custom synonym.txt file it works. Please let me know where I am going wrong. Please find my index below:
{
"settings": {
"index": {
"analysis": {
"filter": {
"synonym": {
"type": "synonym",
"format": "wordnet",
"synonyms_path": "analysis/wn_s.pl"
}
},
"analyzer": {
"synonym": {
"tokenizer": "standard",
"filter": ["lowercase",
"synonym"
]
}
},
"mappings": {
"properties": {
"firebaseId": {
"type": "text"
},
"name": {
"fielddata": true,
"type": "text",
"analyzer": "standard"
},
"name_auto": {
"type": "text"
},
"category_name": {
"type": "text",
"analyzer": "synonym"
},
"sku": {
"type": "text"
},
"price": {
"type": "text"
},
"magento_id": {
"type": "text"
},
"seller_id": {
"type": "text"
},
"square_item_id": {
"type": "text"
},
"square_variation_id": {
"type": "text"
},
"typeId": {
"type": "text"
}
}
}
}
}
}
}
I am trying to do a synonym search on category_name. I have items like shoes and dresses, etc. When I search for boots, flipflop or slipper, nothing comes back.
here is my query search:
{
"query": {
"match": {
"category_name": "flipflop"
}
}
}
Your wordnet synonym format is not correct. Please have a look here
For a fast implementation please look at the synonyms.json

ElasticSearch: analyzer on field must be set when search_analyzer is set

I have read about previous version of ES (< 2) where the "token_analyzer" key needs to be changed to "analyzer". But no matter what I do I am still getting this error:
"type": "mapper_parsing_exception",
"reason": "analyzer on field [email] must be set when search_analyzer is set"
Here is what I am passing into ES via a PUT function when I get the error:
{
"settings": {
"analysis": {
"analyzer": {
"my_email_analyzer": {
"type": "custom",
"tokenizer": "uax_url_email",
"filter": ["lowercase", "stop"]
}
}
}
},
"mappings" : {
"uuser": {
"properties": {
"email": {
"type": "text",
"search_analyzer": "my_email_analyzer",
"fields": {
"email": {
"type": "text",
"analyzer": "my_email_analyzer"
}
}
},
"facebookId": {
"type": "text"
},
"name": {
"type": "text"
},
"profileImageUrl": {
"type": "text"
},
"signupDate": {
"type": "date"
},
"username": {
"type": "text"
}
,
"phoneNumber": {
"type": "text"
}
}
}
}
}
Any ideas what is wrong?
Because you have specified a search_analyzer for the field, you also have to specify the analyzer to be used at indexing time. For example, add this line under where you specify the search_analyzer:
"analyzer": "standard",
To give you this:
{
"settings": {
"analysis": {
"analyzer": {
"my_email_analyzer": {
"type": "custom",
"tokenizer": "uax_url_email",
"filter": ["lowercase", "stop"]
}
}
}
},
"mappings" : {
"uuser": {
"properties": {
"email": {
"type": "text",
"search_analyzer": "my_email_analyzer",
"analyzer": "standard",
"fields": {
"email": {
"type": "text",
"analyzer": "my_email_analyzer"
}
}
},
"facebookId": {
"type": "text"
},
"name": {
"type": "text"
},
"profileImageUrl": {
"type": "text"
},
"signupDate": {
"type": "date"
},
"username": {
"type": "text"
}
,
"phoneNumber": {
"type": "text"
}
}
}
}
}
See also: https://www.elastic.co/guide/en/elasticsearch/reference/current/search-analyzer.html

elasticsearch mapping exception when using dynamic templates

Hi, I am using Elasticsearch to index some documents, but the documents will have some fields like goal1Completion, goal2Completion....goal100Completion. So I was trying to do the mapping with dynamic templates. I came up with the following, but it throws an error:
{
"mappings": {
"date": {
"properties": {
"sessions": {
"type": "long"
},
"viewId": {
"type": "string",
"index": "not_analyzed"
},
"webPropertyId": {
"type": "string",
"index": "not_analyzed"
},
"dynamic_templates": [
{
"goalCompletions": {
"match_pattern": "regex",
"match": "goal\\d+\\w+",
"mapping": {
"type": "long"
}
}
}
]
}
}
}
}
error:"reason": "Expected map for property [fields] on field [dynamic_templates] but got a class java.lang.String"
What could be the problem here?
You need to pull dynamic_templates out of the properties map.
{
"mappings": {
"date": {
"properties": {
"sessions": {
"type": "long"
},
"viewId": {
"type": "string",
"index": "not_analyzed"
},
"webPropertyId": {
"type": "string",
"index": "not_analyzed"
}
},
"dynamic_templates": [ <--- Pull this out of properties
{
"goalCompletions": {
"match_pattern": "regex",
"match": "goal\\d+\\w+",
"mapping": {
"type": "long"
}
}
}
]
}
}
}

Matching closest ancestor with Path Hierarchy Tokenizer

I've got an Elasticsearch v5 index set up for mapping config hashes to URLs.
{
"settings": {
"analysis": {
"analyzer": {
"url-analyzer": {
"type": "custom",
"tokenizer": "url-tokenizer"
}
},
"tokenizer": {
"url-tokenizer": {
"type": "path_hierarchy",
"delimiter": "/"
}
}
}
},
"mappings": {
"route": {
"properties": {
"uri": {
"type": "string",
"index": "analyzed",
"analyzer": "url-analyzer"
},
"config": {
"type": "object"
}}}}}
I would like to match the longest path prefix with the highest score, so that given the documents
{ "uri": "/trousers/", "config": { "foo": 1 }}
{ "uri": "/trousers/grey", "config": { "foo": 2 }}
{ "uri": "/trousers/grey/lengthy", "config": { "foo": 3 }}
when I search for /trousers, the top result should be trousers, and when I search for /trousers/grey/short the top result should be /trousers/grey.
Instead, I'm finding that the top result for /trousers is /trousers/grey/lengthy.
How can I index and query my documents to achieve this?
I have one solution, after drinking on it: what if we treat the URI in the index as a keyword, but still use the PathHierarchyTokenizer on the search input?
Now we store the following docs:
/trousers
/trousers/grey
/trousers/grey/lengthy
When we submit a query for /trousers/grey/short, the search_analyzer can build the input [trousers, trousers/grey, trousers/grey/short].
The first two of our documents will match, and we can trivially select the longest match using a custom sort.
Now our mapping document looks like this:
{
"settings": {
"analysis": {
"analyzer": {
"uri-analyzer": {
"type": "custom",
"tokenizer": "keyword"
},
"uri-query": {
"type": "custom",
"tokenizer": "uri-tokenizer"
}
},
"tokenizer": {
"uri-tokenizer": {
"type": "path_hierarchy",
"delimiter": "/"
}
}
}},
"mappings": {
"route": {
"properties": {
"uri": {
"type": "text",
"fielddata": true,
"analyzer": "uri-analyzer",
"search_analyzer": "uri-query"
},
"config": {
"type": "object"
}
}
}
}
}
and our query looks like this:
{
"sort": {
"_script": {
"script": "doc.uri.length",
"order": "asc",
"type": "number"
}
},
"query": {
"match": {
"uri": {
"query": "/trousers/grey/lengthy",
"type": "boolean"
}
}
}
}

elasticsearch "having not" query

Some documents have a category field. Some of these docs have a category field whose value equals "-1". I need a query that returns documents which have a category field and whose value is not equal to "-1".
I tried this:
GET webproxylog/_search
{
"query": {
"filtered": {
"filter": {
"not":{
"filter": {"and": {
"filters": [
{"term": {
"category": "-1"
}
},
{
"missing": {
"field": "category"
}
}
]
}}
}
}
}
}
}
But it does not work; it returns docs that do not have the category field.
EDIT
Mapping:
{
"webproxylog": {
"mappings": {
"accesslog": {
"properties": {
"category": {
"type": "string",
"index": "not_analyzed"
},
"clientip": {
"type": "string",
"index": "not_analyzed"
},
"clientmac": {
"type": "string",
"index": "not_analyzed"
},
"clientname": {
"type": "string",
"index": "not_analyzed"
},
"duration": {
"type": "long"
},
"filetype": {
"type": "string",
"index": "not_analyzed"
},
"hierarchycode": {
"type": "string",
"index": "not_analyzed"
},
"loggingdate": {
"type": "date",
"format": "dateOptionalTime"
},
"reqmethod": {
"type": "string",
"index": "not_analyzed"
},
"respsize": {
"type": "long"
},
"resultcode": {
"type": "string",
"index": "not_analyzed"
},
"url": {
"type": "string",
"analyzer": "slash_analyzer"
},
"user": {
"type": "string",
"index": "not_analyzed"
}
}
}
}
}
}
If your category field is string and is analyzed by default, then your -1 will be indexed as 1 (stripping the minus sign).
You will need that field to be not_analyzed or to add a sub-field which is not analyzed (as my solution below).
Something like this:
DELETE test
PUT /test
{
"mappings": {
"test": {
"properties": {
"category": {
"type": "string",
"fields": {
"notAnalyzed": {
"type": "string",
"index": "not_analyzed"
}
}
}
}
}
}
}
POST /test/test/1
{"category": "-1"}
POST /test/test/2
{"category": "2"}
POST /test/test/3
{"category": "3"}
POST /test/test/4
{"category": "4"}
POST /test/test/5
{"category2": "-1"}
GET /test/test/_search
{
"query": {
"bool": {
"must_not": [
{
"term": {
"category.notAnalyzed": {
"value": "-1"
}
}
},
{
"filtered": {
"filter": {
"missing": {
"field": "category"
}
}
}
}
]
}
}
}

Resources