Elasticsearch Similar Text Query - elasticsearch

Given the following documents in an index (let's call it addresses):
{
ADDRESS: {
ID: 1,
LINE1: "steet 1",
CITY: "kuala lumpur",
COUNTRY: "MALAYSIA",
...
}
}
{
ADDRESS: {
ID: 2,
LINE1: "steet 1",
CITY: "kualalumpur city",
COUNTRY: "MALAYSIA",
...
}
}
{
ADDRESS: {
ID: 3,
LINE1: "steet 1",
CITY: "kualalumpur",
COUNTRY: "MALAYSIA",
...
}
}
{
ADDRESS: {
ID: 4,
LINE1: "steet 1",
CITY: "kuala lumpur city",
COUNTRY: "MALAYSIA",
...
}
}
At this point, my query returns "kualalumpur", "kuala lumpur", and "kualalumpur city" for the search text "kualalumpur".
But "kuala lumpur city" is missing from the results despite its close similarity to "kualalumpur city".
Here is my query so far:
{
"query": {
"bool": {
"should": [
{"match": {"ADDRESS.STREET": {"query": "street 1", "fuzziness": 1, "operator": "AND"}}},
{
"bool": {
"should": [
{"match": {"ADDRESS.CITY": {"query": "kualalumpur", "fuzziness": 1, "operator": "OR"}}},
{"match": {"ADDRESS.CITY.keyword": {"query": "kualalumpur", "fuzziness": 1, "operator": "OR"}}}
]
}
}
],
"filter": {
"bool": {
"must": [
{"term": {"ADDRESS.COUNTRY.keyword": "MALAYSIA"}}
]
}
},
"minimum_should_match": 2
}
}
}
Given the condition, is it possible at all for Elasticsearch to return all four documents with search text "kualalumpur"?

You can use an edge n-gram tokenizer on the field to get all four docs. I tried it locally and am adding a working example below (the example names the field country, but it holds the city values).
Create custom analyzer and apply it on your field
{
"settings": {
"index": {
"analysis": {
"analyzer": {
"ngram_analyzer": {
"type": "custom",
"filter": [
"lowercase"
],
"tokenizer": "edgeNGramTokenizer"
}
},
"tokenizer": {
"edgeNGramTokenizer": {
"token_chars": [
"letter",
"digit"
],
"min_gram": "1",
"type": "edgeNGram",
"max_gram": "40"
}
}
},
"max_ngram_diff": "50"
}
},
"mappings": {
"properties": {
"country": {
"type": "text",
"analyzer" : "ngram_analyzer"
}
}
}
}
Index all four of your sample docs, like below:
{
"country" : "kuala lumpur"
}
A search query with the term kualalumpur matches all four docs:
{
"query": {
"match" : {
"country" : "kualalumpur"
}
}
}
"hits": [
{
"_index": "fuzzy",
"_type": "_doc",
"_id": "3",
"_score": 5.0003963,
"_source": {
"country": "kualalumpur"
}
},
{
"_index": "fuzzy",
"_type": "_doc",
"_id": "2",
"_score": 4.4082437,
"_source": {
"country": "kualalumpur city"
}
},
{
"_index": "fuzzy",
"_type": "_doc",
"_id": "1",
"_score": 0.5621849,
"_source": {
"country": "kuala lumpur"
}
},
{
"_index": "fuzzy",
"_type": "_doc",
"_id": "4",
"_score": 0.4956103,
"_source": {
"country": "kuala lumpur city"
}
}
]

Related

Elastic search does not return proper result

I have some tours as listed below.
Memorable Istanbul
Memorable Athens City Escape
Memorable Rome City Escape
Memorable Paris
I have created an index in elasticsearch as described below.
string newName = indexName + DateTime.Now.Ticks;
CreateIndexDescriptor createIndexDescriptor = new CreateIndexDescriptor(newName)
.Settings(s => s
.NumberOfShards(4)
.NumberOfReplicas(2)
.Setting("max_result_window", int.MaxValue)
.Setting("max_ngram_diff", 25)
.Analysis(a => a
.Analyzers(aa => aa
.Custom("my_analyzer", ca => ca
.Tokenizer("standart")
.Filters("lowercase", "my_ascii_folding", "my_stemm", "stop")
)
)
.TokenFilters(t => t
.AsciiFolding("my_ascii_folding", af => af.PreserveOriginal(true))
.Stemmer("my_stemm", sm => sm.Language("english"))
//.EdgeNGram("key_edgengram", ng => ng.MinGram(4).MaxGram(10))
)
.Tokenizers(t => t.Standard("standart"))
)
)
.Map<T>(mm => mm.AutoMap().Properties(p => p.SearchAsYouType(t => t.Name(n => n.SearchingArea).Analyzer("my_analyzer"))));
MemoryStream stream = new MemoryStream();
elasticClient.RequestResponseSerializer.Serialize(createIndexDescriptor, stream);
string jsonData = Encoding.UTF8.GetString(stream.ToArray());
var indexResponse = elasticClient.Indices.Create(createIndexDescriptor);
When I search for "memor" or "memorable", it returns all of them, but when I try to search for "memorab" or "memorabl", it does not return any results.
Here is my search query:
GET /tourIndex/_search
{
"from": 0,
"highlight": {
"fields": {
"searchingArea": {},
"route": {}
},
"post_tags": [
"</strong>"
],
"pre_tags": [
"<strong style='background: #efefef; padding: 3px;'>"
]
},
"query": {
"bool": {
"minimum_should_match": 1,
"should": [
{
"match": {
"searchingArea": {
"query": "memorab"
}
}
}
]
}
},
"size": 20,
"sort": [
{
"score": {
"order": "desc"
}
}
]
}
The result was the same when I used "match_phrase_prefix" and "match_phrase".
Thanks for any help.
Adding a working example
Index Mapping:
{
"settings": {
"analysis": {
"analyzer": {
"my_analyzer": {
"tokenizer": "my_tokenizer",
"filter": "lowercase"
}
},
"tokenizer": {
"my_tokenizer": {
"type": "edge_ngram",
"min_gram": 4,
"max_gram": 15,
"token_chars": [
"letter",
"digit"
]
}
}
},
"max_ngram_diff": 25
},
"mappings": {
"properties": {
"title": {
"type": "text",
"analyzer": "my_analyzer"
}
}
}
}
Index Data:
{
"title": "Memorable Istanbul"
}
{
"title": "Memorable Athens City Escape"
}
{
"title": "Memorable Rome City Escape"
}
{
"title": "Memorable Paris"
}
Search Query:
{
"query": {
"match": {
"title": "memorab"
}
}
}
Search Result:
"hits": [
{
"_index": "66262920",
"_type": "_doc",
"_id": "4",
"_score": 0.47070155,
"_source": {
"title": "Memorable Paris"
}
},
{
"_index": "66262920",
"_type": "_doc",
"_id": "1",
"_score": 0.41747034,
"_source": {
"title": "Memorable Istanbul"
}
},
{
"_index": "66262920",
"_type": "_doc",
"_id": "3",
"_score": 0.41747034,
"_source": {
"title": "Memorable Rome City Escape"
}
},
{
"_index": "66262920",
"_type": "_doc",
"_id": "2",
"_score": 0.38820273,
"_source": {
"title": "Memorable Athens City Escape"
}
}
]

Elasticsearch query to return part of words searched for

I would like to know how I can return "thanks" or "thanking" if I search for "thank"
Currently I have a multi-match query which returns only occurrences of "thank" like "thank you" but not "thanksgiving" or "thanks". I am using ElasticSearch 7.9.1
query: {
bool: {
must: [
{match: {accountId}},
{
multi_match: {
query: "thank",
type: "most_fields",
fields: ["text", "address", "description", "notes", "name"],
}
}
],
filter: {match: {type: "personaldetails"}}
}
},
Also is it possible to combine the multimatch query with a queryString on one of the fields (say description, where I would do a querystring search only on description and a phrase match on other fields)
{ "query": {
"query_string": {
"query": "(new york city) OR (big apple)",
"default_field": "content"
}
}
}
Any input is appreciated.
thanks
You can use the edge_ngram tokenizer, which first breaks text down into
words whenever it encounters one of a list of specified characters,
then emits N-grams of each word where the start of the N-gram is
anchored to the beginning of the word.
Adding a working example with index data, mapping, search query, and search result
Index Mapping:
{
"settings": {
"analysis": {
"analyzer": {
"my_analyzer": {
"tokenizer": "my_tokenizer"
}
},
"tokenizer": {
"my_tokenizer": {
"type": "edge_ngram",
"min_gram": 5,
"max_gram": 20,
"token_chars": [
"letter",
"digit"
]
}
}
},
"max_ngram_diff": 50
},
"mappings": {
"properties": {
"notes": {
"type": "text",
"analyzer": "my_analyzer",
"search_analyzer": "standard" // note this
}
}
}
}
Index Data:
{
"notes":"thank"
}
{
"notes":"thank you"
}
{
"notes":"thanks"
}
{
"notes":"thanksgiving"
}
Search Query:
{
"query": {
"multi_match" : {
"query": "thank",
"fields": [ "notes", "name" ]
}
}
}
Search Result:
"hits": [
{
"_index": "65511630",
"_type": "_doc",
"_id": "1",
"_score": 0.1448707,
"_source": {
"notes": "thank"
}
},
{
"_index": "65511630",
"_type": "_doc",
"_id": "3",
"_score": 0.1448707,
"_source": {
"notes": "thank you"
}
},
{
"_index": "65511630",
"_type": "_doc",
"_id": "2",
"_score": 0.12199639,
"_source": {
"notes": "thanks"
}
},
{
"_index": "65511630",
"_type": "_doc",
"_id": "4",
"_score": 0.06264679,
"_source": {
"notes": "thanksgiving"
}
}
]
To combine multi-match query with query string, use the below query:
{
"query": {
"bool": {
"must": {
"multi_match": {
"query": "thank",
"fields": [
"notes",
"name"
]
}
},
"should": {
"query_string": {
"query": "(new york city) OR (big apple)",
"default_field": "content"
}
}
}
}
}

Elasticsearch Filter - AND/OR behaviour

I have this query where I am searching for all documents which match type: location and then applying a filter on the result using exact match on postalCode and countryCode but a prefix on the address.
The filter works fine and behaves as an AND condition, i.e. all 3 must match. How can I achieve an OR condition in the filter? With the OR condition, it should return results even if only one filter matches.
Elasticsearch version - 7.9
GET index/_search
{
"query": {
"bool": {
"must": [
{
"match": {
"type": "location"
}
}
],
"filter": [
{
"term": {
"postalCode": "12345"
}
},
{
"prefix": {
"address": "555"
}
},
{
"term": {
"countryCode": "US"
}
}
]
}
}
}
You can use a bool query with should clauses inside the filter clause.
Adding a working example with index data, search query, and search result
Index Data:
{
"postalCode": "12345",
"address": "555",
"countryCode": "US",
"type":"location"
}
{
"postalCode": "9",
"address": "555",
"countryCode": "US",
"type":"location"
}
{
"postalCode": "9",
"address": "4",
"countryCode": "US",
"type":"location"
}
{
"postalCode": "9",
"address": "4",
"countryCode": "AK",
"type":"location"
}
Search Query:
{
"query": {
"bool": {
"must": [
{
"match": {
"type": "location"
}
}
],
"filter": [
{
"bool": {
"should": [
{
"term": {
"postalCode": "12345"
}
},
{
"prefix": {
"address": "555"
}
},
{
"term": {
"countryCode.keyword": "US"
}
}
],
"minimum_should_match":1
}
}
]
}
}
}
Search Result:
"hits": [
{
"_index": "65192559",
"_type": "_doc",
"_id": "2",
"_score": 0.10536051,
"_source": {
"postalCode": "9",
"address": "555",
"countryCode": "US",
"type": "location"
}
},
{
"_index": "65192559",
"_type": "_doc",
"_id": "1",
"_score": 0.10536051,
"_source": {
"postalCode": "12345",
"address": "555",
"countryCode": "US",
"type": "location"
}
},
{
"_index": "65192559",
"_type": "_doc",
"_id": "3",
"_score": 0.10536051,
"_source": {
"postalCode": "9",
"address": "4",
"countryCode": "US",
"type": "location"
}
}
]

elasticsearch filter on nested array

Let's say records have a cities field that is an array of city names.
records ex:
record 1:
{
cities : [
{name: city1},
{name : city2},
{name : city3}
]
}
record 2:
{
cities : [
{name: city2},
{name : city3},
{name : city4}
]
}
record 3:
{
cities : [
{name: city3},
{name : city4},
{name : city5}
]
}
requirement:
My filter criterion is to fetch the records that match city1 or city2 or city3; since record 1 matches all 3 it should come first, record 2 matches 2 so it should come second, and record 3 matches only one so it should come last.
You don't have to use the nested data type, as you don't have nested properties or complex objects; it's very simple and easy to achieve.
Working example
Index mapping
{
"mappings": {
"properties": {
"cities": {
"type": "text"
}
}
}
}
Index sample docs
{
"cities": [
"tel-aviv", "bangalore", "sf"
]
}
{
"cities": [
"tel-aviv"
]
}
{
"cities": [
"sf"
]
}
Search query
{
"query": {
"bool": {
"should": [
{
"match": {
"cities": "tel-aviv"
}
},
{
"match": {
"cities": "bangalore"
}
},
{
"match": {
"cities": "sf"
}
}
]
}
}
}
And search result with proper expected result and score
"hits": [
{
"_index": "cities",
"_type": "_doc",
"_id": "1",
"_score": 1.850198,
"_source": {
"cities": [
"tel-aviv",
"bangalore",
"sf"
]
}
},
{
"_index": "cities",
"_type": "_doc",
"_id": "2",
"_score": 0.9983525,
"_source": {
"cities": [
"tel-aviv"
]
}
},
{
"_index": "cities",
"_type": "_doc",
"_id": "3",
"_score": 0.6133945,
"_source": {
"cities": [
"sf"
]
}
}
]
Adding another answer with nested bool queries:
Index Mapping:
{
"mappings": {
"properties":{
"Cities": {
"type": "nested",
"dynamic": "true"
}
}}
}
Index Data:
{
"Cities": [
{
"id": 1,
"city": "Bangalore"
},
{
"id": 2,
"city": "Hyderabad"
},
{
"id": 3,
"city": "Delhi"
}
]
}
{
"Cities": [
{
"id": 1,
"city": "Bangalore"
},
{
"id": 2,
"city": "abc"
},
{
"id": 3,
"city": "Def"
}
]
}
Search Query:
{
"query": {
"bool": {
"should": [
{
"nested": {
"path": "Cities",
"query": {
"bool": {
"must": [
{
"match": {
"Cities.city": "Bangalore"
}
}
]
}
}
}
},
{
"nested": {
"path": "Cities",
"query": {
"bool": {
"must": [
{
"match": {
"Cities.city": "Hyderabad"
}
}
]
}
}
}
}
]
}
}
}
Search Result:
"hits": [
{
"_index": "nested-63806067",
"_type": "_doc",
"_id": "1",
"_score": 3.297317, <-- note this
"_source": {
"Cities": [
{
"id": 1,
"city": "Bangalore"
},
{
"id": 2,
"city": "Hyderabad"
},
{
"id": 3,
"city": "Delhi"
}
]
}
},
{
"_index": "nested-63806067",
"_type": "_doc",
"_id": "2",
"_score": 1.6486585, <-- note this
"_source": {
"Cities": [
{
"id": 1,
"city": "Bangalore"
},
{
"id": 2,
"city": "abc"
},
{
"id": 3,
"city": "Def"
}
]
}
}
]

Elasticsearch : search results on clicking on Hashtag

I have a hashtag with tags in camel Case like #teamIndia. Now when this hashtag is clicked, it should fetch all results which have "#teamIndia" in it, It should first show results with "#teamIndia", then results with "teamIndia" and then "team India" and then "team" or "India" and so on.
What I am doing:
Search text:
"#teamIndia", "#NEWYORK", "#profession", "#2016"
POST /clip
{
"settings": {
"analysis": {
"char_filter" : {
"space_hashtags" : {
"type" : "mapping",
"mappings" : ["#=>|#"]
}
},
"filter": {
"substring": {
"max_gram": "20",
"type": "nGram",
"min_gram": "1",
"token_chars": [
"whitespace"
]
},
"camelcase": {
"type": "word_delimiter",
"type_table": ["# => ALPHANUM", "# => ALPHANUM"]
},
"stopword": {
"type": "stop",
"stopwords": ["and", "is", "the"]
}
},
"analyzer": {
"substring_analyzer": {
"filter": [
"lowercase",
"substring"
],
"tokenizer": "standard"
},
"camelcase_analyzer": {
"type" : "custom",
"char_filter" : "space_hashtags",
"tokenizer" : "whitespace",
"filter": [
"camelcase",
"lowercase",
"stopword"
]
}
}
}
},
"mappings": {
"Clip": {
"properties": {
"description": {
"type": "multi_field",
"fields": {
"description": {
"type": "string",
"analyzer": "substring_analyzer",
"search_analyzer": "standard"
},
"raw": {
"type": "string",
"index": "not_analyzed"
},
"hashtag": {
"type": "string",
"index": "analyzed",
"analyzer": "camelcase_analyzer"
}
}
},
....
}
}
}
}
Docs example :-
POST /clip/Clip/2 {"id" : 1, "description" : "TheBestAndTheBeast"}
POST /clip/Clip/3 {"id" : 2, "description" : "bikes in DUBAI TheBestAndTheBeast profession"}
POST /clip/Clip/3 {"id" : 2, "description" : "Know how a software engineer surprised his wife! &lt;a href=&quot;search/clips?q=%23theProvider&amp;source=hashtag&quot; ng-click=&quot;handleModalClick()&quot;&gt; #theProvider &lt;/a&gt; rioOlympic &lt;a href=&quot;search/clips?q=%23DUBAI&amp;source=hashtag&quot; ng-click=&quot;handleModalClick()&quot;&gt; #DUBAI &lt;/a&gt; &lt;a href=&quot;search/clips?q=%23TheBestAndTheBeast&amp;source=hashtag&quot; ng-click=&quot;handleModalClick()&quot;&gt; #TheBestAndTheBeast &lt;/a&gt; &lt;a href=&quot;search/clips?q=%23rioOlympic&amp;source=hashtag&quot; ng-click=&quot;handleModalClick()&quot;&gt; #rioOlympic &lt;/a&gt;"}
** Search Query **
GET clip/_search
{
"size": 100,
"query": {
"filtered": {
"query": {
"bool": {
"must":
{
"query_string": {
"fields": [
"description.hashtag"
],
"query": "teamIndia"
}
},
"should": {
"match":
{ "description.raw": "#teamIndia"}
}
}
}
}
}
}
Expected Result:
"#teamIndia",
"teamIndia",
"team India",
"team",
"India",
and similar for other test keywords.
One of the reasons the query in the original post does not work as intended is that description.raw is not_analyzed.
As a result, #teamIndia would never match a document with description: "Animals and Pets and #teamIndia", since description.raw would contain
the single non-analyzed term "Animals and Pets and #teamIndia" and not "#teamIndia".
Assuming that the documents you have are like 2nd example in the OP.
Example:
{"id" : 2, "description" : "Animals and Pets and #teamIndia"}
OR
{"id":7,"description":"This &lt;a href=&quot;search/clips?q=%23teamIndia&amp;source=hashtag&quot;&gt;#teamIndia&lt;/a&gt;"}
Then you should be able to rank documents in the following order :
1) description containing "#teamIndia",
2) description containing "teamIndia"
3) description containing "team India"
4) description containing "India"
by enabling preserve_original and catenate_all in the word_delimiter filter, as shown in the example below
Example:
Index Documents
PUT clip
{
"settings": {
"analysis": {
"char_filter": {
"zwsp_normalize": {
"type": "mapping",
"mappings": [
"\\u200B=>",
"\\u200C=>",
"\\u200D=>"
]
},
"html_decoder": {
"type": "mapping",
"mappings": [
"&lt;=> <",
"&gt;=> >"
]
}
},
"filter": {
"camelcase": {
"type": "word_delimiter",
"preserve_original": "true",
"catenate_all": "true"
},
"stopword": {
"type": "stop",
"stopwords": [
"and",
"is",
"the"
]
}
},
"analyzer": {
"camelcase_analyzer": {
"type": "custom",
"tokenizer": "whitespace",
"filter": [
"camelcase",
"lowercase",
"stopword"
],
"char_filter": [
"zwsp_normalize",
"html_decoder",
"html_strip"
]
}
}
}
},
"mappings": {
"Clip": {
"properties": {
"description": {
"type": "multi_field",
"fields": {
"hashtag": {
"type": "string",
"index": "analyzed",
"analyzer": "camelcase_analyzer",
"norms": {
"enabled": false
}
}
}
}
}
}
}
}
POST /clip/Clip/1
{
"id": 1,
"description": "Animals and Pets and #teamIndia"
}
POST /clip/Clip/2
{
"id": 2,
"description": "Animals and Pets and teamIndia"
}
POST /clip/Clip/3
{
"id": 3,
"description": "Animals and Pets and team India"
}
POST /clip/Clip/4
{
"id": 4,
"description": "Animals and Pets and India"
}
POST /clip/Clip/7
{
"id": 7,
"description": "This &lt;a href=&quot;search/clips?q=%23teamIndia&amp;source=hashtag&quot;&gt;#teamIndia&lt;/a&gt;"
}
Query Result:
POST clip/_search?search_type=dfs_query_then_fetch
{
"size": 100,
"query": {
"filtered": {
"query": {
"bool": {
"must": [
{
"query_string": {
"fields": [
"description.hashtag"
],
"query": "#teamIndia"
}
}
]
}
}
}
}
}
Results:
"hits": {
"total": 5,
"max_score": 1.4969246,
"hits": [
{
"_index": "clip",
"_type": "Clip",
"_id": "7",
"_score": 1.4969246,
"_source": {
"id": 7,
"description": "This &lt;a href=&quot;search/clips?q=%23teamIndia&amp;source=hashtag&quot;&gt;#teamIndia&lt;/a&gt;"
}
},
{
"_index": "clip",
"_type": "Clip",
"_id": "1",
"_score": 1.4969246,
"_source": {
"id": 1,
"description": "Animals and Pets and #teamIndia"
}
},
{
"_index": "clip",
"_type": "Clip",
"_id": "2",
"_score": 1.0952718,
"_source": {
"id": 2,
"description": "Animals and Pets and teamIndia"
}
},
{
"_index": "clip",
"_type": "Clip",
"_id": "3",
"_score": 0.5207714,
"_source": {
"id": 3,
"description": "Animals and Pets and team India"
}
},
{
"_index": "clip",
"_type": "Clip",
"_id": "4",
"_score": 0.11123338,
"_source": {
"id": 4,
"description": "Animals and Pets and India"
}
}
]
}
Example #dubai:
POST /clip/Clip/5
{
"id": 5,
"description": "#dubai is hot"
}
POST /clip/Clip/6
{
"id": 6,
"description": "dubai airport is huge"
}
POST clip/_search?search_type=dfs_query_then_fetch
{
"size": 100,
"query": {
"filtered": {
"query": {
"bool": {
"must": [
{
"query_string": {
"fields": [
"description.hashtag"
],
"query": "#dubai"
}
}
]
}
}
}
}
}
"hits": {
"total": 2,
"max_score": 1.820827,
"hits": [
{
"_index": "clip",
"_type": "Clip",
"_id": "5",
"_score": 1.820827,
"_source": {
"id": 5,
"description": "#dubai is hot"
}
},
{
"_index": "clip",
"_type": "Clip",
"_id": "6",
"_score": 0.5856731,
"_source": {
"id": 6,
"description": "dubai airport is huge"
}
}
]
}
Example #professionalAndPunctual :
POST /clip/Clip/7
{
"id": 7,
"description": "professionalAndPunctual"
}
POST clip/_search?search_type=dfs_query_then_fetch
{
"size": 100,
"query": {
"filtered": {
"query": {
"bool": {
"must": [
{
"query_string": {
"fields": [
"description.hashtag"
],
"query": "#professionalAndPunctual"
}
}
]
}
}
}
}
}
"hits": [
{
"_index": "clip",
"_type": "Clip",
"_id": "7",
"_score": 2.2149992,
"_source": {
"id": 7,
"description": "professionalAndPunctual"
}
}
]
Edited Example
Example: #TheBestAndTheBeast
POST /clip/Clip/10
{"id" : 10, "description" : "TheBestAndTheBeast"}
POST /clip/Clip/11
{"id" :11, "description" : "bikes in DUBAI TheBestAndTheBeast profession"}
POST /clip/Clip/12
{"id" : 12, "description" : "Know how a software engineer surprised his wife! #theProvider rioOlympic #DUBAI #TheBestAndTheBeast #rioOlympic "}
POST clip/_search?search_type=dfs_query_then_fetch
{
"size": 100,
"query": {
"filtered": {
"query": {
"bool": {
"must": [
{
"query_string": {
"fields": [
"description.hashtag"
],
"query": "#TheBestAndTheBeast"
}
}
]
}
}
}
}
}
#Results
"hits": [
{
"_index": "clip",
"_type": "Clip",
"_id": "12",
"_score": 1.8701664,
"_source": {
"id": 12,
"description": "Know how a software engineer surprised his wife! #theProvider rioOlympic #DUBAI #TheBestAndTheBeast #rioOlympic "
}
},
{
"_index": "clip",
"_type": "Clip",
"_id": "10",
"_score": 0.9263139,
"_source": {
"id": 10,
"description": "TheBestAndTheBeast"
}
},
{
"_index": "clip",
"_type": "Clip",
"_id": "11",
"_score": 0.9263139,
"_source": {
"id": 11,
"description": "bikes in DUBAI TheBestAndTheBeast profession"
}
}
]
Analyzer Example :
get clip/_analyze?analyzer=camelcase_analyzer&text=%23DUBAI
{
"tokens": [
{
"token": "#dubai",
"start_offset": 0,
"end_offset": 6,
"type": "word",
"position": 0
},
{
"token": "dubai",
"start_offset": 1,
"end_offset": 6,
"type": "word",
"position": 0
}
]
}
get clip/_analyze?analyzer=camelcase_analyzer&text=This%20%26lt%3Ba%20href%3D%26quot%3Bsearch%2Fclips%3Fq%3D%2523teamIndia%26amp%3Bsource%3Dhashtag%26quot%3B%26gt%3B%23teamIndia%26lt%3B%2Fa%26gt%3B
{
"tokens": [
{
"token": "this",
"start_offset": 0,
"end_offset": 4,
"type": "word",
"position": 0
},
{
"token": "#teamindia",
"start_offset": 78,
"end_offset": 98,
"type": "word",
"position": 1
},
{
"token": "india",
"start_offset": 78,
"end_offset": 98,
"type": "word",
"position": 2
},
{
"token": "team",
"start_offset": 78,
"end_offset": 98,
"type": "word",
"position": 2
},
{
"token": "teamindia",
"start_offset": 78,
"end_offset": 98,
"type": "word",
"position": 2
}
]
}

Resources