Elasticsearch equivalent to SelectMany - elasticsearch

I have a nested type in the Elasticsearch index:
// One node of the category hierarchy stored in the Elasticsearch index.
class Category
{
// Name of the category (presumably the value searched for, e.g. "Category3").
public string Name;
// The category nested directly under this one.
public Category Child;
// Ids of the products that belong to this category.
public Guid[] ProductIds;
}
For example:
Category1
|_Category2
|_Category3
|_Category4
Category3 and Category4 have 2 products each.
What is the right NEST query to get all 4 product ids by searching "Category3"?
I imagine something like this, but I can't actually get it to work:
var response = await elasticClient.SearchAsync<Category>(s => s
.StoredFields(sf => sf
.Fields(f => f.ProductIds)
)
.Query(q => q
.Nested(n => n
.Path(p => p.Child)
.Query(mu => mu
.Bool(b => b
.Must(m => m
.Match(m => m
.Query("Category Name")
)
)
)
)
)
)
)

One of the options could be to create a sort of breadcrumb hierarchy column and use an analyzer & facet to search over it.
So the index can have settings like the following for the column & analyzer:
"btb_breadcrumb": {
"type": "text",
"fields": {
"facet": {
"type": "text",
"analyzer": "custom_path_tree",
"fielddata": "true"
}
}
},
"settings" : {
"analysis": {
"analyzer": {
"english_exact": {
"tokenizer": "standard",
"filter": [
"lowercase"
]
},
"custom_path_tree": {
"tokenizer": "custom_hierarchy"
},
"custom_path_tree_reversed": {
"tokenizer": "custom_hierarchy_reversed"
}
},
"tokenizer": {
"custom_hierarchy": {
"type": "path_hierarchy",
"delimiter": ">"
},
"custom_hierarchy_reversed": {
"type": "path_hierarchy",
"delimiter": ">",
"reverse": "true"
}
}
}
}
and then you can search on this column like this
{
"bool" :
{
"must" :
[
{"match" : {"btb_breadcrumb.facet" : "catid1>catid2>"}}
]
}
}
Now this will give you all items with catid2 and all its child categories. Similarly, if you want to get all items that fall under any children of catid1, then you can use the following query.
{
"bool" :
{
"must" :
[
{"match" : {"btb_breadcrumb.facet" : "catid1>"}}
]
}
}

Related

Convert DSL Query to NEST.net

I have the following mapping, I am trying to rewrite it using NEST but can't seem to get it quite right yet.
PUT company
{
"settings": {
"index": {
"analysis": {
"filter": {},
"analyzer": {
"keyword_analyzer": {
"filter": [
"lowercase",
"asciifolding",
"trim"
],
"char_filter": [],
"type": "custom",
"tokenizer": "keyword"
},
"edge_ngram_analyzer": {
"filter": [
"lowercase"
],
"tokenizer": "edge_ngram_tokenizer"
},
"edge_ngram_search_analyzer": {
"tokenizer": "lowercase"
}
},
"tokenizer": {
"edge_ngram_tokenizer": {
"type": "edge_ngram",
"min_gram": 2,
"max_gram": 5,
"token_chars": [
"letter"
]
}
}
}
}
},
"mappings": {
"_doc": {
"properties": {
"companyId": {
"type": "keyword"
},
"companyName": {
"type": "text",
"fields": {
"keywordstring": {
"type": "text",
"analyzer": "keyword_analyzer"
},
"edgengram": {
"type": "text",
"analyzer": "edge_ngram_analyzer",
"search_analyzer": "edge_ngram_search_analyzer"
},
"completion": {
"type": "completion",
"contexts": [
{
"name": "companyGroupId",
"type": "category",
"path": "companyGroupId"
}
]
}
},
"analyzer": "standard"
},
"companyTradingName": {
"type": "text",
"index": false
},
"companyGroupId": {
"type": "keyword"
},
"agencyId": {
"type": "keyword"
}
}
}
}
}
This is where I have got to. I still need to add the keywordstring and edgengram sub-fields to the CompanyName field, but I'm not sure how to add those analyzers.
MODEL
/// <summary>
/// Document model for a company; the Elasticsearch type name is set to "company".
/// </summary>
[ElasticsearchType(Name = "company")]
public class CompanyDocument : IDocument
{
// Mapped with .Keyword(...) in the CreateIndex call shown under QUERY.
public long CompanyId { get; set; }
public CompletionField CompanyName { get; set; } // <-- I think this needs to be set as a CompletionField
// Mapped as a text field with Index(false) in the CreateIndex call.
public string CompanyTradingName { get; set; }
// Mapped with .Keyword(...); also used as the path of the completion category context.
public long CompanyGroupId { get; set; }
// Mapped with .Keyword(...) in the CreateIndex call.
public long AgencyId { get; set; }
}
QUERY
var response1 = this.Client.CreateIndex(
"testing_testing",
index => index
.Mappings(
ms => ms.Map<CompanyDocument>(
m => m.Properties(
p => p
.Keyword(t => t.Name(n => n.AgencyId))
.Keyword(t => t.Name(n => n.CompanyGroupId))
.Keyword(t => t.Name(n => n.CompanyId))
.Text(t => t.Name(n => n.CompanyName)
.Fields(ff =>
ff.Completion(descriptor => descriptor.Contexts(contextsDescriptor => contextsDescriptor.Category(contextDescriptor => contextDescriptor.Name("CompanyGroupId").Path("CompanyGroupId"))))))
.Text(t => t.Name(n => n.CompanyTradingName).Index(false)))))
.Settings(
f => f.Analysis(analysis => analysis
.Analyzers(analyzers => analyzers
.Custom("keyword_analyzer", _ => _.Filters("lowercase", "asciifolding", "trim").CharFilters().Tokenizer("keyword"))
.Custom("edge_ngram_analyzer", _ => _.Filters("lowercase").Tokenizer("edge_ngram_tokenizer")).Custom("edge_ngram_search_analyzer", _ => _.Tokenizer("lowercase")))
.Tokenizers(
descriptor => descriptor.EdgeNGram("edge_ngram_tokenizer", t => t.MinGram(2).MaxGram(5).TokenChars(TokenChar.Letter))))));
UPDATE
GET company/_search
{
"suggest": {
"company-suggest": {
"prefix": "trans",
"completion": {
"field": "companyName.completion",
"fuzzy": {
"fuzziness": 1
}
}
}
}
}
Rather than trying to re-write it into NEST, I decided to just use the low-level client.
So I created a JSON file, read its contents, and then used the following:
var response = this.Client.LowLevel.IndicesCreate<CompanyResponse>("index-name", PostData.Bytes(bytes));
return response.ApiCall.Success;
This seems a lot easier than trying to recreate the DSL query

Get available apartments query

Overview
I have apartments which have reservations. My index has the reservations as nested fields with date fields for start_date and end_date.
I'm using the chewy ruby gem - but this doesn't matter at this time, I think. I just need to get my query right.
Goal
I want to fetch all available apartments which have no reservation at the given date or no reservations at all.
Current query
Unfortunately returns all apartments:
:query => {
:bool => {
:must_not => [
{
:range => {:"reservations.start_date" => {:gte => "2017-02-10"}}
},
{
:range => {:"reservations.end_date" => {:lte => "2017-02-12"}}
}
]
}
}
Index Settings
{
"apartments" : {
"aliases" : { },
"mappings" : {
"apartment" : {
"properties" : {
"city" : {
"type" : "string"
},
"coordinates" : {
"type" : "geo_point"
},
"email" : {
"type" : "string"
},
"reservations" : {
"type" : "nested",
"properties" : {
"end_date" : {
"type" : "date",
"format" : "yyyy-MM-dd"
},
"start_date" : {
"type" : "date",
"format" : "yyyy-MM-dd"
}
}
},
"street" : {
"type" : "string"
},
"zip" : {
"type" : "string"
}
}
}
},
"settings" : {
"index" : {
"creation_date" : "1487289727161",
"number_of_shards" : "5",
"number_of_replicas" : "1",
"uuid" : "-rM79OUvQ3qkkLJmQCsoCg",
"version" : {
"created" : "2040499"
}
}
},
"warmers" : { }
}
}
We have to list free apartments and those apartment that will be available in the desired period (start_date, end_date variables)
So it should be an OR query: free_apartments or available_apartments
The free apartments (those that don't have any value in the reservations field) should be easy to query with a missing filter, but this is a nested field and we have to deal with that.
If we perform the query with a missing filter, all docs will be returned. It's weird, but it happens. The solution is explained here: https://gist.github.com/Erni/7484095 and here is the issue: https://github.com/elastic/elasticsearch/issues/3495 The gist snippet works with all Elasticsearch versions.
The other part of the OR query is the available apartments. I've solved this part by performing a NOT query: to return those apartments that do NOT have a reservation, build a list of range filters that match those apartments that do have a reservation, and then negate the result using a must_not filter
elasticsearch_query = {
"query": {
"filtered": {
"filter": {
"bool": {
"should": [
{
"nested": {
"filter": {
"bool": {
"must_not" : [
{
"range": {
"start_date": {
"gte" : start_date,
"lt" :end_date
}
}
},
{
"range": {
"end_date": {
"gte" : end_date,
#"lte" :end_date
}
}
}
]
}
},
"path": "reservations"
}
},
{
#{ "missing" : { "field" : "reservations"} }
"not": {
"nested": {
"path": "reservations",
"filter": {
"match_all": {}
}
}
}
}
],
}
}
},
},
"sort" : {"id":"desc"}
}
You can have a look to my solution in this notebook
I've created an example, populating a sample index and searching for the desired apartments with this query
Comments answers:
Prefix: Since the nested filter is performed with the path set, only that path will be queried, so the prefix is not needed at all (at least in the version I tested). And yes, you can add a field named start_date at document level or in another nested field
Apartment matches: Yes, it matches 91 sample apartments, but since I did the search with the default size parameter, only 10 are returned (I didn't specify its value, so its default value is used). If you need to get ALL of them, use a scroll search
(the notebook has been modified to clarify these points)
First of all, I think you must use the nested query.
I am not familiar with chewy-gem but the query would look something like:
:query => {
:nested: => {
:path: => "reservations",
:query => {
:bool => {
:must_not => [
{
:range => {:"reservations.start_date" => {:gte => "2017-02-10"}}
},
{
:range => {:"reservations.end_date" => {:lte => "2017-02-12"}}
}
]
}
}
}
}
But it might also not work: if there is a reservation in 2018, the first bool query will be true (as the start date will be > 2017-02-10), therefore the apartment will not be returned, if I'm correct.
I would do something like:
:query => {
:nested: => {
:path: => "reservations",
:query => {
:bool => {
:must_not => [
{
:range => {:"reservations.start_date" => {:gte => "2017-02-10", :lte => "2017-02-12"}}
},
{
:range => {:"reservations.end_date" => {:gte => "2017-02-10", :lte => "2017-02-12"}}
}
]
}
}
}
}
which means no start date between the range you want, and no end date between the range you want.
This is the query I came up with which is supposed to take into account all conditions, namely:
either there are no reservations (1st top-level bool/should)
or there is at least one reservation and the reservation start and end dates do not overlap with the requested dates.
Here, we're asking for free apartments between 2017-02-10 and 2017-02-12
{
"bool": {
"minimum_should_match": 1,
"should": [
{
"nested": {
"path": "reservations",
"query": {
"bool": {
"must_not": {
"exists": {
"field": "reservations.start_date"
}
}
}
}
}
},
{
"bool": {
"must": [
{
"nested": {
"path": "reservations",
"query": {
"bool": {
"minimum_should_match": 1,
"should": [
{
"range": {
"reservations.start_date": {
"gt": "2017-02-10"
}
}
},
{
"range": {
"reservations.end_date": {
"lt": "2017-02-10"
}
}
}
]
}
}
}
},
{
"nested": {
"path": "reservations",
"query": {
"bool": {
"minimum_should_match": 1,
"should": [
{
"range": {
"reservations.start_date": {
"gt": "2017-02-12"
}
}
},
{
"range": {
"reservations.end_date": {
"lt": "2017-02-12"
}
}
}
]
}
}
}
}
]
}
}
]
}
}

Elastic search cluster level analyzer

How can I define one custom analyzer that will be used in more than one index (at the cluster level)? All the examples I can find show how to create a custom analyzer on a specific index.
My analyzer for example:
PUT try_index
{
"settings": {
"analysis": {
"filter": {
"od_synonyms": {
"type": "synonym",
"synonyms": [
"dog, cat => animal",
"john, lucas => boy",
"emma, kate => girl"
]
}
},
"analyzer": {
"od_analyzer": {
"tokenizer": "standard",
"filter": [
"lowercase",
"od_synonyms"
]
}
}
}
},
"mappings": {
"record": {
"properties": {
"name": {
"type": "string",
"analyzer":"standard",
"search_analyzer": "od_analyzer"
}
}
}
}
}
Any idea how to change my analyzer scope to cluster level?
thanks
There is no "scope" for analyzers. But you can do something similar with index templates:
PUT /_template/some_name_here
{
"template": "a*",
"order": 0,
"settings": {
"analysis": {
"filter": {
"od_synonyms": {
"type": "synonym",
"synonyms": [
"dog, cat => animal",
"john, lucas => boy",
"emma, kate => girl"
]
}
},
"analyzer": {
"od_analyzer": {
"tokenizer": "standard",
"filter": [
"lowercase",
"od_synonyms"
]
}
}
}
}
}
And at "template" you should put the name pattern of the indices that this template should be applied to when the index is created. You could very well specify "*" and match all the indices. I think that's the best you can do for what you want.

How do I give whole words priority in elasticsearch?

Elasticsearch is running well for me at the moment, however I want to give whole words priority over Ngrams.
I've tried the following:
client.indices.create index: index,
body: {
mappings: {
search_variable: {
properties: {
"name" => {
"type" => "string",
"index" => "not_analyzed"
},
"label" => {
"type" => "string",
"index" => "not_analyzed"
},
"value_labels" => {
"type" => "string",
"index" => "not_analyzed"
},
"value_label_search_string" => {
"type" => "string",
"index" => "not_analyzed"
},
"search_text" => {
"type" => "multi_field",
"fields" => {
"whole_words" => {"type" => "string", "analyzer" => "simple"},
"ngram" => {"type" => "string", "analyzer" => "ngram", "search_analyzer" => "ngram_search"}
}
}
}
},
settings: {
analysis: {
filter: {
ngram: {
type: 'nGram',
min_gram: 3,
max_gram: 25
}
},
analyzer: {
ngram: {
tokenizer: 'whitespace',
filter: ['lowercase', 'stop', 'ngram'],
type: 'custom'
},
ngram_search: {
tokenizer: 'whitespace',
filter: ['lowercase', 'stop'],
type: 'custom'
}
}
}
}
}
}
This is the part relevant to my full text search field: search_text:
"search_text" => {
"type" => "multi_field",
"fields" => {
"whole_words" => {"type" => "string", "analyzer" => "simple"},
"ngram" => {"type" => "string", "analyzer" => "ngram", "search_analyzer" => "ngram_search"}
}
}
I want to give higher scores to items that match whole words in the search text.
[400] {"error":{"root_cause":[{"type":"mapper_parsing_exception","reason":"analyzer [ngram_search] not found for field [ngram]"}],"type":"mapper_parsing_exception","reason":"Failed to parse mapping [search_variable]: analyzer [ngram_search] not found for field [ngram]","caused_by":{"type":"mapper_parsing_exception","reason":"analyzer [ngram_search] not found for field [ngram]"}},"status":400}
Here's the error:
"reason":"analyzer [ngram_search] not found for field [ngram]"
What am I doing wrong?
Edit:
Here is my query, where I tried to match on whole words only for now, and I only get 0 results every time.
search_query = {
index: index,
body: {
_source: {
exclude: ["search_text", "survey_id"]
},
query: {
:bool => {
:filter => {
:term => {"survey_id" => 12}
},
:must => {
:match => {
"search_text.whole_words" => {"query" => "BMW", "operator" => "and"}
}
}
}
}
}
}
result = client.search(search_query)
Here is the output of:
curl -XGET localhost:9200/yg_search_variables
{"yg_search_variables":{"aliases":{},"mappings":{"search_variable":{"properties":{"label":{"type":"string","index":"not_analyzed"},"name":{"type":"string","index":"not_analyzed"},"search_text":{"type":"string","index":"no","fields":{"ngram":{"type":"string","analyzer":"ngram","search_analyzer":"ngram_search"},"whole_words":{"type":"string","analyzer":"simple"}}},"value_label_search_string":{"type":"string","index":"not_analyzed"},"value_labels":{"type":"string","index":"not_analyzed"}}},"variables":{"properties":{"category":{"type":"string"},"label":{"type":"string","index":"not_analyzed"},"name":{"type":"string","index":"not_analyzed"},"search_text":{"type":"string","index":"no"},"survey_id":{"type":"long"},"value_label_search_text":{"type":"string"},"value_labels":{"properties":{"0":{"type":"string"},"1":{"type":"string"},"10":{"type":"string"},"100":{"type":"string"},"101":{"type":"string"},"102":{"type":"string"},"103":{"type":"string"},"104":{"type":"string"},"105":{"type":"string"},"106":{"type":"string"},"107":{"type":"string"},"108":{"type":"string"},"109":{"type":"string"},"11":{"type":"string"},"110":{"type":"string"},"1100":{"type":"string"},"1101":{"type":"string"},"1102":{"type":"string"},"1103":{"type":"string"},"1104":{"type":"string"},"1105":{"type":"string"},"1106":{"type":"string"},"1107":{"type":"string"},"1108":{"type":"string"},"1109":{"type":"string"},"111":{"type":"string"},"1110":{"type":"string"},"1111":{"type":"string"},"1112":{"type":"string"},"1113":{"type":"string"},"1114":{"type":"string"},"112":{"type":"string"},"113":{"type":"string"},"114":{"type":"string"},"115":{"type":"string"},"116":{"type":"string"},"117":{"type":"string"},"118":{"type":"string"},"119":{"type":"string"},"12":{"type":"string"},"120":{"type":"string"},"121":{"type":"string"},"122":{"type":"string"},"123":{"type":"string"},"124":{"type":"string"},"125":{"type":"string"},"126":{"type":"string"},"127":{"type":"string"},"128":{"type":"string"},"129":{"type":"string"},
"13":{"type":"string"},"130":{"type":"string"},"131":{"type":"string"},"132":{"type":"string"},"133":{"type":"string"},"134":{"type":"string"},"135":{"type":"string"},"136":{"type":"string"},"137":{"type":"string"},"138":{"type":"string"},"139":{"type":"string"},"14":{"type":"string"},"140":{"type":"string"},"141":{"type":"string"},"142":{"type":"string"},"143":{"type":"string"},"144":{"type":"string"},"145":{"type":"string"},"146":{"type":"string"},"147":{"type":"string"},"148":{"type":"string"},"149":{"type":"string"},"15":{"type":"string"},"150":{"type":"string"},"151":{"type":"string"},"152":{"type":"string"},"153":{"type":"string"},"154":{"type":"string"},"155":{"type":"string"},"156":{"type":"string"},"157":{"type":"string"},"158":{"type":"string"},"159":{"type":"string"},"16":{"type":"string"},"160":{"type":"string"},"161":{"type":"string"},"162":{"type":"string"},"163":{"type":"string"},"164":{"type":"string"},"165":{"type":"string"},"166":{"type":"string"},"167":{"type":"string"},"168":{"type":"string"},"169":{"type":"string"},"17":{"type":"string"},"170":{"type":"string"},"171":{"type":"string"},"172":{"type":"string"},"173":{"type":"string"},"174":{"type":"string"},"175":{"type":"string"},"176":{"type":"string"},"177":{"type":"string"},"178":{"type":"string"},"179":{"type":"string"},"18":{"type":"string"},"180":{"type":"string"},"181":{"type":"string"},"182":{"type":"string"},"183":{"type":"string"},"184":{"type":"string"},"185":{"type":"string"},"186":{"type":"string"},"187":{"type":"string"},"188":{"type":"string"},"189":{"type":"string"},"19":{"type":"string"},"190":{"type":"string"},"191":{"type":"string"},"192":{"type":"string"},"193":{"type":"string"},"194":{"type":"string"},"195":{"type":"string"},"196":{"type":"string"},"197":{"type":"string"},"198":{"type":"string"},"199":{"type":"string"},"2":{"type":"string"},"20":{"type":"string"},"200":{"type":"string"},"201":{"type":"string"},"202":{"type":"string"},"203":{"type":"string"},"204":{"type":"str
ing"},"205":{"type":"string"},"206":{"type":"string"},"207":{"type":"string"},"208":{"type":"string"},"209":{"type":"string"},"21":{"type":"string"},"210":{"type":"string"},"211":{"type":"string"},"22":{"type":"string"},"23":{"type":"string"},"24":{"type":"string"},"25":{"type":"string"},"26":{"type":"string"},"27":{"type":"string"},"28":{"type":"string"},"29":{"type":"string"},"3":{"type":"string"},"30":{"type":"string"},"301":{"type":"string"},"302":{"type":"string"},"303":{"type":"string"},"304":{"type":"string"},"305":{"type":"string"},"306":{"type":"string"},"307":{"type":"string"},"308":{"type":"string"},"309":{"type":"string"},"31":{"type":"string"},"310":{"type":"string"},"311":{"type":"string"},"312":{"type":"string"},"313":{"type":"string"},"314":{"type":"string"},"315":{"type":"string"},"316":{"type":"string"},"317":{"type":"string"},"32":{"type":"string"},"33":{"type":"string"},"34":{"type":"string"},"35":{"type":"string"},"36":{"type":"string"},"37":{"type":"string"},"38":{"type":"string"},"39":{"type":"string"},"4":{"type":"string"},"40":{"type":"string"},"41":{"type":"string"},"42":{"type":"string"},"43":{"type":"string"},"44":{"type":"string"},"45":{"type":"string"},"46":{"type":"string"},"47":{"type":"string"},"48":{"type":"string"},"49":{"type":"string"},"5":{"type":"string"},"50":{"type":"string"},"51":{"type":"string"},"52":{"type":"string"},"53":{"type":"string"},"54":{"type":"string"},"55":{"type":"string"},"554":{"type":"string"},"555":{"type":"string"},"556":{"type":"string"},"56":{"type":"string"},"57":{"type":"string"},"58":{"type":"string"},"59":{"type":"string"},"6":{"type":"string"},"60":{"type":"string"},"601":{"type":"string"},"602":{"type":"string"},"603":{"type":"string"},"604":{"type":"string"},"61":{"type":"string"},"62":{"type":"string"},"63":{"type":"string"},"64":{"type":"string"},"65":{"type":"string"},"66":{"type":"string"},"666":{"type":"string"},"667":{"type":"string"},"67":{"type":"string"},"68":{"type":"string"},"69":{"typ
e":"string"},"7":{"type":"string"},"70":{"type":"string"},"71":{"type":"string"},"72":{"type":"string"},"73":{"type":"string"},"74":{"type":"string"},"75":{"type":"string"},"76":{"type":"string"},"77":{"type":"string"},"777":{"type":"string"},"78":{"type":"string"},"79":{"type":"string"},"8":{"type":"string"},"80":{"type":"string"},"801":{"type":"string"},"802":{"type":"string"},"803":{"type":"string"},"804":{"type":"string"},"805":{"type":"string"},"806":{"type":"string"},"807":{"type":"string"},"808":{"type":"string"},"809":{"type":"string"},"81":{"type":"string"},"810":{"type":"string"},"811":{"type":"string"},"812":{"type":"string"},"813":{"type":"string"},"814":{"type":"string"},"815":{"type":"string"},"816":{"type":"string"},"817":{"type":"string"},"818":{"type":"string"},"819":{"type":"string"},"82":{"type":"string"},"820":{"type":"string"},"821":{"type":"string"},"822":{"type":"string"},"83":{"type":"string"},"84":{"type":"string"},"85":{"type":"string"},"86":{"type":"string"},"87":{"type":"string"},"88":{"type":"string"},"888":{"type":"string"},"89":{"type":"string"},"9":{"type":"string"},"90":{"type":"string"},"901":{"type":"string"},"902":{"type":"string"},"903":{"type":"string"},"904":{"type":"string"},"905":{"type":"string"},"906":{"type":"string"},"907":{"type":"string"},"908":{"type":"string"},"909":{"type":"string"},"91":{"type":"string"},"910":{"type":"string"},"911":{"type":"string"},"912":{"type":"string"},"913":{"type":"string"},"914":{"type":"string"},"915":{"type":"string"},"916":{"type":"string"},"917":{"type":"string"},"918":{"type":"string"},"919":{"type":"string"},"92":{"type":"string"},"920":{"type":"string"},"921":{"type":"string"},"922":{"type":"string"},"923":{"type":"string"},"924":{"type":"string"},"925":{"type":"string"},"926":{"type":"string"},"927":{"type":"string"},"928":{"type":"string"},"93":{"type":"string"},"94":{"type":"string"},"95":{"type":"string"},"96":{"type":"string"},"97":{"type":"string"},"98":{"type":"string"},"99":{
"type":"string"},"997":{"type":"string"},"998":{"type":"string"},"999":{"type":"string"}}}}}},"settings":{"index":{"creation_date":"1457103857764","analysis":{"filter":{"ngram":{"type":"nGram","min_gram":"3","max_gram":"25"}},"analyzer":{"ngram":{"filter":["lowercase","stop","ngram"],"type":"custom","tokenizer":"whitespace"},"ngram_search":{"filter":["lowercase","stop"],"type":"custom","tokenizer":"whitespace"}}},"number_of_shards":"5","number_of_replicas":"1","uuid":"zPN2LDfCTFqPleW7d5nkwA","version":{"created":"2020099"}}},"warmers":{}}}%
It seems strange that index is no:
"search_text": {
"type": "string",
"index": "no",
"fields": {
"ngram": {
"type": "string",
"analyzer": "ngram",
"search_analyzer": "ngram_search"
},
"whole_words": {
"type": "string",
"analyzer": "simple"
}
}
}
Edit: Here is a sample matching document for the term "Ford":
{
"name"=>"car_ownership",
"label"=>"Customer: Ford",
"category"=>["Vehicles", "Passenger Vehicles"], "value"=>nil,
"value_labels"=>{"1"=>"Yes", "2"=>"No"},
"node_id"=>14813,
"survey_id" => 12,
"search_text" => "Customer Ford Vehicles Passenger Vehicles Yes No"
}
Edit: I have added a smaller beginning to end test case that can be found here, which replicates the error.
https://www.dropbox.com/s/wwxm3qe0oxc2z5y/Slimmed%20ElasticSearch%20Text%20%281%29.html?dl=0
The first issue is that settings is not properly nested when you create your index. settings and mappings should be at the same level.
Then, looking at your dropbox file, I think the issue is that the mapping type is called search_variable while in your bulk you are using the mapping type test_type. Hence the mapping will never be applied.

bidirectional match on elasticsearch

I've indexed a list of terms and now I want to query for some of them
Say that I've indexed 'dog food','red dog','dog','food','cats'
How do I create an exact bidirectional match query. ie: I want when search for 'dog' to get only the term dog and not the other terms (because they don't match back).
One primitive solution I thought of is indexing the terms with their length (word-wise) and then, when searching with a query of length X, restricting it to the terms of length X. But it seems overcomplicated.
Create a custom analyzer to lowercase and normalize your search terms. So that would be your index:
{
"settings" : {
"analysis" : {
"analyzer" : {
"my_analyzer_keyword" : {
"type" : "custom",
"tokenizer" : "keyword",
"filter" : [
"asciifolding",
"lowercase"
]
}
}
}
},
"mappings" : {
"your_type" : {
"properties" : {
"name" : {
"type" : "string",
"analyzer" : "my_analyzer_keyword"
}
}
}
}
}
So if you have indexed 'dog' and a user types in Dog or dog or DOG, it will match only dog; 'dog food' won't be brought back.
Just set your field's index property to not_analyzed and your query should use term filter to search for text.
As per Evaldas' suggestion, find below a more complete solution, that also keeps the original value indexed with standard analyzer but uses a sub-field with a lowercased version of the terms:
PUT /test
{
"settings": {
"analysis": {
"analyzer": {
"my_keyword_lowercase_analyzer": {
"type": "custom",
"filter": [
"lowercase"
],
"tokenizer": "keyword"
}
}
}
},
"mappings": {
"asset": {
"properties": {
"name": {
"type": "string",
"fields": {
"case_ignore": {
"type": "string",
"analyzer": "my_keyword_lowercase_analyzer"
}
}
}
}
}
}
}
POST /test/asset/1
{
"name":"dog"
}
POST /test/asset/2
{
"name":"dog food"
}
POST /test/asset/3
{
"name":"red dog"
}
GET /test/asset/_search
{
"query": {
"match": {
"name.case_ignore": "Dog"
}
}
}

Resources