Elasticsearch Facet Tokenization

I am using a terms facet to get the top terms from my Elasticsearch server, but my tag "indian-government" is not treated as one term: it is split into "indian" and "government", so the most-used term comes out as "indian". How can I fix this? Should I change the tokenization?
'settings': {
    'analysis': {
        'analyzer': {
            'my_ngram_analyzer': {
                'tokenizer': 'my_ngram_tokenizer',
                'filter': ['my_synonym_filter']
            }
        },
        'filter': {
            'my_synonym_filter': {
                'type': 'synonym',
                'format': 'wordnet',
                'synonyms_path': 'analysis/wn_s.pl'
            }
        },
        'tokenizer': {
            'my_ngram_tokenizer': {
                'type': 'nGram',
                'min_gram': '1',
                'max_gram': '50'
            }
        }
    }
}
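(For context: the terms facet counts the tokens that were produced at index time, and the default standard analyzer splits indian-government on the hyphen, which is why the tag breaks in two. A quick way to confirm this, sketched against a local node:)

curl -XGET 'localhost:9200/_analyze?analyzer=standard' -d 'indian-government'
# returns two tokens, "indian" and "government"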
Edit: Based on the comments, I am now indexing as follows. The results do not change, though:
es.indices.create(
    index="article-index",
    body={
        'settings': {
            'analysis': {
                'analyzer': {
                    'my_ngram_analyzer': {
                        'tokenizer': 'my_ngram_tokenizer',
                        'filter': ['my_synonym_filter']
                    }
                },
                'filter': {
                    'my_synonym_filter': {
                        'type': 'synonym',
                        'format': 'wordnet',
                        'synonyms_path': 'analysis/wn_s.pl'
                    }
                },
                'tokenizer': {
                    'my_ngram_tokenizer': {
                        'type': 'nGram',
                        'min_gram': '1',
                        'max_gram': '50'
                    }
                }
            }
        },
        'mappings': {
            'my_mapping_type': {
                '_all': {'enabled': False},
                '_source': {'compressed': True},
                'properties': {
                    "tags": {
                        "type": "string",
                        "index": "not_analyzed"
                    }
                }
            }
        }
    },
    # ignore already existing index
    ignore=400
)
EDIT: Solved. my_mapping_type has to be replaced by the doc_type used when indexing (in my case, 'article') and it works :)

Making the field not_analyzed should work, if that fits your requirements.
curl -XPUT localhost:9200/INDEX -d '{
  "settings": {
    "number_of_shards": 5,
    "number_of_replicas": 2
  },
  "mappings": {
    "my_type": {
      "_all": {
        "enabled": false
      },
      "_source": {
        "compressed": true
      },
      "properties": {
        "tag": {
          "type": "string",
          "index": "not_analyzed"
        }
      }
    }
  }
}'
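With the field left not_analyzed, the whole tag is indexed as a single term, so a terms facet over it returns complete tags such as indian-government. A minimal sketch, reusing INDEX and my_type from above (popular_tags is just an arbitrary facet name):

curl -XGET 'localhost:9200/INDEX/my_type/_search' -d '{
  "query": { "match_all": {} },
  "facets": {
    "popular_tags": {
      "terms": { "field": "tag", "size": 10 }
    }
  }
}'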

Related

Elasticsearch nested query returns only last 3 results

We have the following Elasticsearch mapping:
{
  index: 'data',
  body: {
    settings: {
      analysis: {
        analyzer: {
          lowerCase: {
            tokenizer: 'whitespace',
            filter: ['lowercase']
          }
        }
      }
    },
    mappings: {
      // used for _all field
      _default_: {
        index_analyzer: 'lowerCase'
      },
      entry: {
        properties: {
          id: { type: 'string', analyzer: 'lowerCase' },
          type: { type: 'string', analyzer: 'lowerCase' },
          name: { type: 'string', analyzer: 'lowerCase' },
          blobIds: {
            type: 'nested',
            properties: {
              id: { type: 'string' },
              filename: { type: 'string', analyzer: 'lowerCase' }
            }
          }
        }
      }
    }
  }
}
and a sample document that looks like the following:
{
  "id": "5f02e9dae252732912749e13",
  "type": "test_type",
  "name": "test_name",
  "creationTimestamp": "2020-07-06T09:07:38.775Z",
  "blobIds": [
    { "id": "5f02e9dbe252732912749e18", "filename": "test1.csv" },
    { "id": "5f02e9dbe252732912749e1c", "filename": "test2.txt" },
    // removed in-between elements for simplicity
    { "id": "5f02e9dbe252732912749e1e", "filename": "test3.csv" },
    { "id": "5f02e9dbe252732912749e58", "filename": "test4.txt" },
    { "id": "5f02e9dbe252732912749e5a", "filename": "test5.csv" },
    { "id": "5f02e9dbe252732912749e5d", "filename": "test6.txt" }
  ]
}
I have the following ES query, which selects documents in a certain time range based on the creationTimestamp field and then filters the nested blobIds field with a user query that should match the blobIds.filename field.
{
  "query": {
    "filtered": {
      "filter": {
        "bool": {
          "must": [
            {
              "range": {
                "creationTimestamp": {
                  "gte": "2020-07-01T09:07:38.775Z",
                  "lte": "2020-07-07T09:07:40.147Z"
                }
              }
            },
            {
              "nested": {
                "path": ["blobIds"],
                "query": {
                  "query_string": {
                    "fields": ["blobIds.filename"],
                    "query": "*"
                  }
                },
                // returns the actual blobId hit
                // and not the whole array
                "inner_hits": {}
              }
            },
            {
              "query": {
                "query_string": {
                  "query": "+type:*test_type* +name:*test_name*"
                }
              }
            }
          ]
        }
      }
    }
  },
  "sort": [
    {
      "creationTimestamp": { "order": "asc" },
      "id": { "order": "asc" }
    }
  ]
}
The above entry clearly matches the query. However, something seems wrong with the returned inner_hits: I always get only the last 3 blobIds elements instead of the whole array of 24 elements, as can be seen below.
{
  "name": "test_name",
  "creationTimestamp": "2020-07-06T09:07:38.775Z",
  "id": "5f02e9dae252732912749e13",
  "type": "test_type",
  "blobIds": [
    { "id": "5f02e9dbe252732912749e5d", "filename": "test4.txt" },
    { "id": "5f02e9dbe252732912749e5a", "filename": "test5.csv" },
    { "id": "5f02e9dbe252732912749e58", "filename": "test6.txt" }
  ]
}
I find it very strange, since I'm only running a simple * query.
I'm using Elasticsearch v1.7 and cannot upgrade at the moment.
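No answer was recorded for this one, but a likely explanation (hedged, not verified against 1.7): inner_hits returns at most 3 nested hits per document by default, which matches the symptom exactly. Raising size on the inner_hits block should return more, e.g. for the nested clause above:

{
  "nested": {
    "path": ["blobIds"],
    "query": {
      "query_string": {
        "fields": ["blobIds.filename"],
        "query": "*"
      }
    },
    // the default inner_hits size is 3; raise it to get the full set of matches
    "inner_hits": { "size": 100 }
  }
}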

How to set up location as a geo_point in Elasticsearch?

I've been running into an issue where I get failed to find geo_point field [location].
Here is my flow.
Import the CSV:
input {
  file {
    path => "test.csv"
    start_position => "beginning"
    sincedb_path => "/dev/null"
  }
}
filter {
  csv {
    separator => ","
    # zip, lat, lon
    columns => ["zip", "lat", "lon"]
  }
  mutate {
    convert => { "zip" => "integer" }
    convert => { "lon" => "float" }
    convert => { "lat" => "float" }
  }
  mutate {
    rename => {
      "lon" => "[location][lon]"
      "lat" => "[location][lat]"
    }
  }
  mutate { convert => { "[location]" => "float" } }
}
output {
  elasticsearch {
    hosts => "cluster:80"
    index => "data"
  }
  stdout {}
}
Test records:
GET data

"hits": [
  {
    "_index": "data",
    "_type": "logs",
    "_id": "AVvQcOfXUojnX",
    "_score": 1,
    "_source": {
      "zip": 164283216,
      "location": {
        "lon": 71.34,
        "lat": 40.12
      }
    }
  },
  ...
If I try to run a geo_distance query, I get failed to find geo_point field [location].
Then I try to run:
PUT data
{
  "mappings": {
    "location": {
      "properties": {
        "pin": {
          "properties": {
            "location": {
              "type": "geo_point"
            }
          }
        }
      }
    }
  }
}
but I get index [data/3uxAJ4ISKy_NyVDNC] already exists.
How do I convert location into a geo_point so I can run the query on it?
Edit: I tried putting a template in place before indexing anything, but I still get the same errors:
PUT _template/template
{
  "template": "base_map_template",
  "order": 1,
  "settings": {
    "number_of_shards": 1
  },
  "mappings": {
    "node_points": {
      "properties": {
        "location": {
          "type": "geo_point"
        }
      }
    }
  }
}
You need to set your template pattern to data instead of base_map_template, since that is how your index is named. Also, the type name needs to be logs instead of node_points:
PUT _template/template
{
  "template": "data",    <--- change this
  "order": 1,
  "settings": {
    "number_of_shards": 1
  },
  "mappings": {
    "logs": {            <--- and this
      "properties": {
        "location": {
          "type": "geo_point"
        }
      }
    }
  }
}
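Note that index templates are only applied when an index is created, so the existing data index keeps its old mapping. Assuming the CSV can be replayed, drop the index and re-run the import:

curl -XDELETE 'localhost:9200/data'
# re-run the Logstash pipeline; the recreated index picks up the template
# and location is mapped as geo_point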

Strange results when querying nested objects

Elasticsearch version: 2.3.3
Plugins installed: no plugin
JVM version: 1.8.0_91
OS version: Linux version 3.19.0-56-generic (Ubuntu 4.8.2-19ubuntu1)
I get strange results when I query nested objects on multiple paths. I want to search for all female patients with dementia, and matching patients do appear among the results. But I also get other diagnoses I'm not looking for, namely the remaining diagnoses attached to those patients.
For example, I also get the following diagnoses despite the fact that I searched only for dementia:
Mental disorder, not otherwise specified
Essential (primary) hypertension
Why is that? I want to get only female patients with dementia, without the other diagnoses.
Client_Demographic_Details contains one document per patient. Diagnosis contains multiple documents per patient. The ultimate goal is to index my whole data from PostgreSQL DB (72 tables, over 1600 columns in total) into Elasticsearch.
Query:
{'query': {
    'bool': {
        'must': [
            {'nested': {
                'path': 'Diagnosis',
                'query': {
                    'bool': {
                        'must': [{'match_phrase': {'Diagnosis.Diagnosis': {'query': "dementia"}}}]
                    }
                }
            }},
            {'nested': {
                'path': 'Client_Demographic_Details',
                'query': {
                    'bool': {
                        'must': [{'match_phrase': {'Client_Demographic_Details.Gender_Description': {'query': "female"}}}]
                    }
                }
            }}
        ]
    }
}}
Results:
{
  "hits": {
    "hits": [
      {
        "_score": 3.4594634,
        "_type": "Patient",
        "_id": "72",
        "_source": {
          "Client_Demographic_Details": [
            {
              "Gender_Description": "Female",
              "Patient_ID": 72
            }
          ],
          "Diagnosis": [
            {
              "Diagnosis": "F00.0 - Dementia in Alzheimer's disease with early onset",
              "Patient_ID": 72
            },
            {
              "Patient_ID": 72,
              "Diagnosis": "F99.X - Mental disorder, not otherwise specified"
            },
            {
              "Patient_ID": 72,
              "Diagnosis": "I10.X - Essential (primary) hypertension"
            }
          ]
        },
        "_index": "denorm1"
      }
    ],
    "total": 6,
    "max_score": 3.4594634
  },
  "_shards": {
    "successful": 5,
    "failed": 0,
    "total": 5
  },
  "took": 8,
  "timed_out": false
}
Mapping:
{
  "denorm1": {
    "aliases": {},
    "mappings": {
      "Patient": {
        "properties": {
          "Client_Demographic_Details": {
            "type": "nested",
            "properties": {
              "Patient_ID": { "type": "long" },
              "Gender_Description": { "type": "string" }
            }
          },
          "Diagnosis": {
            "type": "nested",
            "properties": {
              "Patient_ID": { "type": "long" },
              "Diagnosis": { "type": "string" }
            }
          }
        }
      }
    },
    "settings": {
      "index": {
        "creation_date": "1473974457603",
        "number_of_shards": "5",
        "number_of_replicas": "1",
        "uuid": "Jo9cI4kRQjeWcZ7WMB6ZAw",
        "version": { "created": "2030399" }
      }
    },
    "warmers": {}
  }
}
Try this
{
  "_source": {
    "exclude": [
      "Client_Demographic_Details",
      "Diagnosis"
    ]
  },
  "query": {
    "bool": {
      "must": [
        {
          "nested": {
            "path": "Diagnosis",
            "query": {
              "bool": {
                "must": [
                  {
                    "match_phrase": {
                      "Diagnosis.Diagnosis": { "query": "dementia" }
                    }
                  }
                ]
              }
            },
            "inner_hits": {}
          }
        },
        {
          "nested": {
            "path": "Client_Demographic_Details",
            "query": {
              "bool": {
                "must": [
                  {
                    "match_phrase": {
                      "Client_Demographic_Details.Gender_Description": { "query": "female" }
                    }
                  }
                ]
              }
            },
            "inner_hits": {}
          }
        }
      ]
    }
  }
}
The matched nested documents will be inside inner_hits, and the rest in _source.
I know it's not a concrete approach.
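For reference, the matching nested documents then come back per hit in a dedicated inner_hits section keyed by path, roughly shaped like this (abridged, using the sample patient above):

"inner_hits": {
  "Diagnosis": {
    "hits": {
      "total": 1,
      "hits": [
        {
          "_source": {
            "Diagnosis": "F00.0 - Dementia in Alzheimer's disease with early onset",
            "Patient_ID": 72
          }
        }
      ]
    }
  },
  "Client_Demographic_Details": {
    "hits": {
      "total": 1,
      "hits": [
        {
          "_source": {
            "Gender_Description": "Female",
            "Patient_ID": 72
          }
        }
      ]
    }
  }
}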
As @blackmamba suggested, I constructed a mapping with the Client_Demographic_Details fields promoted to the root object and Diagnosis as a nested object.
Mapping:
{
  "denorm2": {
    "aliases": {},
    "mappings": {
      "Patient": {
        "properties": {
          "BRC_ID": { "type": "long" },
          "Diagnosis": {
            "type": "nested",
            "properties": {
              "BRC_ID": { "type": "long" },
              "Diagnosis": { "type": "string" }
            }
          },
          "Gender_Description": { "type": "string" }
        }
      }
    },
    "settings": {
      "index": {
        "creation_date": "1474031740689",
        "number_of_shards": "5",
        "number_of_replicas": "1",
        "uuid": "fMeKa6sfThmxkg_281WdHA",
        "version": { "created": "2030399" }
      }
    },
    "warmers": {}
  }
}
Query:
I added source filtering and highlighting.
{
    '_source': {
        'exclude': ['Diagnosis'],
        'include': ['BRC_ID', 'Gender_Description']
    },
    'highlight': {
        'fields': {
            'Gender_Description': {}
        }
    },
    'query': {
        'bool': {
            'must': [
                {'nested': {
                    'path': 'Diagnosis',
                    'query': {
                        'bool': {
                            'must': [{'match_phrase': {'Diagnosis.Diagnosis': {'query': "dementia"}}}]
                        }
                    },
                    'inner_hits': {
                        'highlight': {
                            'fields': {
                                'Diagnosis.Diagnosis': {}
                            }
                        },
                        '_source': ['BRC_ID', 'Diagnosis']
                    }
                }},
                {'match_phrase': {'Gender_Description': {'query': "female"}}}
            ]
        }
    }
}

How do I give whole words priority in Elasticsearch?

Elasticsearch is running well for me at the moment; however, I want to give whole words priority over n-grams.
I've tried the following:
client.indices.create index: index,
  body: {
    mappings: {
      search_variable: {
        properties: {
          "name" => {
            "type" => "string",
            "index" => "not_analyzed"
          },
          "label" => {
            "type" => "string",
            "index" => "not_analyzed"
          },
          "value_labels" => {
            "type" => "string",
            "index" => "not_analyzed"
          },
          "value_label_search_string" => {
            "type" => "string",
            "index" => "not_analyzed"
          },
          "search_text" => {
            "type" => "multi_field",
            "fields" => {
              "whole_words" => {"type" => "string", "analyzer" => "simple"},
              "ngram" => {"type" => "string", "analyzer" => "ngram", "search_analyzer" => "ngram_search"}
            }
          }
        }
      },
      settings: {
        analysis: {
          filter: {
            ngram: {
              type: 'nGram',
              min_gram: 3,
              max_gram: 25
            }
          },
          analyzer: {
            ngram: {
              tokenizer: 'whitespace',
              filter: ['lowercase', 'stop', 'ngram'],
              type: 'custom'
            },
            ngram_search: {
              tokenizer: 'whitespace',
              filter: ['lowercase', 'stop'],
              type: 'custom'
            }
          }
        }
      }
    }
  }
This is the part relevant to my full-text search field, search_text:
"search_text" => {
  "type" => "multi_field",
  "fields" => {
    "whole_words" => {"type" => "string", "analyzer" => "simple"},
    "ngram" => {"type" => "string", "analyzer" => "ngram", "search_analyzer" => "ngram_search"}
  }
}
I want to give higher scores to items that match whole words in the search text. Instead, creating the index fails with:
[400] {"error":{"root_cause":[{"type":"mapper_parsing_exception","reason":"analyzer [ngram_search] not found for field [ngram]"}],"type":"mapper_parsing_exception","reason":"Failed to parse mapping [search_variable]: analyzer [ngram_search] not found for field [ngram]","caused_by":{"type":"mapper_parsing_exception","reason":"analyzer [ngram_search] not found for field [ngram]"}},"status":400}
The key part of the error is:
"reason":"analyzer [ngram_search] not found for field [ngram]"
What am I doing wrong?
Edit: Here is my query, where I try to match on whole words only for now; I always get 0 results.
search_query = {
  index: index,
  body: {
    _source: {
      exclude: ["search_text", "survey_id"]
    },
    query: {
      :bool => {
        :filter => {
          :term => {"survey_id" => 12}
        },
        :must => {
          :match => {
            "search_text.whole_words" => {"query" => "BMW", "operator" => "and"}
          }
        }
      }
    }
  }
}
result = client.search(search_query)
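(Aside: once the mapping problem described in the answer below is fixed, the whole-word priority itself is commonly expressed as a multi_match with a per-field boost. A sketch with the field names from the mapping above; the boost factor of 3 is an arbitrary choice:)

curl -XGET 'localhost:9200/yg_search_variables/_search' -d '{
  "query": {
    "multi_match": {
      "query": "BMW",
      "fields": ["search_text.whole_words^3", "search_text.ngram"]
    }
  }
}'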
Here is the output of:
curl -XGET localhost:9200/yg_search_variables
{"yg_search_variables":{"aliases":{},"mappings":{"search_variable":{"properties":{"label":{"type":"string","index":"not_analyzed"},"name":{"type":"string","index":"not_analyzed"},"search_text":{"type":"string","index":"no","fields":{"ngram":{"type":"string","analyzer":"ngram","search_analyzer":"ngram_search"},"whole_words":{"type":"string","analyzer":"simple"}}},"value_label_search_string":{"type":"string","index":"not_analyzed"},"value_labels":{"type":"string","index":"not_analyzed"}}},"variables":{"properties":{"category":{"type":"string"},"label":{"type":"string","index":"not_analyzed"},"name":{"type":"string","index":"not_analyzed"},"search_text":{"type":"string","index":"no"},"survey_id":{"type":"long"},"value_label_search_text":{"type":"string"},"value_labels":{"properties":{"0":{"type":"string"},"1":{"type":"string"},"10":{"type":"string"},"100":{"type":"string"},"101":{"type":"string"},"102":{"type":"string"},"103":{"type":"string"},"104":{"type":"string"},"105":{"type":"string"},"106":{"type":"string"},"107":{"type":"string"},"108":{"type":"string"},"109":{"type":"string"},"11":{"type":"string"},"110":{"type":"string"},"1100":{"type":"string"},"1101":{"type":"string"},"1102":{"type":"string"},"1103":{"type":"string"},"1104":{"type":"string"},"1105":{"type":"string"},"1106":{"type":"string"},"1107":{"type":"string"},"1108":{"type":"string"},"1109":{"type":"string"},"111":{"type":"string"},"1110":{"type":"string"},"1111":{"type":"string"},"1112":{"type":"string"},"1113":{"type":"string"},"1114":{"type":"string"},"112":{"type":"string"},"113":{"type":"string"},"114":{"type":"string"},"115":{"type":"string"},"116":{"type":"string"},"117":{"type":"string"},"118":{"type":"string"},"119":{"type":"string"},"12":{"type":"string"},"120":{"type":"string"},"121":{"type":"string"},"122":{"type":"string"},"123":{"type":"string"},"124":{"type":"string"},"125":{"type":"string"},"126":{"type":"string"},"127":{"type":"string"},"128":{"type":"string"},"129":{"type":"string"},"13":{"type":"string"},"130":{"type":"string"},"131":{"type":"string"},"132":{"type":"string"},"133":{"type":"string"},"134":{"type":"string"},"135":{"type":"string"},"136":{"type":"string"},"137":{"type":"string"},"138":{"type":"string"},"139":{"type":"string"},"14":{"type":"string"},"140":{"type":"string"},"141":{"type":"string"},"142":{"type":"string"},"143":{"type":"string"},"144":{"type":"string"},"145":{"type":"string"},"146":{"type":"string"},"147":{"type":"string"},"148":{"type":"string"},"149":{"type":"string"},"15":{"type":"string"},"150":{"type":"string"},"151":{"type":"string"},"152":{"type":"string"},"153":{"type":"string"},"154":{"type":"string"},"155":{"type":"string"},"156":{"type":"string"},"157":{"type":"string"},"158":{"type":"string"},"159":{"type":"string"},"16":{"type":"string"},"160":{"type":"string"},"161":{"type":"string"},"162":{"type":"string"},"163":{"type":"string"},"164":{"type":"string"},"165":{"type":"string"},"166":{"type":"string"},"167":{"type":"string"},"168":{"type":"string"},"169":{"type":"string"},"17":{"type":"string"},"170":{"type":"string"},"171":{"type":"string"},"172":{"type":"string"},"173":{"type":"string"},"174":{"type":"string"},"175":{"type":"string"},"176":{"type":"string"},"177":{"type":"string"},"178":{"type":"string"},"179":{"type":"string"},"18":{"type":"string"},"180":{"type":"string"},"181":{"type":"string"},"182":{"type":"string"},"183":{"type":"string"},"184":{"type":"string"},"185":{"type":"string"},"186":{"type":"string"},"187":{"type":"string"},"188":{"type":"string"},"
189":{"type":"string"},"19":{"type":"string"},"190":{"type":"string"},"191":{"type":"string"},"192":{"type":"string"},"193":{"type":"string"},"194":{"type":"string"},"195":{"type":"string"},"196":{"type":"string"},"197":{"type":"string"},"198":{"type":"string"},"199":{"type":"string"},"2":{"type":"string"},"20":{"type":"string"},"200":{"type":"string"},"201":{"type":"string"},"202":{"type":"string"},"203":{"type":"string"},"204":{"type":"string"},"205":{"type":"string"},"206":{"type":"string"},"207":{"type":"string"},"208":{"type":"string"},"209":{"type":"string"},"21":{"type":"string"},"210":{"type":"string"},"211":{"type":"string"},"22":{"type":"string"},"23":{"type":"string"},"24":{"type":"string"},"25":{"type":"string"},"26":{"type":"string"},"27":{"type":"string"},"28":{"type":"string"},"29":{"type":"string"},"3":{"type":"string"},"30":{"type":"string"},"301":{"type":"string"},"302":{"type":"string"},"303":{"type":"string"},"304":{"type":"string"},"305":{"type":"string"},"306":{"type":"string"},"307":{"type":"string"},"308":{"type":"string"},"309":{"type":"string"},"31":{"type":"string"},"310":{"type":"string"},"311":{"type":"string"},"312":{"type":"string"},"313":{"type":"string"},"314":{"type":"string"},"315":{"type":"string"},"316":{"type":"string"},"317":{"type":"string"},"32":{"type":"string"},"33":{"type":"string"},"34":{"type":"string"},"35":{"type":"string"},"36":{"type":"string"},"37":{"type":"string"},"38":{"type":"string"},"39":{"type":"string"},"4":{"type":"string"},"40":{"type":"string"},"41":{"type":"string"},"42":{"type":"string"},"43":{"type":"string"},"44":{"type":"string"},"45":{"type":"string"},"46":{"type":"string"},"47":{"type":"string"},"48":{"type":"string"},"49":{"type":"string"},"5":{"type":"string"},"50":{"type":"string"},"51":{"type":"string"},"52":{"type":"string"},"53":{"type":"string"},"54":{"type":"string"},"55":{"type":"string"},"554":{"type":"string"},"555":{"type":"string"},"556":{"type":"string"},"56":{"type":"string"},"57":{"type":"string"},"58":{"type":"string"},"59":{"type":"string"},"6":{"type":"string"},"60":{"type":"string"},"601":{"type":"string"},"602":{"type":"string"},"603":{"type":"string"},"604":{"type":"string"},"61":{"type":"string"},"62":{"type":"string"},"63":{"type":"string"},"64":{"type":"string"},"65":{"type":"string"},"66":{"type":"string"},"666":{"type":"string"},"667":{"type":"string"},"67":{"type":"string"},"68":{"type":"string"},"69":{"type":"string"},"7":{"type":"string"},"70":{"type":"string"},"71":{"type":"string"},"72":{"type":"string"},"73":{"type":"string"},"74":{"type":"string"},"75":{"type":"string"},"76":{"type":"string"},"77":{"type":"string"},"777":{"type":"string"},"78":{"type":"string"},"79":{"type":"string"},"8":{"type":"string"},"80":{"type":"string"},"801":{"type":"string"},"802":{"type":"string"},"803":{"type":"string"},"804":{"type":"string"},"805":{"type":"string"},"806":{"type":"string"},"807":{"type":"string"},"808":{"type":"string"},"809":{"type":"string"},"81":{"type":"string"},"810":{"type":"string"},"811":{"type":"string"},"812":{"type":"string"},"813":{"type":"string"},"814":{"type":"string"},"815":{"type":"string"},"816":{"type":"string"},"817":{"type":"string"},"818":{"type":"string"},"819":{"type":"string"},"82":{"type":"string"},"820":{"type":"string"},"821":{"type":"string"},"822":{"type":"string"},"83":{"type":"string"},"84":{"type":"string"},"85":{"type":"string"},"86":{"type":"string"},"87":{"type":"string"},"88":{"type":"string"},"888":{"type":"string"},"89":{"type":"string"},"9":{"type":"stri
ng"},"90":{"type":"string"},"901":{"type":"string"},"902":{"type":"string"},"903":{"type":"string"},"904":{"type":"string"},"905":{"type":"string"},"906":{"type":"string"},"907":{"type":"string"},"908":{"type":"string"},"909":{"type":"string"},"91":{"type":"string"},"910":{"type":"string"},"911":{"type":"string"},"912":{"type":"string"},"913":{"type":"string"},"914":{"type":"string"},"915":{"type":"string"},"916":{"type":"string"},"917":{"type":"string"},"918":{"type":"string"},"919":{"type":"string"},"92":{"type":"string"},"920":{"type":"string"},"921":{"type":"string"},"922":{"type":"string"},"923":{"type":"string"},"924":{"type":"string"},"925":{"type":"string"},"926":{"type":"string"},"927":{"type":"string"},"928":{"type":"string"},"93":{"type":"string"},"94":{"type":"string"},"95":{"type":"string"},"96":{"type":"string"},"97":{"type":"string"},"98":{"type":"string"},"99":{"type":"string"},"997":{"type":"string"},"998":{"type":"string"},"999":{"type":"string"}}}}}},"settings":{"index":{"creation_date":"1457103857764","analysis":{"filter":{"ngram":{"type":"nGram","min_gram":"3","max_gram":"25"}},"analyzer":{"ngram":{"filter":["lowercase","stop","ngram"],"type":"custom","tokenizer":"whitespace"},"ngram_search":{"filter":["lowercase","stop"],"type":"custom","tokenizer":"whitespace"}}},"number_of_shards":"5","number_of_replicas":"1","uuid":"zPN2LDfCTFqPleW7d5nkwA","version":{"created":"2020099"}}},"warmers":{}}}%
It seems strange that index is "no":
"search_text": {
"type": "string",
"index": "no",
"fields": {
"ngram": {
"type": "string",
"analyzer": "ngram",
"search_analyzer": "ngram_search"
},
"whole_words": {
"type": "string",
"analyzer": "simple"
}
}
}
Edit: Here is a sample matching document for the term "Ford":
{
  "name" => "car_ownership",
  "label" => "Customer: Ford",
  "category" => ["Vehicles", "Passenger Vehicles"],
  "value" => nil,
  "value_labels" => {"1" => "Yes", "2" => "No"},
  "node_id" => 14813,
  "survey_id" => 12,
  "search_text" => "Customer Ford Vehicles Passenger Vehicles Yes No"
}
Edit: I have added a smaller beginning-to-end test case that replicates the error; it can be found here:
https://www.dropbox.com/s/wwxm3qe0oxc2z5y/Slimmed%20ElasticSearch%20Text%20%281%29.html?dl=0
The first issue is that settings is not properly nested when you create your index: settings and mappings should be at the same level.
Then, looking at your Dropbox file, I think the second issue is that the mapping type is called search_variable, while in your bulk requests you are using the mapping type test_type, so the mapping is never applied.
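A minimal sketch of the corrected index creation, expressed as the equivalent curl call, with settings and mappings as siblings; only the search_text field is shown, and the remaining not_analyzed fields would be added alongside it:

curl -XPUT 'localhost:9200/yg_search_variables' -d '{
  "settings": {
    "analysis": {
      "filter": {
        "ngram": { "type": "nGram", "min_gram": 3, "max_gram": 25 }
      },
      "analyzer": {
        "ngram": { "type": "custom", "tokenizer": "whitespace", "filter": ["lowercase", "stop", "ngram"] },
        "ngram_search": { "type": "custom", "tokenizer": "whitespace", "filter": ["lowercase", "stop"] }
      }
    }
  },
  "mappings": {
    "search_variable": {
      "properties": {
        "search_text": {
          "type": "multi_field",
          "fields": {
            "whole_words": { "type": "string", "analyzer": "simple" },
            "ngram": { "type": "string", "analyzer": "ngram", "search_analyzer": "ngram_search" }
          }
        }
      }
    }
  }
}'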

Elasticsearch nested query and filter

I have the following Elasticsearch mapping:
"show": {
"properties": {
"startsAt": {
"type": "date"
},
"venue": {
"type": "nested",
"properties": {
"name": {
"type": "string"
},
"address": {
"type": "string",
"index": "no"
},
"location": {
"type": "geo_point",
"lat_lon": true
},
"section": {
"type": "string",
"index": "not_analyzed"
}
}
}
}
}
I want to find an exact match using the show.startsAt, show.venue.location, and show.venue.section properties. I've been trying the following query, but it's not taking show.venue.section into account.
bool: {
  must: [
    {match: {startsAt: starts}},
    {
      nested: {
        path: 'venue',
        query: {
          match: {'venue.section': section}
        },
        filter: {
          geo_distance: {
            distance: '1m',
            'venue.location': location
          }
        }
      }
    }
  ]
}
This query worked for me:
query: {
  bool: {
    must: [
      {match: {startsAt: starts}},
      {
        nested: {
          path: 'venue',
          filter: {
            bool: {
              must: [
                {
                  geo_distance: {
                    distance: '1m',
                    'venue.location': location
                  }
                },
                {
                  term: {'venue.section': section}
                }
              ]
            }
          }
        }
      }
    ]
  }
}
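Two things changed here, as far as I can tell (the thread itself doesn't elaborate): the section condition moved from a match query to a term filter, which is the right tool for a not_analyzed field because it compares the exact stored value, and both conditions now sit in a single bool filter inside the nested clause, so they are evaluated together against the same nested venue object.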
