I'm fairly new to Elasticsearch, so you may have to bear with me. I'm running into a problem where a document appears if I search for it using 20 characters or fewer, but as soon as the query contains more characters of the same word, I get no results:
Using 'phenoxymethylpenicillin' brings no documents.
Using 'phenoxymethylpenicil' brings back documents.
This is the query I'm trying to use:
{
"match_phrase": {
"genericNames.name": {
"query": "phenoxymethylpenicillin",
"slop": 15,
"zero_terms_query": "NONE",
"boost": 1.0
}
}
}
Here is the full query: https://pastebin.com/DEJvP2uS
Like I said, I'm fairly new to this, so it may simply be a matter of my not looking in the right place.
So my question is, what possible areas would cause this and why?
Thanks!
Edit:
Below is an extract from one of the documents in the sample data. I can't show much of it because a lot of it is sensitive, but luckily I can share the names from the sample data. This is from the data I'm trying to search:
"genericNames":[
{
"nameType":1,
"name":"Phenoxymethylpenicillin 250mg tablets",
"nameChangeCode":"0000",
"nameBasisCode":"0001",
"nameTypeDescription":"Name",
"startDate":"1948-01-01T00:00:00.000000+0000",
"endDate":"3456-02-01T00:00:00.000000+0000"
},
{
"nameType":5,
"name":"Penicillin V 250mg tablets",
"nameTypeDescription":"Alternative Name 3",
"startDate":"1948-01-01T00:00:00.000000+0000",
"endDate":"3456-02-01T00:00:00.000000+0000"
}
],
I have also provided the index mapping as it may provide extra information:
{
"amp": {
"mappings": {
"properties": {
"_class": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"ampId": {
"type": "long"
},
"amppId": {
"type": "long"
},
"attributes": {
"type": "nested",
"properties": {
"attributeQualifier": {
"type": "keyword"
},
"attributeType": {
"type": "integer"
},
"attributeTypeDescription": {
"type": "keyword"
},
"attributeValue": {
"type": "text",
"fields": {
"raw": {
"type": "keyword"
}
}
},
"countryId": {
"type": "long"
},
"decodedValue": {
"type": "text",
"fields": {
"raw": {
"type": "keyword"
}
}
},
"endDate": {
"type": "date",
"format": "uuuu-MM-dd'T'HH:mm:ss.SSSSSSZ"
},
"startDate": {
"type": "date",
"format": "uuuu-MM-dd'T'HH:mm:ss.SSSSSSZ"
}
}
},
"dictionaries": {
"type": "nested",
"properties": {
"abbreviation": {
"type": "text",
"fields": {
"raw": {
"type": "keyword"
}
}
},
"description": {
"type": "text",
"fields": {
"raw": {
"type": "keyword"
}
}
},
"dictId": {
"type": "integer"
},
"endDate": {
"type": "date",
"format": "uuuu-MM-dd'T'HH:mm:ss.SSSSSSZ"
},
"startDate": {
"type": "date",
"format": "uuuu-MM-dd'T'HH:mm:ss.SSSSSSZ"
}
}
},
"endDate": {
"type": "date",
"format": "uuuu-MM-dd'T'HH:mm:ss.SSSSSSZ"
},
"excipients": {
"type": "nested",
"properties": {
"basisOfStrengthCode": {
"type": "keyword"
},
"bossId": {
"type": "long"
},
"endDate": {
"type": "date",
"format": "uuuu-MM-dd'T'HH:mm:ss.SSSSSSZ"
},
"id": {
"type": "long"
},
"ingredientNames": {
"properties": {
"endDate": {
"type": "date"
},
"name": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"startDate": {
"type": "date"
}
}
},
"startDate": {
"type": "date",
"format": "uuuu-MM-dd'T'HH:mm:ss.SSSSSSZ"
},
"strengthDenominatorUnitOfMeasureCode": {
"type": "keyword"
},
"strengthDenominatorValue": {
"type": "keyword"
},
"strengthNumeratorUnitOfMeasureCode": {
"type": "keyword"
},
"strengthNumeratorValue": {
"type": "keyword"
},
"strengthVal": {
"type": "keyword"
},
"unitOfMeasure": {
"type": "keyword"
}
}
},
"extractableEntry": {
"type": "boolean"
},
"genericNames": {
"type": "nested",
"properties": {
"endDate": {
"type": "date",
"format": "uuuu-MM-dd'T'HH:mm:ss.SSSSSSZ"
},
"name": {
"type": "text",
"ignore_above": 256,
"fields": {
"raw": {
"type": "keyword"
}
},
"analyzer": "autocomplete_index",
"search_analyzer": "autocomplete_search"
},
"nameBasisCode": {
"type": "keyword"
},
"nameChangeCode": {
"type": "keyword"
},
"nameType": {
"type": "integer"
},
"nameTypeDescription": {
"type": "text",
"fields": {
"raw": {
"type": "keyword"
}
}
},
"startDate": {
"type": "date",
"format": "uuuu-MM-dd'T'HH:mm:ss.SSSSSSZ"
}
}
},
"id": {
"type": "keyword"
},
"ingredients": {
"type": "nested",
"properties": {
"basisOfStrengthCode": {
"type": "keyword"
},
"bossId": {
"type": "long"
},
"endDate": {
"type": "date",
"format": "uuuu-MM-dd'T'HH:mm:ss.SSSSSSZ"
},
"id": {
"type": "long"
},
"ingredientNames": {
"properties": {
"endDate": {
"type": "date"
},
"name": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"startDate": {
"type": "date"
}
}
},
"startDate": {
"type": "date",
"format": "uuuu-MM-dd'T'HH:mm:ss.SSSSSSZ"
},
"strengthDenominatorUnitOfMeasureCode": {
"type": "keyword"
},
"strengthDenominatorValue": {
"type": "keyword"
},
"strengthNumeratorUnitOfMeasureCode": {
"type": "keyword"
},
"strengthNumeratorValue": {
"type": "keyword"
},
"strengthVal": {
"type": "keyword"
},
"unitOfMeasure": {
"type": "keyword"
}
}
},
"invalidEntry": {
"type": "boolean"
},
"pitId": {
"type": "integer"
},
"ppaCodes": {
"type": "nested",
"properties": {
"code": {
"type": "text",
"fields": {
"raw": {
"type": "keyword"
}
}
},
"endDate": {
"type": "date",
"format": "uuuu-MM-dd'T'HH:mm:ss.SSSSSSZ"
},
"startDate": {
"type": "date",
"format": "uuuu-MM-dd'T'HH:mm:ss.SSSSSSZ"
}
}
},
"proprietaryNames": {
"type": "nested",
"properties": {
"endDate": {
"type": "date",
"format": "uuuu-MM-dd'T'HH:mm:ss.SSSSSSZ"
},
"name": {
"type": "text",
"ignore_above": 256,
"fields": {
"raw": {
"type": "keyword"
}
},
"analyzer": "autocomplete_index",
"search_analyzer": "autocomplete_search"
},
"nameBasisCode": {
"type": "keyword"
},
"nameChangeCode": {
"type": "keyword"
},
"nameType": {
"type": "integer"
},
"nameTypeDescription": {
"type": "text",
"fields": {
"raw": {
"type": "keyword"
}
}
},
"startDate": {
"type": "date",
"format": "uuuu-MM-dd'T'HH:mm:ss.SSSSSSZ"
}
}
},
"qpuUomCde": {
"type": "keyword"
},
"qpuVal": {
"type": "keyword"
},
"qtyUomCde": {
"type": "keyword"
},
"qtyVal": {
"type": "keyword"
},
"snomedCodes": {
"type": "nested",
"properties": {
"endDate": {
"type": "date",
"format": "uuuu-MM-dd'T'HH:mm:ss.SSSSSSZ"
},
"ppaNextNo": {
"type": "text",
"fields": {
"raw": {
"type": "keyword"
}
}
},
"snomed": {
"type": "text",
"fields": {
"raw": {
"type": "keyword"
}
}
},
"startDate": {
"type": "date",
"format": "uuuu-MM-dd'T'HH:mm:ss.SSSSSSZ"
}
}
},
"snomedDescriptions": {
"type": "nested",
"properties": {
"endDate": {
"type": "date",
"format": "uuuu-MM-dd'T'HH:mm:ss.SSSSSSZ"
},
"ppaNextNo": {
"type": "text",
"fields": {
"raw": {
"type": "keyword"
}
}
},
"snomed": {
"type": "text",
"fields": {
"raw": {
"type": "keyword"
}
}
},
"startDate": {
"type": "date",
"format": "uuuu-MM-dd'T'HH:mm:ss.SSSSSSZ"
}
}
},
"startDate": {
"type": "date",
"format": "uuuu-MM-dd'T'HH:mm:ss.SSSSSSZ"
},
"suppliers": {
"type": "nested",
"properties": {
"endDate": {
"type": "date",
"format": "uuuu-MM-dd'T'HH:mm:ss.SSSSSSZ"
},
"id": {
"type": "long"
},
"names": {
"type": "nested",
"properties": {
"endDate": {
"type": "date",
"format": "uuuu-MM-dd'T'HH:mm:ss.SSSSSSZ"
},
"name": {
"type": "text",
"fields": {
"raw": {
"type": "keyword"
}
},
"analyzer": "autocomplete_index",
"search_analyzer": "autocomplete_search"
},
"nameBasisCode": {
"type": "keyword"
},
"nameChangeCode": {
"type": "keyword"
},
"nameType": {
"type": "integer"
},
"nameTypeDescription": {
"type": "text",
"fields": {
"raw": {
"type": "keyword"
}
}
},
"startDate": {
"type": "date",
"format": "uuuu-MM-dd'T'HH:mm:ss.SSSSSSZ"
}
}
},
"startDate": {
"type": "date",
"format": "uuuu-MM-dd'T'HH:mm:ss.SSSSSSZ"
}
}
},
"udfs": {
"type": "nested",
"properties": {
"ddIndicator": {
"type": "integer"
},
"endDate": {
"type": "date",
"format": "uuuu-MM-dd'T'HH:mm:ss.SSSSSSZ"
},
"startDate": {
"type": "date",
"format": "uuuu-MM-dd'T'HH:mm:ss.SSSSSSZ"
},
"udfsUomCode": {
"type": "keyword"
},
"udfsValue": {
"type": "keyword"
},
"vmpUomCode": {
"type": "keyword"
}
}
},
"vmpId": {
"type": "long"
},
"vmppId": {
"type": "long"
},
"vtms": {
"type": "nested",
"properties": {
"endDate": {
"type": "date",
"format": "uuuu-MM-dd'T'HH:mm:ss.SSSSSSZ"
},
"id": {
"type": "long"
},
"startDate": {
"type": "date",
"format": "uuuu-MM-dd'T'HH:mm:ss.SSSSSSZ"
}
}
}
}
}
}
}
Edit: Added link to full query - https://pastebin.com/DEJvP2uS
Edit: Settings for index:
{
"index": {
"max_ngram_diff": "20",
"analysis": {
"filter": {
"autocomplete_suffix_filter": {
"type": "ngram",
"min_gram": "1",
"max_gram": "20"
},
"autocomplete_filter": {
"type": "edge_ngram",
"min_gram": "1",
"max_gram": "20"
}
},
"analyzer": {
"autocomplete_index": {
"filter": [
"lowercase",
"autocomplete_filter",
"autocomplete_suffix_filter"
],
"type": "custom",
"tokenizer": "standard"
},
"autocomplete_search": {
"filter": [
"lowercase"
],
"type": "custom",
"tokenizer": "standard"
}
}
},
"number_of_replicas": "1"
}
}
This is most likely caused by the custom analyzers on your genericNames.name field: you use a different analyzer at index time (autocomplete_index) than at search time (autocomplete_search). The definitions of these analyzers were not provided in the original question; only the mapping was.
Please provide the output of the _settings API for your index; refer to https://www.elastic.co/guide/en/elasticsearch/reference/current/indices-get-settings.html for more info.
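You need to check the tokens generated for phenoxymethylpenicillin using the Analyze API with both the autocomplete_index and the autocomplete_search analyzer, and you will notice the difference. With the index settings shown in the question, both n-gram filters in autocomplete_index have max_gram set to 20, so no indexed token is ever longer than 20 characters, whereas autocomplete_search only lowercases the standard-tokenized term and therefore emits the full 23-character token, which has no counterpart in the index.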
In the index mapping provided above, genericNames is of the nested type, so you need to use a nested query.
Adding a working example using the same index data as provided above along with search query and search result.
Search Query:
{
"query": {
"nested": {
"path": "genericNames",
"query": {
"bool": {
"must": [
{
"match": {
"genericNames.name": "phenoxymethylpenicillin"
}
}
]
}
},
"inner_hits":{}
}
}
}
Search Result:
"hits": [
{
"_index": "64817981",
"_type": "_doc",
"_id": "1",
"_nested": {
"field": "genericNames",
"offset": 0
},
"_score": 0.7361701,
"_source": {
"nameType": 1,
"name": "Phenoxymethylpenicillin 250mg tablets",
"nameChangeCode": "0000",
"nameBasisCode": "0001",
"nameTypeDescription": "Name",
"startDate": "1948-01-01T00:00:00.000000+0000",
"endDate": "3456-02-01T00:00:00.000000+0000"
}
}
]
I am currently trying to update an index template on Elasticsearch 6.7/6.8.
Templates are stored in the code and are applied each time my API starts.
There are no errors, the request returns 200.
For example, here is a template I am currently using:
{
"index_patterns": [ "*-ec2-reports" ],
"version": 11,
"mappings": {
"ec2-report": {
"properties": {
"account": {
"type": "keyword"
},
"reportDate": {
"type": "date"
},
"reportType": {
"type": "keyword"
},
"instance": {
"properties": {
"id": {
"type": "keyword"
},
"region": {
"type": "keyword"
},
"state": {
"type": "keyword"
},
"purchasing": {
"type": "keyword"
},
"keyPair": {
"type": "keyword"
},
"type": {
"type": "keyword"
},
"platform": {
"type": "keyword"
},
"tags": {
"type": "nested",
"properties": {
"key": {
"type": "keyword"
},
"value": {
"type": "keyword"
}
}
},
"costs": {
"type": "object"
},
"stats": {
"type": "object",
"properties": {
"cpu": {
"type": "object",
"properties": {
"average": {
"type": "double"
},
"peak": {
"type": "double"
}
}
},
"network": {
"type": "object",
"properties": {
"in": {
"type": "double"
},
"out": {
"type": "double"
}
}
},
"volumes": {
"type": "nested",
"properties": {
"id": {
"type": "keyword"
},
"read": {
"type": "double"
},
"write": {
"type": "double"
}
}
}
}
},
"recommendation": {
"type": "object",
"properties": {
"instancetype": {
"type": "keyword"
},
"reason": {
"type": "keyword"
},
"newgeneration": {
"type": "keyword"
}
}
}
}
}
},
"_all": {
"enabled": false
},
"numeric_detection": false,
"date_detection": false
}
}
}
I'd like to add a new keyword field under the properties object like this:
"exampleField": {
"type": "keyword"
}
but it seems the template is not applied to existing indexes.
When data is inserted into a specific index which use the template, it is stored like this:
"exampleField": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
}
because the template has not been updated beforehand.
I would expect it to be like:
"exampleField": {
"type": "keyword"
}
in the index and in the template.
Does someone have any idea on how to have this result?
Thank you, Alexandre.
We have an Elasticsearch 5.5 setup. We use NEST to perform our queries from C#.
When executing the following query:
{
"query": {
"bool": {
"must": [
{
"query_string": {
"query": "00917751"
}
}
]
}
}
}
We get the desired result: one result with that number as its identifier.
When using the following query:
{
"query": {
"bool": {
"must": [
{
"query_string": {
"query": "00917751",
"fields": [
"searchReference",
"searchIdentifier",
"searchObjectNo",
"searchBrand",
"searchExtSerNo"
]
}
}
]
}
}
}
We get no results.
The value we are searching for is in the field searchIndentifier, and has the value "1-00917751".
We have a custom analyzer called "final"
.Custom("final", cu => cu
.Tokenizer("keyword").Filters(new List<string>() { "lowercase" }))
The field searchIndentifier has no custom analyzer set on it. We tried adding the whitespace tokenizer to it, but that made no difference.
Another field called "searchObjectNo" does work, when we try to search for the value "S328-25" with the query "S328". These fields are exactly the same.
Any ideas here?
Another question. In the first query, when we search for 1-00917751 (without the quotes) we get a lot of results. But we think that is because of the keyword tokenizer?
Thank you
Schoof
Index settings and mappings:
{
"inventoryitems": {
"aliases": {},
"mappings": {
"inventoryobject": {
"properties": {
"articleGroups": {
"type": "nested",
"properties": {
"id": {
"type": "long"
}
}
},
"articleId": {
"type": "long"
},
"articleNumber": {
"type": "text",
"boost": 1.5,
"analyzer": "final"
},
"brand": {
"type": "text",
"analyzer": "final"
},
"catalogues": {
"type": "nested",
"properties": {
"articleGroupId": {
"type": "long"
},
"articleGroupName": {
"type": "text",
"analyzer": "final",
"fielddata": true
},
"id": {
"type": "long"
},
"name": {
"type": "text",
"analyzer": "final",
"fielddata": true
}
}
},
"details": {
"type": "nested",
"properties": {
"actualState": {
"type": "double"
},
"allocation": {
"type": "text",
"analyzer": "final",
"fielddata": true
},
"available": {
"type": "double"
},
"batch": {
"type": "text",
"analyzer": "final"
},
"calibrationDate": {
"type": "date"
},
"expected": {
"type": "double"
},
"externalSerialNumber": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"id": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"inReturn": {
"type": "double"
},
"inventory": {
"type": "double"
},
"isInMobileCarrier": {
"type": "boolean"
},
"locationDetail": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"locationId": {
"type": "long"
},
"locationName": {
"type": "text",
"analyzer": "final",
"fielddata": true
},
"locationType": {
"type": "text",
"analyzer": "final",
"fielddata": true
},
"lotId": {
"type": "long"
},
"mobileCarrierCode": {
"type": "text",
"analyzer": "final",
"fielddata": true
},
"mobileCarrierId": {
"type": "long"
},
"ownerCode": {
"type": "text",
"analyzer": "final"
},
"requested": {
"type": "double"
},
"reserved": {
"type": "double"
},
"storeLocationId": {
"type": "long"
},
"thicknessCode": {
"type": "text",
"analyzer": "final"
},
"weldedMark": {
"type": "text",
"analyzer": "final"
}
}
},
"docNo": {
"type": "long"
},
"hasStock": {
"type": "boolean"
},
"id": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"identifier": {
"type": "text",
"boost": 1.5,
"analyzer": "final"
},
"inventoryItemType": {
"properties": {
"name": {
"type": "text",
"analyzer": "final",
"fielddata": true
}
}
},
"mobileCarrierId": {
"type": "long"
},
"name": {
"type": "text",
"boost": 1.5,
"analyzer": "final"
},
"objectNumber": {
"type": "text",
"boost": 1.5,
"analyzer": "final"
},
"quantity": {
"type": "double"
},
"reference": {
"type": "text",
"boost": 1.5,
"analyzer": "final"
},
"searchBrand": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"searchExtSerNo": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"searchIndentifier": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"searchName": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"searchObjectNo": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"searchReference": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"sortNumber": {
"type": "long"
},
"stockUnit": {
"type": "text",
"boost": 1.5,
"analyzer": "final"
}
}
}
},
"settings": {
"index": {
"number_of_shards": "3",
"provided_name": "inventoryitems",
"creation_date": "1539253308319",
"analysis": {
"analyzer": {
"final": {
"filter": [
"lowercase"
],
"type": "custom",
"tokenizer": "keyword"
}
}
},
"number_of_replicas": "1",
"uuid": "Kb5KuYEiR5GQqgBPVYjJfA",
"version": {
"created": "5050299"
}
}
}
}
}
The answer is pretty simple: in your mapping your field is named searchIndentifier and in your query you're using a field called searchIdentifier which doesn't exist ;-)