How to migrate data from ES2.1 to ES7 by logstash - elasticsearch

[ERROR][logstash.outputs.elasticsearch][main] Failed to install
template {:message=>"Got response code '400' contacting Elasticsearch
at URL 'http://127.0.0.1:9200/_template/ecs-logstash'",
:exception=>LogStash::Outputs::ElasticSearch::HttpClient::Pool::BadResponseCodeError
original ES version: 2.1.2 new ES version: 7.13 logstash version: 8.1.1
I have below index on my ES2
"designs_v1": {
"mappings": {
"design": {
"dynamic": "false",
"_all": {
"enabled": false
},
"_id": {
"store": true,
"index": "not_analyzed"
},
"_timestamp": {
"enabled": true,
"store": true
},
"properties": {
"createDate": {
"type": "date",
"fielddata": {
"__comment": "Used for sorting",
"loading": "eager"
},
"format": "epoch_millis||date_time"
},
"designId": {
"type": "long",
"fielddata": {
"__comment": "Used for sorting to break ties and accessed by our custom scorer",
"loading": "eager"
}
},
"editorialTags": {
"type": "string",
"norms": {
"loading": "eager"
},
"analyzer": "standard_with_stopwords",
"fields": {
"shingles": {
"type": "string",
"norms": {
"loading": "eager"
},
"analyzer": "shingle"
},
"stemmed": {
"type": "string",
"norms": {
"loading": "eager"
},
"analyzer": "kstem"
}
}
},
"isPersonalizable": {
"type": "boolean"
},
"legalBlockTags": {
"type": "string",
"norms": {
"enabled": false
},
"analyzer": "standard_with_stopwords"
},
"memberId": {
"type": "long"
},
"pixel_height": {
"type": "integer"
},
"pixel_width": {
"type": "integer"
},
"products": {
"type": "nested",
"properties": {
"caption": {
"type": "string",
"norms": {
"enabled": false
},
"analyzer": "standard_with_stopwords"
},
"createDate": {
"type": "date",
"format": "epoch_millis||date_time"
},
"defaultThumbnail": {
"type": "integer"
},
"description": {
"type": "string",
"norms": {
"enabled": false
},
"analyzer": "standard_with_stopwords"
},
"hasPersonalizableSvg": {
"type": "boolean"
},
"imageOneId": {
"type": "long"
},
"imageTwoId": {
"type": "long"
},
"locations": {
"type": "string",
"norms": {
"enabled": false
},
"analyzer": "keyword"
},
"productId": {
"type": "long"
},
"productTypeId": {
"type": "integer",
"fielddata": {
"__comment": "Used during aggregations",
"loading": "eager"
}
},
"showColorId": {
"type": "integer"
},
"storeId": {
"type": "long"
}
}
},
"sellerTags": {
"type": "string",
"norms": {
"loading": "eager"
},
"analyzer": "standard_with_stopwords",
"fields": {
"shingles": {
"type": "string",
"norms": {
"loading": "eager"
},
"analyzer": "shingle"
},
"stemmed": {
"type": "string",
"norms": {
"loading": "eager"
},
"analyzer": "kstem"
}
}
}
}
}
}
}
I created new index in ES7:
{
"mappings": {
// "_id": {
// "store": true,
// "index": "not_analyzed"
// },
// "_timestamp": {
// "enabled": true,
// "store": true
// },
"properties": {
"createDate": {
"type": "date",
"format": "epoch_millis||date_time"
},
"designId": {
"type": "long"
},
"editorialTags": {
"type": "text",
"norms": true,
"analyzer": "standard_with_stopwords",
"fields": {
"shingles": {
"type": "text",
"norms": true,
"analyzer": "shingle"
},
"stemmed": {
"type": "text",
"norms": true,
"analyzer": "kstem"
}
}
},
"isPersonalizable": {
"type": "boolean"
},
"legalBlockTags": {
"type": "text",
"norms": false,
"analyzer": "standard_with_stopwords"
},
"memberId": {
"type": "long"
},
"pixel_height": {
"type": "integer"
},
"pixel_width": {
"type": "integer"
},
"products": {
"type": "nested",
"properties": {
"caption": {
"type": "text",
"norms": false,
"analyzer": "standard_with_stopwords"
},
"createDate": {
"type": "date",
"format": "epoch_millis||date_time"
},
"defaultThumbnail": {
"type": "integer"
},
"description": {
"type": "text",
"norms": false,
"analyzer": "standard_with_stopwords"
},
"hasPersonalizableSvg": {
"type": "boolean"
},
"imageOneId": {
"type": "long"
},
"imageTwoId": {
"type": "long"
},
"locations": {
"type": "text",
"norms": false,
"analyzer": "keyword"
},
"productId": {
"type": "long"
},
"productTypeId": {
"type": "integer"
},
"showColorId": {
"type": "integer"
},
"storeId": {
"type": "long"
}
}
},
"sellerTags": {
"type": "text",
"norms": true,
"analyzer": "standard_with_stopwords",
"fields": {
"shingles": {
"type": "text",
"norms": true,
"analyzer": "shingle"
},
"stemmed": {
"type": "text",
"norms": true,
"analyzer": "kstem"
}
}
}
}
}
}
I want to migrate data to ES7 by logstash, below is my logstash conf file:
input {
elasticsearch {
hosts => ["http://xxx:9200"]
index => "designs_v1"
type => "design"
size => 10
scroll => "1m"
}
}
filter {
}
output {
elasticsearch {
hosts => ["http://127.0.0.1:9200"]
index => "designs_v1"
#document_type => "%{[#metadata][_type]}"
document_id => "%{[#metadata][_id]}"
}
}
But I can't fix issue:
[ERROR][logstash.outputs.elasticsearch][main] Failed to install
template {:message=>"Got response code '400' contacting Elasticsearch
at URL 'http://127.0.0.1:9200/_template/ecs-logstash'",
:exception=>LogStash::Outputs::ElasticSearch::HttpClient::Pool::BadResponseCodeError
What can I do now?

Related

Problem with rotating (ILM) Cloudflare indices on ELK cluster

The problem I have is that my Cloudflare indices report the following ILM errors:
on index with alias: illegal_argument_exception: rollover target [cloudflare] does not point to a write index
on index without alias: illegal_argument_exception: index.lifecycle.rollover_alias [cloudflare] does not point to index [cloudflare-2022.08.13-000001]
Basically what I was able to find out is that when a new index is created, it doesn't receive the alias from rollover_alias:
{
"settings": {
"index": {
"lifecycle": {
"name": "cloudflare",
"rollover_alias": "cloudflare"
},
option which makes the rollover fail. When I assign the alias manually to all indices affected, rollover and ILM starts to work again but I want to understand why does it happen and find out a permanent solution to this problem. Otherwise I will have to check this cluster manually and force moving the data from HOT to WARM nodes when the HOT storage fills up.
The setup on Cloudflare is based on this guide, in other words Cloudflare pushes the logs to S3 bucket, then AWS Lambda pushes them to ELK (elastic.co).
Cloudflare index template in question:
"cloudflare": {
"index_patterns": [
"cloudflare-*"
],
"mappings": {
"properties": {
"observer.ip": {
"type": "ip"
},
"cloudflare.parent.ray_id": {
"type": "keyword"
},
"cloudflare.worker.subrequest_count": {
"type": "long"
},
"cloudflare.origin.ip": {
"type": "ip"
},
"cloudflare.edge.rate.limit.id": {
"type": "long"
},
"user_agent.version": {
"type": "keyword"
},
"cloudflare.device.type": {
"type": "keyword"
},
"cloudflare.edge.pathing.op": {
"type": "keyword"
},
"user_agent.os.version": {
"type": "keyword"
},
"source.port": {
"type": "long"
},
"cloudflare.edge.server.ip": {
"type": "ip"
},
"cloudflare.security_level": {
"type": "keyword"
},
"observer.vendor": {
"type": "keyword"
},
"event.dataset": {
"type": "keyword"
},
"cloudflare.worker.cpu_time": {
"type": "long"
},
"http.response.status_code": {
"type": "long"
},
"user_agent.minor": {
"type": "keyword"
},
"cloudflare.cache.response.status": {
"type": "long"
},
"user_agent.patch": {
"type": "keyword"
},
"#timestamp": {
"type": "date"
},
"cloudflare.edge.colo.id": {
"type": "integer"
},
"user_agent.os.full": {
"type": "keyword"
},
"source.address": {
"type": "keyword"
},
"user_agent.build": {
"type": "keyword"
},
"source.as.number": {
"type": "long"
},
"cloudflare.edge.start.timestamp": {
"type": "date"
},
"cloudflare.waf.rule.id": {
"type": "keyword"
},
"cloudflare.origin.ssl.protocol": {
"type": "keyword"
},
"http.request.bytes": {
"type": "long"
},
"source.geo.country_iso_code": {
"type": "keyword"
},
"cloudflare.edge.pathing.src": {
"type": "keyword"
},
"cloudflare.edge.response.bytes": {
"type": "long"
},
"cloudflare.edge.response.status": {
"type": "long"
},
"cloudflare.waf.rule.message": {
"type": "keyword"
},
"cloudflare.origin.response.time": {
"type": "long"
},
"url.path": {
"fields": {
"path": {
"index": true,
"eager_global_ordinals": false,
"fielddata": false,
"index_options": "positions",
"index_phrases": false,
"norms": true,
"type": "text",
"store": false
}
},
"type": "keyword"
},
"cloudflare.edge.response.compression_ratio": {
"type": "float"
},
"cloudflare.worker.subrequest": {
"type": "boolean"
},
"cloudflare.cache.response.bytes": {
"type": "long"
},
"cloudflare.waf.profile": {
"type": "keyword"
},
"cloudflare.waf.flags": {
"type": "keyword"
},
"cloudflare.firewall.matches.actions": {
"type": "keyword"
},
"cloudflare.http.response.status_code": {
"type": "long"
},
"user_agent.os.platform": {
"type": "keyword"
},
"cloudflare.waf.matched_var": {
"type": "keyword"
},
"user_agent.os_minor": {
"type": "keyword"
},
"cloudflare.worker.status": {
"type": "keyword"
},
"#version": {
"type": "keyword"
},
"cloudflare.firewall.matches.rule_ids": {
"type": "keyword"
},
"user_agent.os_major": {
"type": "keyword"
},
"cloudflare.origin.response.bytes": {
"type": "long"
},
"source.ip": {
"type": "ip"
},
"http.response.bytes": {
"type": "long"
},
"cloudflare.client.ssl.protocol": {
"type": "keyword"
},
"url.full": {
"type": "keyword"
},
"client.address": {
"type": "keyword"
},
"user_agent.os_name": {
"type": "keyword"
},
"cloudflare.edge.end.timestamp": {
"type": "date"
},
"cloudflare.origin.response.http.last_modified": {
"ignore_malformed": true,
"type": "date"
},
"user_agent.original": {
"type": "keyword"
},
"cloudflare.cache.tiered.fill": {
"type": "boolean"
},
"cloudflare.origin.response.http.expires": {
"type": "date",
"format": "E, d MMM uuuu HH:mm:ss 'UTC'"
},
"user_agent.name": {
"type": "keyword"
},
"cloudflare.waf.action": {
"type": "keyword"
},
"cloudflare.cache.status": {
"type": "keyword"
},
"cloudflare.edge.request.host": {
"type": "keyword"
},
"source.geo": {
"type": "object",
"properties": {
"region_code": {
"type": "keyword"
},
"longitude": {
"type": "float"
},
"region_iso_code": {
"type": "keyword"
},
"region_name": {
"type": "keyword"
},
"country_code2": {
"type": "keyword"
},
"ip": {
"type": "ip"
},
"continent_code": {
"type": "keyword"
},
"postal_code": {
"type": "keyword"
},
"country_code3": {
"type": "keyword"
},
"latitude": {
"type": "float"
},
"city_name": {
"type": "keyword"
},
"dma_code": {
"type": "long"
},
"country_name": {
"type": "keyword"
},
"continent_name": {
"type": "keyword"
},
"timezone": {
"type": "keyword"
},
"location": {
"type": "geo_point"
}
}
},
"cloudflare.edge.rate.limit.action": {
"type": "keyword"
},
"cloudflare.client.ssl.cipher": {
"type": "keyword"
},
"user_agent.os.name": {
"type": "keyword"
},
"cloudflare.edge.pathing.status": {
"type": "keyword"
},
"cloudflare.zone_id": {
"type": "integer"
},
"client.port": {
"type": "long"
},
"observer.type": {
"type": "keyword"
},
"http.request.referrer": {
"type": "keyword"
},
"user_agent.major": {
"type": "keyword"
},
"event.end": {
"type": "date"
},
"cloudflare.client.request.protocol": {
"type": "keyword"
},
"user_agent.device.name": {
"type": "keyword"
},
"destination.ip": {
"type": "ip"
},
"url.domain": {
"type": "keyword"
},
"http.request.method": {
"type": "keyword"
},
"cloudflare.firewall.matches.sources": {
"type": "keyword"
},
"cloudflare.edge.response.content_type": {
"type": "keyword"
},
"cloudflare.ray_id": {
"type": "keyword"
},
"event.start": {
"type": "date"
},
"ecs.version": {
"type": "keyword"
},
"client.ip": {
"type": "ip"
},
"cloudflare.edge.colo.code": {
"type": "keyword"
},
"http.version": {
"type": "keyword"
},
"cloudflare.client.ip.class": {
"type": "keyword"
},
"server.ip": {
"type": "ip"
},
"user_agent.os.kernel": {
"type": "keyword"
}
}
},
"aliases": {},
"order": 0,
"settings": {
"index": {
"number_of_replicas": "1",
"mapping": {
"ignore_malformed": "true"
},
"number_of_shards": "1",
"lifecycle": {
"rollover_alias": "cloudflare",
"name": "cloudflare"
},
"routing": {
"allocation": {
"include": {
"_tier_preference": null
}
}
}
}
}
}
}
ILM Policy in quesion:
{
"cloudflare": {
"policy": {
"phases": {
"cold": {
"actions": {
"set_priority": {
"priority": 0
}
},
"min_age": "30d"
},
"warm": {
"actions": {
"set_priority": {
"priority": 50
}
},
"min_age": "0ms"
},
"hot": {
"actions": {
"rollover": {
"max_age": "1d"
},
"set_priority": {
"priority": 100
}
},
"min_age": "0ms"
},
"delete": {
"actions": {
"delete": {
"delete_searchable_snapshot": true
}
},
"min_age": "60d"
}
}
},
"modified_date": "2021-11-02T17:18:34.417Z",
"in_use_by": {
"indices": [
"cloudflare-2022.07.09-000001",
"cloudflare-2022.07.08-000001",
"cloudflare-2022.07.04-000001",
"cloudflare-2022.07.06-000001",
"cloudflare-2022.07.07-000001",
"cloudflare-2022.07.05-000001",
"cloudflare-2022.06.10-000001",
"cloudflare-2022.06.12-000001",
"cloudflare-2022.06.11-000001",
"cloudflare-2022.06.13-000001",
"cloudflare-2022.08.02-000001",
"cloudflare-2022.08.03-000001",
"cloudflare-2022.08.01-000001",
"cloudflare-2022.08.04-000001",
"cloudflare-2022.08.08-000001",
"cloudflare-2022.06.18-000001",
"cloudflare-2022.08.06-000001",
"cloudflare-2022.06.07-000001",
"cloudflare-2022.06.16-000001",
"cloudflare-2022.06.14-000001",
"cloudflare-2022.06.09-000001",
"cloudflare-2022.06.05-000001",
"cloudflare-2022.06.03-000001",
"cloudflare-2022.05.23-000001",
"cloudflare-2022.05.21-000001",
"cloudflare-2022.07.02-000001",
"cloudflare-2022.07.11-000001",
"cloudflare-2022.07.13-000001",
"cloudflare-2022.08.01-000017",
"cloudflare-2022.07.17-000001",
"cloudflare-2022.07.18-000001",
"cloudflare-2022.05.28-000001",
"cloudflare-2022.05.27-000001",
"cloudflare-2022.05.24-000001",
"cloudflare-2022.06.01-000001",
"cloudflare-2022.06.22-000001",
"cloudflare-2022.08.02-000023",
"cloudflare-2022.08.03-000024",
"cloudflare-2022.08.02-000021",
"cloudflare-2022.06.23-000001",
"cloudflare-2022.08.02-000022",
"cloudflare-2022.08.12-000001",
"cloudflare-2022.08.06-000027",
"cloudflare-2022.08.13-000001",
"cloudflare-2022.08.07-000028",
"cloudflare-2022.06.19-000001",
"cloudflare-2022.08.16-000001",
"cloudflare-2022.06.26-000001",
"cloudflare-2022.08.09-000001",
"cloudflare-2022.08.05-000001",
"cloudflare-2022.08.02-000020",
"cloudflare-2022.06.15-000001",
"cloudflare-2022.05.20-000001",
"cloudflare-2022.06.08-000001",
"cloudflare-2022.07.10-000001",
"cloudflare-2022.06.04-000001",
"cloudflare-2022.07.03-000001",
"cloudflare-2022.05.31-000001",
"cloudflare-2022.07.14-000001",
"cloudflare-2022.07.25-000004",
"cloudflare-2022.07.21-000001",
"cloudflare-2022.07.25-000001",
"cloudflare-2022.08.02-000018",
"cloudflare-2022.08.02-000019",
"cloudflare-2022.07.29-000001",
"cloudflare-2022.07.26-000001",
"cloudflare-2022.07.27-000009",
"cloudflare-2022.07.30-000015",
"cloudflare-2022.07.30-000014",
"cloudflare-2022.07.31-000016",
"cloudflare-2022.07.30-000013",
"cloudflare-2022.07.27-000010",
"cloudflare-2022.06.30-000001",
"cloudflare-2022.07.28-000011",
"cloudflare-2022.08.17-000001",
"cloudflare-2022.07.29-000012",
"cloudflare-2022.06.27-000001",
"cloudflare-2022.06.29-000001",
"cloudflare-2022.06.25-000001",
"cloudflare-2022.05.30-000001",
"cloudflare-2022.07.26-000008",
"cloudflare-2022.07.22-000001",
"cloudflare-2022.07.26-000007",
"cloudflare-2022.07.31-000001",
"cloudflare-2022.07.26-000006",
"cloudflare-2022.07.24-000001",
"cloudflare-2022.07.26-000005",
"cloudflare-2022.07.20-000001",
"cloudflare-2022.07.24-000003",
"cloudflare-2022.07.28-000001",
"cloudflare-2022.05.29-000001",
"cloudflare-2022.07.16-000001",
"cloudflare-2022.07.19-000001",
"cloudflare-2022.07.15-000001",
"cloudflare-2022.08.09-000030",
"cloudflare-2022.05.25-000001",
"cloudflare-2022.05.26-000001",
"cloudflare-2022.06.02-000001",
"cloudflare-2022.06.21-000001",
"cloudflare-2022.06.20-000001",
"cloudflare-2022.06.24-000001",
"cloudflare-2022.08.05-000026",
"cloudflare-2022.08.04-000025",
"cloudflare-2022.08.14-000001",
"cloudflare-2022.08.10-000001",
"cloudflare-2022.08.15-000001",
"cloudflare-2022.08.11-000001",
"cloudflare-2022.08.08-000029",
"cloudflare-2022.08.07-000001",
"cloudflare-2022.06.28-000001",
"cloudflare-2022.06.17-000001",
"cloudflare-2022.06.06-000001",
"cloudflare-2022.05.22-000001",
"cloudflare-2022.07.01-000001",
"cloudflare-2022.07.12-000001",
"cloudflare-2022.07.30-000001",
"cloudflare-2022.07.27-000001",
"cloudflare-2022.07.23-000001",
"cloudflare-2022.07.23-000002"
],
"data_streams": [],
"composable_templates": []
},
"version": 12
}
}
Elastic version: v7.16.2 provided by elastic.co on AWS

Is there a character limit on an individual word within a match phrase query in elastic search?

Fairly new to Elastic Search so may have to bare with me, I'm running into a problem where if I search for a document using 20 characters or less, the document appears, however any more characters within the same word within the query, I get no results:
Using 'phenoxymethylpenicillin' brings no documents.
Using 'phenoxymethylpenicil' brings back documents.
This is the query I'm trying to use:
{
"match_phrase": {
"genericNames.name": {
"query": "phenoxymethylpenicillin",
"slop": 15,
"zero_terms_query": "NONE",
"boost": 1.0
}
}
}
Here is the full query: https://pastebin.com/DEJvP2uS
Like I said, I'm fairly new to this, it may be a point of not looking in the correct area.
So my question is, what possible areas would cause this and why?
Thanks!
Edit:
Provided is an extract from one of the documents from the sample data. I can't show a lot of it due a lot of it being sensitive, luckily the names from sample data I can share. This is from the data I'm trying to search for:
"genericNames":[
{
"nameType":1,
"name":"Phenoxymethylpenicillin 250mg tablets",
"nameChangeCode":"0000",
"nameBasisCode":"0001",
"nameTypeDescription":"Name",
"startDate":"1948-01-01T00:00:00.000000+0000",
"endDate":"3456-02-01T00:00:00.000000+0000"
},
{
"nameType":5,
"name":"Penicillin V 250mg tablets",
"nameTypeDescription":"Alternative Name 3",
"startDate":"1948-01-01T00:00:00.000000+0000",
"endDate":"3456-02-01T00:00:00.000000+0000"
}
],
I have also provided the index mapping as it may provide extra information:
{
"amp": {
"mappings": {
"properties": {
"_class": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"ampId": {
"type": "long"
},
"amppId": {
"type": "long"
},
"attributes": {
"type": "nested",
"properties": {
"attributeQualifier": {
"type": "keyword"
},
"attributeType": {
"type": "integer"
},
"attributeTypeDescription": {
"type": "keyword"
},
"attributeValue": {
"type": "text",
"fields": {
"raw": {
"type": "keyword"
}
}
},
"countryId": {
"type": "long"
},
"decodedValue": {
"type": "text",
"fields": {
"raw": {
"type": "keyword"
}
}
},
"endDate": {
"type": "date",
"format": "uuuu-MM-dd'T'HH:mm:ss.SSSSSSZ"
},
"startDate": {
"type": "date",
"format": "uuuu-MM-dd'T'HH:mm:ss.SSSSSSZ"
}
}
},
"dictionaries": {
"type": "nested",
"properties": {
"abbreviation": {
"type": "text",
"fields": {
"raw": {
"type": "keyword"
}
}
},
"description": {
"type": "text",
"fields": {
"raw": {
"type": "keyword"
}
}
},
"dictId": {
"type": "integer"
},
"endDate": {
"type": "date",
"format": "uuuu-MM-dd'T'HH:mm:ss.SSSSSSZ"
},
"startDate": {
"type": "date",
"format": "uuuu-MM-dd'T'HH:mm:ss.SSSSSSZ"
}
}
},
"endDate": {
"type": "date",
"format": "uuuu-MM-dd'T'HH:mm:ss.SSSSSSZ"
},
"excipients": {
"type": "nested",
"properties": {
"basisOfStrengthCode": {
"type": "keyword"
},
"bossId": {
"type": "long"
},
"endDate": {
"type": "date",
"format": "uuuu-MM-dd'T'HH:mm:ss.SSSSSSZ"
},
"id": {
"type": "long"
},
"ingredientNames": {
"properties": {
"endDate": {
"type": "date"
},
"name": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"startDate": {
"type": "date"
}
}
},
"startDate": {
"type": "date",
"format": "uuuu-MM-dd'T'HH:mm:ss.SSSSSSZ"
},
"strengthDenominatorUnitOfMeasureCode": {
"type": "keyword"
},
"strengthDenominatorValue": {
"type": "keyword"
},
"strengthNumeratorUnitOfMeasureCode": {
"type": "keyword"
},
"strengthNumeratorValue": {
"type": "keyword"
},
"strengthVal": {
"type": "keyword"
},
"unitOfMeasure": {
"type": "keyword"
}
}
},
"extractableEntry": {
"type": "boolean"
},
"genericNames": {
"type": "nested",
"properties": {
"endDate": {
"type": "date",
"format": "uuuu-MM-dd'T'HH:mm:ss.SSSSSSZ"
},
"name": {
"type": "text",
"ignore_above": 256,
"fields": {
"raw": {
"type": "keyword"
}
},
"analyzer": "autocomplete_index",
"search_analyzer": "autocomplete_search"
},
"nameBasisCode": {
"type": "keyword"
},
"nameChangeCode": {
"type": "keyword"
},
"nameType": {
"type": "integer"
},
"nameTypeDescription": {
"type": "text",
"fields": {
"raw": {
"type": "keyword"
}
}
},
"startDate": {
"type": "date",
"format": "uuuu-MM-dd'T'HH:mm:ss.SSSSSSZ"
}
}
},
"id": {
"type": "keyword"
},
"ingredients": {
"type": "nested",
"properties": {
"basisOfStrengthCode": {
"type": "keyword"
},
"bossId": {
"type": "long"
},
"endDate": {
"type": "date",
"format": "uuuu-MM-dd'T'HH:mm:ss.SSSSSSZ"
},
"id": {
"type": "long"
},
"ingredientNames": {
"properties": {
"endDate": {
"type": "date"
},
"name": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"startDate": {
"type": "date"
}
}
},
"startDate": {
"type": "date",
"format": "uuuu-MM-dd'T'HH:mm:ss.SSSSSSZ"
},
"strengthDenominatorUnitOfMeasureCode": {
"type": "keyword"
},
"strengthDenominatorValue": {
"type": "keyword"
},
"strengthNumeratorUnitOfMeasureCode": {
"type": "keyword"
},
"strengthNumeratorValue": {
"type": "keyword"
},
"strengthVal": {
"type": "keyword"
},
"unitOfMeasure": {
"type": "keyword"
}
}
},
"invalidEntry": {
"type": "boolean"
},
"pitId": {
"type": "integer"
},
"ppaCodes": {
"type": "nested",
"properties": {
"code": {
"type": "text",
"fields": {
"raw": {
"type": "keyword"
}
}
},
"endDate": {
"type": "date",
"format": "uuuu-MM-dd'T'HH:mm:ss.SSSSSSZ"
},
"startDate": {
"type": "date",
"format": "uuuu-MM-dd'T'HH:mm:ss.SSSSSSZ"
}
}
},
"proprietaryNames": {
"type": "nested",
"properties": {
"endDate": {
"type": "date",
"format": "uuuu-MM-dd'T'HH:mm:ss.SSSSSSZ"
},
"name": {
"type": "text",
"ignore_above": 256,
"fields": {
"raw": {
"type": "keyword"
}
},
"analyzer": "autocomplete_index",
"search_analyzer": "autocomplete_search"
},
"nameBasisCode": {
"type": "keyword"
},
"nameChangeCode": {
"type": "keyword"
},
"nameType": {
"type": "integer"
},
"nameTypeDescription": {
"type": "text",
"fields": {
"raw": {
"type": "keyword"
}
}
},
"startDate": {
"type": "date",
"format": "uuuu-MM-dd'T'HH:mm:ss.SSSSSSZ"
}
}
},
"qpuUomCde": {
"type": "keyword"
},
"qpuVal": {
"type": "keyword"
},
"qtyUomCde": {
"type": "keyword"
},
"qtyVal": {
"type": "keyword"
},
"snomedCodes": {
"type": "nested",
"properties": {
"endDate": {
"type": "date",
"format": "uuuu-MM-dd'T'HH:mm:ss.SSSSSSZ"
},
"ppaNextNo": {
"type": "text",
"fields": {
"raw": {
"type": "keyword"
}
}
},
"snomed": {
"type": "text",
"fields": {
"raw": {
"type": "keyword"
}
}
},
"startDate": {
"type": "date",
"format": "uuuu-MM-dd'T'HH:mm:ss.SSSSSSZ"
}
}
},
"snomedDescriptions": {
"type": "nested",
"properties": {
"endDate": {
"type": "date",
"format": "uuuu-MM-dd'T'HH:mm:ss.SSSSSSZ"
},
"ppaNextNo": {
"type": "text",
"fields": {
"raw": {
"type": "keyword"
}
}
},
"snomed": {
"type": "text",
"fields": {
"raw": {
"type": "keyword"
}
}
},
"startDate": {
"type": "date",
"format": "uuuu-MM-dd'T'HH:mm:ss.SSSSSSZ"
}
}
},
"startDate": {
"type": "date",
"format": "uuuu-MM-dd'T'HH:mm:ss.SSSSSSZ"
},
"suppliers": {
"type": "nested",
"properties": {
"endDate": {
"type": "date",
"format": "uuuu-MM-dd'T'HH:mm:ss.SSSSSSZ"
},
"id": {
"type": "long"
},
"names": {
"type": "nested",
"properties": {
"endDate": {
"type": "date",
"format": "uuuu-MM-dd'T'HH:mm:ss.SSSSSSZ"
},
"name": {
"type": "text",
"fields": {
"raw": {
"type": "keyword"
}
},
"analyzer": "autocomplete_index",
"search_analyzer": "autocomplete_search"
},
"nameBasisCode": {
"type": "keyword"
},
"nameChangeCode": {
"type": "keyword"
},
"nameType": {
"type": "integer"
},
"nameTypeDescription": {
"type": "text",
"fields": {
"raw": {
"type": "keyword"
}
}
},
"startDate": {
"type": "date",
"format": "uuuu-MM-dd'T'HH:mm:ss.SSSSSSZ"
}
}
},
"startDate": {
"type": "date",
"format": "uuuu-MM-dd'T'HH:mm:ss.SSSSSSZ"
}
}
},
"udfs": {
"type": "nested",
"properties": {
"ddIndicator": {
"type": "integer"
},
"endDate": {
"type": "date",
"format": "uuuu-MM-dd'T'HH:mm:ss.SSSSSSZ"
},
"startDate": {
"type": "date",
"format": "uuuu-MM-dd'T'HH:mm:ss.SSSSSSZ"
},
"udfsUomCode": {
"type": "keyword"
},
"udfsValue": {
"type": "keyword"
},
"vmpUomCode": {
"type": "keyword"
}
}
},
"vmpId": {
"type": "long"
},
"vmppId": {
"type": "long"
},
"vtms": {
"type": "nested",
"properties": {
"endDate": {
"type": "date",
"format": "uuuu-MM-dd'T'HH:mm:ss.SSSSSSZ"
},
"id": {
"type": "long"
},
"startDate": {
"type": "date",
"format": "uuuu-MM-dd'T'HH:mm:ss.SSSSSSZ"
}
}
}
}
}
}
}
Edit: Added link to full query - https://pastebin.com/DEJvP2uS
Edit: Settings for index:
{
"index": {
"max_ngram_diff": "20",
"analysis": {
"filter": {
"autocomplete_suffix_filter": {
"type": "ngram",
"min_gram": "1",
"max_gram": "20"
},
"autocomplete_filter": {
"type": "edge_ngram",
"min_gram": "1",
"max_gram": "20"
}
},
"analyzer": {
"autocomplete_index": {
"filter": [
"lowercase",
"autocomplete_filter",
"autocomplete_suffix_filter"
],
"type": "custom",
"tokenizer": "standard"
},
"autocomplete_search": {
"filter": [
"lowercase"
],
"type": "custom",
"tokenizer": "standard"
}
}
},
"number_of_replicas": "1"
}
}
This must be happening due to the custom analyzer which you have on your genericNames.name field, you have different custom analyzer, index time you are using the autocomplete_index and search time autocomplete_search analyzer, but the definition of these analyzers is not provided in the question, only mapping part is provided.
Please provide the output of _setting API on your index, refer https://www.elastic.co/guide/en/elasticsearch/reference/current/indices-get-settings.html for more info.
You need to check the tokens generated for phenoxymethylpenicillin using the analyze API for both autocomplete_index and autocomplete_search analyzer and you will notice the difference.
In the index mapping provided above, genericNames is of the nested type so you need to use nested query
Adding a working example using the same index data as provided above along with search query and search result.
Search Query:
{
"query": {
"nested": {
"path": "genericNames",
"query": {
"bool": {
"must": [
{
"match": {
"genericNames.name": "phenoxymethylpenicillin"
}
}
]
}
},
"inner_hits":{}
}
}
}
Search Result:
"hits": [
{
"_index": "64817981",
"_type": "_doc",
"_id": "1",
"_nested": {
"field": "genericNames",
"offset": 0
},
"_score": 0.7361701,
"_source": {
"nameType": 1,
"name": "Phenoxymethylpenicillin 250mg tablets",
"nameChangeCode": "0000",
"nameBasisCode": "0001",
"nameTypeDescription": "Name",
"startDate": "1948-01-01T00:00:00.000000+0000",
"endDate": "3456-02-01T00:00:00.000000+0000"
}
}
]

Update Mapping of existing Index in Elasticsearch

I am totally new to elastic search. So please forgive me if this is a stupid Question and my Questions might have been answered somewhere else already but I couldn't find it.
I want to use Elastic Search as a search engine for PDF'S and docx's in my network. I used fscrawler to ingest the PDF's to elastic search. Since the documents I want to ingest are in several languages I wanted to use n-graming for stemming. To do so I wanted to update my mapping like this
PUT test/_mappings/_all
{
"mappings": {
"title": {
"properties": {
"title": {
"type": "text",
"fields": {
"de": {
"type": "string",
"analyzer": "german"
},
"en": {
"type": "string",
"analyzer": "english"
},
"general": {
"type": "string",
"analyzer": "trigrams"
}
}
}
}
}
}
}
And now I get this Errormessage
{ "error": {
"root_cause": [
{
"type": "mapper_parsing_exception",
"reason": "Root mapping definition has unsupported parameters: [mappings : {title={properties={title={type=text,
fields={de={type=string, analyzer=german}, en={type=string,
analyzer=english}, general={type=string, analyzer=trigrams}}}}}}]"
}
],
"type": "mapper_parsing_exception",
"reason": "Root mapping definition has unsupported parameters: [mappings : {title={properties={title={type=text,
fields={de={type=string, analyzer=german}, en={type=string,
analyzer=english}, general={type=string, analyzer=trigrams}}}}}}]"
}, "status": 400 }
Do you have any idea how i can fix this? Or do you have an idea how I can ingest the files with the right mapping without using fscrawler?
those are my settings
{
"test": {
"settings": {
"index": {
"mapping": {
"total_fields": {
"limit": "2000"
}
},
"number_of_shards": "5",
"provided_name": "test",
"creation_date": "1542031632596",
"analysis": {
"filter": {
"trigrams_filter": {
"type": "ngram",
"min_gram": "3",
"max_gram": "3"
}
},
"analyzer": {
"fscrawler_path": {
"tokenizer": "fscrawler_path"
},
"trigrams": {
"filter": [
"lowercase",
"trigrams_filter"
],
"type": "custom",
"tokenizer": "standard"
}
},
"tokenizer": {
"fscrawler_path": {
"type": "path_hierarchy"
}
}
},
"number_of_replicas": "1",
"uuid": "7L3QE5_xRACECVbTFlFY-Q",
"version": {
"created": "6040399"
}
}
}
}
}
My mapping
{
"test": {
"mappings": {
"_doc": {
"dynamic_templates": [
{
"raw_as_text": {
"path_match": "meta.raw.*",
"mapping": {
"fields": {
"keyword": {
"ignore_above": 256,
"type": "keyword"
}
},
"type": "text"
}
}
}
],
"properties": {
"attachment": {
"type": "binary"
},
"attributes": {
"properties": {
"group": {
"type": "keyword"
},
"owner": {
"type": "keyword"
}
}
},
"content": {
"type": "text"
},
"file": {
"properties": {
"checksum": {
"type": "keyword"
},
"content_type": {
"type": "keyword"
},
"created": {
"type": "date",
"format": "dateOptionalTime"
},
"extension": {
"type": "keyword"
},
"filename": {
"type": "keyword",
"store": true
},
"filesize": {
"type": "long"
},
"indexed_chars": {
"type": "long"
},
"indexing_date": {
"type": "date",
"format": "dateOptionalTime"
},
"last_accessed": {
"type": "date",
"format": "dateOptionalTime"
},
"last_modified": {
"type": "date",
"format": "dateOptionalTime"
},
"url": {
"type": "keyword",
"index": false
}
}
},
"meta": {
"properties": {
"altitude": {
"type": "text"
},
"author": {
"type": "text"
},
"comments": {
"type": "text"
},
"contributor": {
"type": "text"
},
"coverage": {
"type": "text"
},
"created": {
"type": "date",
"format": "dateOptionalTime"
},
"creator_tool": {
"type": "keyword"
},
"date": {
"type": "date",
"format": "dateOptionalTime"
},
"description": {
"type": "text"
},
"format": {
"type": "text"
},
"identifier": {
"type": "text"
},
"keywords": {
"type": "text"
},
"language": {
"type": "keyword"
},
"latitude": {
"type": "text"
},
"longitude": {
"type": "text"
},
"metadata_date": {
"type": "date",
"format": "dateOptionalTime"
},
"modifier": {
"type": "text"
},
"print_date": {
"type": "date",
"format": "dateOptionalTime"
},
"publisher": {
"type": "text"
},
"rating": {
"type": "byte"
},
"relation": {
"type": "text"
},
"rights": {
"type": "text"
},
"source": {
"type": "text"
},
"title": {
"type": "text"
},
"type": {
"type": "text"
}
}
},
"path": {
"properties": {
"real": {
"type": "keyword",
"fields": {
"fulltext": {
"type": "text"
},
"tree": {
"type": "text",
"analyzer": "fscrawler_path",
"fielddata": true
}
}
},
"root": {
"type": "keyword"
},
"virtual": {
"type": "keyword",
"fields": {
"fulltext": {
"type": "text"
},
"tree": {
"type": "text",
"analyzer": "fscrawler_path",
"fielddata": true
}
}
}
}
}
}
}
}
}
}

Error while creating an index on Elasticsearch with custom analyzer

I am trying to create an index with a custom default analyzer.
I already checked the following questions:
Analyzer not found exception while creating an index with mapping and settings
How to specify an analyzer while creating an index in ElasticSearch
mapper_parsing_exception for a custom analyzer while creating index in elasticsearch?
but they didn't solve the issue.
Here is my schema:
put /emails
{
"mappings": {
"email": {
"analyzer": "lkw",
"properties": {
"createdOn": {
"type": "date",
"store": true,
"format": "strict_date_optional_time||epoch_millis"
},
"data": {
"type": "object",
"dynamic": "true"
},
"from": {
"type": "string",
"store": true
},
"id": {
"type": "string",
"store": true
},
"sentOn": {
"type": "date",
"store": true,
"format": "strict_date_optional_time||epoch_millis"
},
"sesId": {
"type": "string",
"store": true
},
"subject": {
"type": "string",
"store": true,
"analyzer": "standard"
},
"templates": {
"properties": {
"html": {
"type": "string",
"store": true
},
"plainText": {
"type": "string",
"store": true
}
}
},
"to": {
"type": "string",
"store": true
},
"type": {
"type": "string",
"store": true
}
}
},
"event": {
"_parent": {
"type": "email"
},
"analyzer": "lkw",
"properties": {
"id": {
"type": "string",
"store": true
},
"origin": {
"type": "string",
"store": true
},
"time": {
"type": "date",
"store": true,
"format": "strict_date_optional_time||epoch_millis"
},
"type": {
"type": "string",
"store": true
},
"userAgent": {
"type": "string",
"store": true
}
}
}
},
"settings": {
"analysis": {
"analyzer": {
"lkw": {
"tokenizer": "keyword",
"filter": [
"lowercase"
],
"type": "custom"
}
}
}
}
}
When I execute the command above, I get this error:
{
"error": {
"root_cause": [
{
"type": "mapper_parsing_exception",
"reason": "Root mapping definition has unsupported parameters: [analyzer : lkw]"
}
],
"type": "mapper_parsing_exception",
"reason": "Failed to parse mapping [event]: Root mapping definition has unsupported parameters: [analyzer : lkw]",
"caused_by": {
"type": "mapper_parsing_exception",
"reason": "Root mapping definition has unsupported parameters: [analyzer : lkw]"
}
},
"status": 400
}
Since you have only a few string fields, I suggest you simply specify your lkw analyzer where you need it, just like you did for the standard one:
PUT /emails
{
"mappings": {
"email": {
"properties": {
"createdOn": {
"type": "date",
"store": true,
"format": "strict_date_optional_time||epoch_millis"
},
"data": {
"type": "object",
"dynamic": "true"
},
"from": {
"type": "string",
"store": true,
"analyzer": "lkw"
},
"id": {
"type": "string",
"store": true,
"analyzer": "lkw"
},
"sentOn": {
"type": "date",
"store": true,
"format": "strict_date_optional_time||epoch_millis"
},
"sesId": {
"type": "string",
"store": true,
"analyzer": "lkw"
},
"subject": {
"type": "string",
"store": true,
"analyzer": "standard"
},
"templates": {
"properties": {
"html": {
"type": "string",
"store": true,
"analyzer": "lkw"
},
"plainText": {
"type": "string",
"store": true,
"analyzer": "lkw"
}
}
},
"to": {
"type": "string",
"store": true,
"analyzer": "lkw"
},
"type": {
"type": "string",
"store": true,
"analyzer": "lkw"
}
}
},
"event": {
"_parent": {
"type": "email"
},
"properties": {
"id": {
"type": "string",
"store": true,
"analyzer": "lkw"
},
"origin": {
"type": "string",
"store": true,
"analyzer": "lkw"
},
"time": {
"type": "date",
"store": true,
"format": "strict_date_optional_time||epoch_millis"
},
"type": {
"type": "string",
"store": true,
"analyzer": "lkw"
},
"userAgent": {
"type": "string",
"store": true,
"analyzer": "lkw"
}
}
}
},
"settings": {
"analysis": {
"analyzer": {
"lkw": {
"tokenizer": "keyword",
"filter": [
"lowercase"
],
"type": "custom"
}
}
}
}
}

ES NGram with multi-languages

I'm trying to implement an auto-suggest control powered by an ES index. The index has multiple fields (Multi-language - Arabic and English) and I want to be able to search in all languages.
The easiest way to do that is NGram with the "_all" field, as long as some care is taken in the mapping definition. The issue we have now how to accomplish this using multi-language.
PS: We are looking to separate field for all the possible languages (Using one index).
I tried to use the nGram tokenizer and filter and it's working good for one language (English).
{
"template": "index_com",
"settings": {
"number_of_shards": 5,
"number_of_replicas": 1,
"analysis": {
"filter": {
"edgeNGram_filter": {
"type": "edgeNGram",
"min_gram": 2,
"max_gram": 20
}
},
"analyzer": {
"edgeNGram_analyzer": {
"type": "custom",
"tokenizer": "whitespace",
"filter": [
"lowercase",
"asciifolding",
"edgeNGram_filter"
]
}
}
}
},
"mappings": {
"product": {
"_all": {
"enabled": true,
"index_analyzer": "edgeNGram_analyzer",
"search_analyzer": "standard"
},
"properties": {
"id": {
"type": "string",
"index": "no",
"include_in_all": false
},
"uuid": {
"type": "string",
"index": "no",
"include_in_all": false
},
"name": {
"type": "string",
"include_in_all": true
},
"description": {
"type": "string",
"include_in_all": true
},
"brand": {
"type": "string",
"include_in_all": true
},
"made_id": {
"type": "string",
"include_in_all": true
},
"category": {
"type": "string",
"include_in_all": true
},
"category_id": {
"type": "integer",
"include_in_all": false
},
"keywords": {
"type": "string",
"include_in_all": true
},
"colors": {
"type": "string",
"index": "not_analyzed"
},
"colors_name": {
"type": "string",
"include_in_all": true
},
"quality": {
"type": "string",
"index": "not_analyzed"
},
"vendor_name": {
"type": "string",
"include_in_all": false
},
"vendor_location" : {
"type" : "geo_point",
"include_in_all": false
},
"price": {
"type": "double",
"include_in_all": false
},
"price_before_discount": {
"type": "double",
"include_in_all": false
},
"is_deal": {
"type": "integer",
"include_in_all": false
},
"is_best_seller": {
"type": "integer",
"include_in_all": false
},
"views": {
"type": "integer",
"include_in_all": false
},
"rating": {
"type": "integer",
"include_in_all": false
},
"updated_at": {
"type": "date",
"format": "dateOptionalTime"
},
"created_at": {
"type": "date",
"format": "dateOptionalTime"
},
"image_link": {
"type": "string",
"index": "not_analyzed"
}
}
}
}
}
Arabic analyzer:
{
"settings": {
"analysis": {
"filter": {
"arabic_stop": {
"type": "stop",
"stopwords": "_arabic_"
},
"arabic_keywords": {
"type": "keyword_marker",
"keywords": []
},
"arabic_stemmer": {
"type": "stemmer",
"language": "arabic"
}
},
"analyzer": {
"arabic": {
"tokenizer": "standard",
"filter": [
"lowercase",
"arabic_stop",
"arabic_normalization",
"arabic_keywords",
"arabic_stemmer"
]
}
}
}
}
}
can someone suggest any solution? Thanks!
Your second snippet defines the arabic analyzer, which is already available so you shouldn't need to add it.
What you are missing is to tell elasticsearch to also use the arabic analyzer. So you want to analyze each field twice, in english and arabic. To do that, add
"fields": {
"ar": {
"type": "string",
"analyzer": "arabic"
},
"en": {
"type": "string",
"analyzer": "english"
}
}
to all your fields that have "include_in_all": true. That makes your mappings look like this:
{
"template": "index_com",
"settings": {
"number_of_shards": 5,
"number_of_replicas": 1,
"analysis": {
"filter": {
"edgeNGram_filter": {
"type": "edgeNGram",
"min_gram": 2,
"max_gram": 20
}
},
"analyzer": {
"edgeNGram_analyzer": {
"type": "custom",
"tokenizer": "whitespace",
"filter": [
"lowercase",
"asciifolding",
"edgeNGram_filter"
]
}
}
}
},
"mappings": {
"product": {
"_all": {
"enabled": true,
"index_analyzer": "edgeNGram_analyzer",
"search_analyzer": "standard"
},
"properties": {
"id": {
"type": "string",
"index": "no",
"include_in_all": false
},
"uuid": {
"type": "string",
"index": "no",
"include_in_all": false
},
"name": {
"type": "string",
"include_in_all": true,
"fields": {
"ar": {
"type": "string",
"analyzer": "arabic"
},
"en": {
"type": "string",
"analyzer": "english"
}
}
},
"description": {
"type": "string",
"include_in_all": true,
"fields": {
"ar": {
"type": "string",
"analyzer": "arabic"
},
"en": {
"type": "string",
"analyzer": "english"
}
}
},
"brand": {
"type": "string",
"include_in_all": true,
"fields": {
"ar": {
"type": "string",
"analyzer": "arabic"
},
"en": {
"type": "string",
"analyzer": "english"
}
}
},
"made_id": {
"type": "string",
"include_in_all": true,
"fields": {
"ar": {
"type": "string",
"analyzer": "arabic"
},
"en": {
"type": "string",
"analyzer": "english"
}
}
},
"category": {
"type": "string",
"include_in_all": true,
"fields": {
"ar": {
"type": "string",
"analyzer": "arabic"
},
"en": {
"type": "string",
"analyzer": "english"
}
}
},
"category_id": {
"type": "integer",
"include_in_all": false
},
"keywords": {
"type": "string",
"include_in_all": true,
"fields": {
"ar": {
"type": "string",
"analyzer": "arabic"
},
"en": {
"type": "string",
"analyzer": "english"
}
}
},
"colors": {
"type": "string",
"index": "not_analyzed"
},
"colors_name": {
"type": "string",
"include_in_all": true,
"fields": {
"ar": {
"type": "string",
"analyzer": "arabic"
},
"en": {
"type": "string",
"analyzer": "english"
}
}
},
"quality": {
"type": "string",
"index": "not_analyzed"
},
"vendor_name": {
"type": "string",
"include_in_all": false
},
"vendor_location": {
"type": "geo_point",
"include_in_all": false
},
"price": {
"type": "double",
"include_in_all": false
},
"price_before_discount": {
"type": "double",
"include_in_all": false
},
"is_deal": {
"type": "integer",
"include_in_all": false
},
"is_best_seller": {
"type": "integer",
"include_in_all": false
},
"views": {
"type": "integer",
"include_in_all": false
},
"rating": {
"type": "integer",
"include_in_all": false
},
"updated_at": {
"type": "date",
"format": "dateOptionalTime"
},
"created_at": {
"type": "date",
"format": "dateOptionalTime"
},
"image_link": {
"type": "string",
"index": "not_analyzed"
}
}
}
}
}

Resources