Upgrading to Elasticsearch 5.2 - elasticsearch

I have the following legacy mapping code that works in ES 1.7 but fails in 5.2. Two things fail: the multi_field type is no longer supported, and neither is the path option. The documentation mentions that these were removed but does not provide a remedy beyond suggesting copy_to. Can someone give a bit more detail on that?
{
"sample": {
"_parent": {
"type": "security"
},
"properties": {
"securityDocumentId": {
"type": "string",
"index": "not_analyzed",
"include_in_all": false
},
"id": {
"type": "multi_field",
"path": "full",
"fields": {
"indexer_sample_id": {
"type": "string"
},
"id": {
"type": "string",
"include_in_all": false
}
}
},
"sampleid": {
"type": "multi_field",
"path": "just_name",
"fields": {
"sampleid": {
"type": "string",
"analyzer": "my_analyzer"
},
"sample.sampleid": {
"type": "string",
"analyzer": "my_analyzer"
},
"sample.sampleid.sort": {
"type": "string",
"analyzer": "case_insensitive_sort_analyzer"
},
"sample.sampleid.name.autocomplete": {
"type": "string",
"analyzer": "autocomplete"
}
}
},

The path option's default value was full, so you can leave it out since it was deprecated in 2.0. The path value just_name doesn't exist anymore and you MUST reference all your fields by their full path name. The multi-fields can be rewritten very simply:
{
"sample": {
"_parent": {
"type": "security"
},
"properties": {
"securityDocumentId": {
"type": "keyword",
"include_in_all": false
},
"id": {
"type": "text",
"fields": {
"indexer_sample_id": {
"type": "text"
},
"id": {
"type": "text",
"include_in_all": false
}
}
},
"sampleid": {
"type": "text",
"fields": {
"sampleid": {
"type": "text",
"analyzer": "my_analyzer"
},
"sample.sampleid": {
"type": "text",
"analyzer": "my_analyzer"
},
"sample.sampleid.sort": {
"type": "text",
"analyzer": "case_insensitive_sort_analyzer"
},
"sample.sampleid.name.autocomplete": {
"type": "text",
"analyzer": "autocomplete"
}
}
},
Note that I'm not sure of the usefulness and added value of the id sub-fields

Related

Es index rate become slow after create index mapping

I write data using the ES BulkProcessor (I tried a Python script, storm es-bolt, and flink es-sink), but the index rate becomes very slow after I create an index mapping.
Situation 1: Leave all index settings at their defaults; the index rate can reach about 10000+.
Situation 2: Just create the index mapping; the index rate falls to 3000.
I use the same data, same code, same machines.
result
flink es-sink write json data to es:
My data
The same data below is written repeatedly (the message field is the raw log, about 7KB in size; I deleted some of its content to stay within the question length limit):
{
"_index": "nyc_flink_test997",
"_type": "doc",
"_id": "k8uS92cBOH4ugSIjCzmn",
"_score": 1,
"_source": {
"exception": "false",
"log_id": "8F71AF1606EE46BFA9D57AA2282D8596",
"offset": "2368",
"message_length": "2103",
"level": "INFO",
"source": "/opt/hadoop/elastic-stack/s_login/Gusermanager.usermanager.s_login.20.log",
"sessionid": "provider-60-2883b4bd3ff2b",
"associate_id": "33d081b83a0654a2",
"message": """
[16:41:33.376][I][ec4edfe0b2584b73]log start:53F9A1A1E71044E281755E930E1B004C
[16:41:33.376][T][ec4edfe0b2584b73]入参0=__REQ__
at com.mysql.jdbc.MysqlIO.checkErrorPacket(MysqlIO.java:4119)
at com.mysql.jdbc.MysqlIO.sendCommand(MysqlIO.java:2570)
at com.mysql.jdbc.MysqlIO.sqlQueryDirect(MysqlIO.java:2731)
at com.mysql.jdbc.ConnectionImpl.execSQL(ConnectionImpl.java:2815)
at com.mysql.jdbc.PreparedStatement.executeInternal(PreparedStatement.java:2155)
at com.mysql.jdbc.PreparedStatement.executeQuery(PreparedStatement.java:2322)
at cn.com.agree.addal.cp.ProxyPreparedStatement.executeQuery(ProxyPreparedStatement.java:46)
at tc.bank.aesb.mbs.MBS_DBIMPL.PyDBGetSel(MBS_DBIMPL.java:1624)
at tc.bank.aesb.mbs.MBS_DBIMPL.PyDBExecOneSQL(MBS_DBIMPL.java:466)
at tc.bank.aesb.mbs.MBS_DBIMPL.PyDBExecGrpSQL(MBS_DBIMPL.java:123)
at tc.bank.aesb.mbs.B_MBS_DataBase.B_DBUnityRptOpr(B_MBS_DataBase.java:121)
at CUST.CustomerInfoQry.TCustomerInfoQry$Step1$Node4.execute(TCustomerInfoQry.java:200)
at CUST.CustomerInfoQry.TCustomerInfoQry$Step1.execute(TCustomerInfoQry.java:113)
at CUST.CustomerInfoQry.TCustomerInfoQry.execute(TCustomerInfoQry.java:76)
at cn.com.agree.afa.svc.javaengine.JavaEngine.execute(JavaEngine.java:237)
at cn.com.agree.afa.svc.handler.TradeHandler.handle(TradeHandler.java:62)
[16:41:33.414][I][ec4edfe0b2584b73]log end:53F9A1A1E71044E281755E930E1B004C
""",
"exec_ip": "10.88.188.167",
"start_time": "2018-12-09 16:46:14.764",
"group_v2": "Gusermanager",
"script_exec_time": "1",
"trade_exec_time": "2"
}
}
index mapping
{
"mappings": {
"doc":{
"dynamic_templates": [
{
"string_fields": {
"match": "*",
"match_mapping_type": "string",
"mapping": {
"type": "text",
"norms": false,
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
}
}
}
],
"properties": {
"@timestamp": {
"type": "date"
},
"@version": {
"type": "keyword"
},
"geoip": {
"dynamic": true,
"properties": {
"ip": {
"type": "ip"
},
"location": {
"type": "geo_point"
},
"latitude": {
"type": "half_float"
},
"longitude": {
"type": "half_float"
}
}
},
"exception": {
"type": "boolean"
},
"message":{
"type":"text",
"norms": false,
"analyzer": "ik_max_word"
},
"associate_id": {
"type": "text",
"analyzer": "ik_max_word"
},
"end_time": {
"type": "date",
"format": "date_time||yyyy-MM-dd HH:mm:ss.SSS||yyyy-MM-dd||epoch_millis||HH:mm:ss.SSS"
},
"start_time": {
"type": "date",
"format": "date_time||yyyy-MM-dd HH:mm:ss.SSS||yyyy-MM-dd||epoch_millis||HH:mm:ss.SSS"
},
"exec_ip": {
"type": "ip"
},
"level": {
"type": "keyword"
},
"script_exec_time": {
"type": "long"
},
"trade_exec_time": {
"type": "long"
},
"sessionid": {
"type": "text",
"analyzer": "ik_max_word"
},
"log_id": {
"type": "text",
"analyzer": "ik_max_word"
},
"discard_time": {
"type": "long"
},
"scene_code": {
"type": "text",
"analyzer": "ik_max_word"
},
"service_code": {
"type": "text",
"analyzer": "ik_max_word"
},
"group": {
"type": "text"
},
"group_v2" :{
"type": "text",
"analyzer": "ik_max_word"
},
"message_length":{
"type": "long"
},
"log_filename":{
"type": "text",
"analyzer": "ik_max_word"
},
"ingest_time":{
"type": "date"
}
}
}
}
}
I tried writing with Python scripts and storm es-bolt, and the result is the same: the index rate falls after creating the index mapping. Can anyone give some ideas about it? Thanks in advance.

Elastic Search: Different results for query string when using fields

We have an elastic search 5.5 setup. We use nest to perform our queries through C#.
When executing the following query:
{
"query": {
"bool": {
"must": [
{
"query_string": {
"query": "00917751"
}
}
]
}
}
}
We get the desired result: one result with that number as its identifier.
When using the following query:
{
"query": {
"bool": {
"must": [
{
"query_string": {
"query": "00917751",
"fields": [
"searchReference",
"searchIdentifier",
"searchObjectNo",
"searchBrand",
"searchExtSerNo"
]
}
}
]
}
}
}
We get no results.
The value we are searching for is in the field searchIndentifier, and has the value "1-00917751".
We have a custom analyzer called "final"
.Custom("final", cu => cu
.Tokenizer("keyword").Filters(new List<string>() { "lowercase" }))
The field searchIndentifier has no custom analyzer set on it. We tried adding the whitespace tokenizer in it but that made no difference.
Another field called "searchObjectNo" does work, when we try to search for the value "S328-25" with the query "S328". These fields are exactly the same.
Any ideas here?
Another question. In the first query, when we search for 1-00917751 (without the quotes) we get a lot of results. But we think that is because of the keyword tokenizer?
Thank you
Schoof
Index settings and mappings:
{
"inventoryitems": {
"aliases": {},
"mappings": {
"inventoryobject": {
"properties": {
"articleGroups": {
"type": "nested",
"properties": {
"id": {
"type": "long"
}
}
},
"articleId": {
"type": "long"
},
"articleNumber": {
"type": "text",
"boost": 1.5,
"analyzer": "final"
},
"brand": {
"type": "text",
"analyzer": "final"
},
"catalogues": {
"type": "nested",
"properties": {
"articleGroupId": {
"type": "long"
},
"articleGroupName": {
"type": "text",
"analyzer": "final",
"fielddata": true
},
"id": {
"type": "long"
},
"name": {
"type": "text",
"analyzer": "final",
"fielddata": true
}
}
},
"details": {
"type": "nested",
"properties": {
"actualState": {
"type": "double"
},
"allocation": {
"type": "text",
"analyzer": "final",
"fielddata": true
},
"available": {
"type": "double"
},
"batch": {
"type": "text",
"analyzer": "final"
},
"calibrationDate": {
"type": "date"
},
"expected": {
"type": "double"
},
"externalSerialNumber": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"id": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"inReturn": {
"type": "double"
},
"inventory": {
"type": "double"
},
"isInMobileCarrier": {
"type": "boolean"
},
"locationDetail": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"locationId": {
"type": "long"
},
"locationName": {
"type": "text",
"analyzer": "final",
"fielddata": true
},
"locationType": {
"type": "text",
"analyzer": "final",
"fielddata": true
},
"lotId": {
"type": "long"
},
"mobileCarrierCode": {
"type": "text",
"analyzer": "final",
"fielddata": true
},
"mobileCarrierId": {
"type": "long"
},
"ownerCode": {
"type": "text",
"analyzer": "final"
},
"requested": {
"type": "double"
},
"reserved": {
"type": "double"
},
"storeLocationId": {
"type": "long"
},
"thicknessCode": {
"type": "text",
"analyzer": "final"
},
"weldedMark": {
"type": "text",
"analyzer": "final"
}
}
},
"docNo": {
"type": "long"
},
"hasStock": {
"type": "boolean"
},
"id": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"identifier": {
"type": "text",
"boost": 1.5,
"analyzer": "final"
},
"inventoryItemType": {
"properties": {
"name": {
"type": "text",
"analyzer": "final",
"fielddata": true
}
}
},
"mobileCarrierId": {
"type": "long"
},
"name": {
"type": "text",
"boost": 1.5,
"analyzer": "final"
},
"objectNumber": {
"type": "text",
"boost": 1.5,
"analyzer": "final"
},
"quantity": {
"type": "double"
},
"reference": {
"type": "text",
"boost": 1.5,
"analyzer": "final"
},
"searchBrand": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"searchExtSerNo": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"searchIndentifier": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"searchName": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"searchObjectNo": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"searchReference": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"sortNumber": {
"type": "long"
},
"stockUnit": {
"type": "text",
"boost": 1.5,
"analyzer": "final"
}
}
}
},
"settings": {
"index": {
"number_of_shards": "3",
"provided_name": "inventoryitems",
"creation_date": "1539253308319",
"analysis": {
"analyzer": {
"final": {
"filter": [
"lowercase"
],
"type": "custom",
"tokenizer": "keyword"
}
}
},
"number_of_replicas": "1",
"uuid": "Kb5KuYEiR5GQqgBPVYjJfA",
"version": {
"created": "5050299"
}
}
}
}
}
The answer is pretty simple: in your mapping your field is named searchIndentifier and in your query you're using a field called searchIdentifier which doesn't exist ;-)

Error while creating an index on Elasticsearch with custom analyzer

I am trying to create an index with a custom default analyzer.
I already checked the following questions:
Analyzer not found exception while creating an index with mapping and settings
How to specify an analyzer while creating an index in ElasticSearch
mapper_parsing_exception for a custom analyzer while creating index in elasticsearch?
but they didn't solve the issue.
Here is my schema:
put /emails
{
"mappings": {
"email": {
"analyzer": "lkw",
"properties": {
"createdOn": {
"type": "date",
"store": true,
"format": "strict_date_optional_time||epoch_millis"
},
"data": {
"type": "object",
"dynamic": "true"
},
"from": {
"type": "string",
"store": true
},
"id": {
"type": "string",
"store": true
},
"sentOn": {
"type": "date",
"store": true,
"format": "strict_date_optional_time||epoch_millis"
},
"sesId": {
"type": "string",
"store": true
},
"subject": {
"type": "string",
"store": true,
"analyzer": "standard"
},
"templates": {
"properties": {
"html": {
"type": "string",
"store": true
},
"plainText": {
"type": "string",
"store": true
}
}
},
"to": {
"type": "string",
"store": true
},
"type": {
"type": "string",
"store": true
}
}
},
"event": {
"_parent": {
"type": "email"
},
"analyzer": "lkw",
"properties": {
"id": {
"type": "string",
"store": true
},
"origin": {
"type": "string",
"store": true
},
"time": {
"type": "date",
"store": true,
"format": "strict_date_optional_time||epoch_millis"
},
"type": {
"type": "string",
"store": true
},
"userAgent": {
"type": "string",
"store": true
}
}
}
},
"settings": {
"analysis": {
"analyzer": {
"lkw": {
"tokenizer": "keyword",
"filter": [
"lowercase"
],
"type": "custom"
}
}
}
}
}
When I execute the command above, I get this error:
{
"error": {
"root_cause": [
{
"type": "mapper_parsing_exception",
"reason": "Root mapping definition has unsupported parameters: [analyzer : lkw]"
}
],
"type": "mapper_parsing_exception",
"reason": "Failed to parse mapping [event]: Root mapping definition has unsupported parameters: [analyzer : lkw]",
"caused_by": {
"type": "mapper_parsing_exception",
"reason": "Root mapping definition has unsupported parameters: [analyzer : lkw]"
}
},
"status": 400
}
Since you have only a few string fields, I suggest you simply specify your lkw analyzer where you need it, just like you did for the standard one:
PUT /emails
{
"mappings": {
"email": {
"properties": {
"createdOn": {
"type": "date",
"store": true,
"format": "strict_date_optional_time||epoch_millis"
},
"data": {
"type": "object",
"dynamic": "true"
},
"from": {
"type": "string",
"store": true,
"analyzer": "lkw"
},
"id": {
"type": "string",
"store": true,
"analyzer": "lkw"
},
"sentOn": {
"type": "date",
"store": true,
"format": "strict_date_optional_time||epoch_millis"
},
"sesId": {
"type": "string",
"store": true,
"analyzer": "lkw"
},
"subject": {
"type": "string",
"store": true,
"analyzer": "standard"
},
"templates": {
"properties": {
"html": {
"type": "string",
"store": true,
"analyzer": "lkw"
},
"plainText": {
"type": "string",
"store": true,
"analyzer": "lkw"
}
}
},
"to": {
"type": "string",
"store": true,
"analyzer": "lkw"
},
"type": {
"type": "string",
"store": true,
"analyzer": "lkw"
}
}
},
"event": {
"_parent": {
"type": "email"
},
"properties": {
"id": {
"type": "string",
"store": true,
"analyzer": "lkw"
},
"origin": {
"type": "string",
"store": true,
"analyzer": "lkw"
},
"time": {
"type": "date",
"store": true,
"format": "strict_date_optional_time||epoch_millis"
},
"type": {
"type": "string",
"store": true,
"analyzer": "lkw"
},
"userAgent": {
"type": "string",
"store": true,
"analyzer": "lkw"
}
}
}
},
"settings": {
"analysis": {
"analyzer": {
"lkw": {
"tokenizer": "keyword",
"filter": [
"lowercase"
],
"type": "custom"
}
}
}
}
}

copy_to field referring parent field in elasticsearch

Here is my mapping of Product document type. I have "copy_to" : "product_all" in mapping.
I expect the value of label in brand to be copied into product_all in product. Is this the correct way to refer to a field in the outer object from an inner object (i.e., product_all in type product from brand)? The mapping below doesn't work: I get no results back for a query on the product_all field for the value of label in brand. Am I missing something?
{
"product": {
"_timestamp": {
"enabled": true,
"store": true
},
"_all": {
"enabled": false
},
"dynamic": "strict",
"properties": {
"brand": {
"properties": {
"id": {
"type": "long"
},
"label": {
"type": "multi_field",
"fields": {
"label": {
"type": "string",
"index_analyzer": "productAnalyzer",
"search_analyzer": "productAnalyzer"
},
"raw": {
"type": "string",
"index": "not_analyzed"
}
},
"copy_to": "product_all"
}
}
},
"product_all": {
"type": "string",
"index_analyzer": "productAnalyzer",
"search_analyzer": "productAnalyzer"
}
}
}
}
I moved "copy_to": "product_all" inside label.label of the multi_field. Now it works:
"label": {
"type": "multi_field",
"fields": {
"label": {
"type": "string",
"copy_to": "product_all",
"index_analyzer": "productAnalyzer",
"search_analyzer": "productAnalyzer"
},
"raw": {
"type": "string",
"index": "not_analyzed"
}
}
}

How to build a parent/child mapping for Elasticsearch?

I tried to use the following mapping to index my data:
{
"mappings": {
"chow-demo": {
"properties": {
"@fields": {
"dynamic": "true",
"properties": {
"asgid": {
"type": "string",
"analyzer": "keyword"
},
"asid": {
"type": "long"
},
"astid": {
"type": "long"
},
"clfg": {
"analyzer": "keyword",
"type": "string"
},
"httpcode": {
"type": "long"
},
"oid": {
"type": "string"
},
"onid": {
"type": "long"
},
"ptrnr": {
"analyzer": "keyword",
"type": "string"
},
"pguid": {
"analyzer": "keyword",
"type": "string"
},
"ptid": {
"type": "long"
},
"sid": {
"type": "long"
},
"src_url": {
"analyzer": "keyword",
"type": "string"
},
"title": {
"analyzer": "keyword",
"type": "string"
},
"ts": {
"type": "long"
}
}
},
"@timestamp": {
"format": "dateOptionalTime",
"type": "date"
},
"@message": {
"type": "string"
},
"@source": {
"type": "string"
},
"@type": {
"analyzer": "keyword",
"type": "string"
},
"@tags": {
"type": "string"
},
"@source_host": {
"type": "string"
},
"@source_path": {
"type": "string"
}
}
},
"chow-clfg": {
"_parent": {
"type": "chow-demo"
},
"dynamic": "true",
"properties": {
"_ttl": {
"enabled": true,
"default": "1h"
},
"clfg": {
"analyzer": "keyword",
"type": "string"
},
"@timestamp": {
"format": "dateOptionalTime",
"type": "date"
},
"count": {
"type": "long"
}
}
}
}
}
I tried to populate the parent type "chow-demo" without populating the child type "chow-clfg", and the documents refused to index. (No documents were indexed into Elasticsearch.)
When I take out the child mapping for "chow-clfg", indexing works properly as usual. Hence I have the following questions:
Is my mapping structure wrong?
Must the parent and child be indexed together at the same time before the data can be successfully indexed?
Really need help in this question for my project to progress! Thanks!
Yes, your mapping is wrong. The _ttl element should be one level higher in the chow-clfg type. In other words _ttl should be on the same level as _parent. However, I am not quite sure how this problem can affect your ability to index.
Parents and children don't have to be indexed together.

Resources