How to convert Elasticsearch index field of string type to json? - elasticsearch

I have an Opensearch index with a string field message defined as below:
{"name":"message.message","type":"string","esTypes":["text"],"count":0,"scripted":false,"searchable":true,"aggregatable":false,"readFromDocValues":false}
Sample data:
"_source" : {
"message" : {
"message" : "user: AB, from: home, to: /app1"
}
}
I would like to convert the message column into json so that I can access the values message.user, message.from and message.to individually.
How do I go about it?

You can use the JSON processor.
POST /_ingest/pipeline/_simulate
{
"pipeline": {
"description": "convert json to object",
"processors": [
{
"json": {
"field": "foo",
"target_field": "json_target"
}
}
]
},
"docs": [
{
"_index": "index",
"_id": "id",
"_source": {
"foo": "{\"name\":\"message.message\",\"type\":\"string\",\"esTypes\":[\"text\"],\"count\":0,\"scripted\":false,\"searchable\":true,\"aggregatable\":false,\"readFromDocValues\":false}\r\n"
}
}
]
}
Response:
{
"docs": [
{
"doc": {
"_index": "index",
"_id": "id",
"_version": "-3",
"_source": {
"foo": """{"name":"message.message","type":"string","esTypes":["text"],"count":0,"scripted":false,"searchable":true,"aggregatable":false,"readFromDocValues":false}
""",
"json_target": {
"esTypes": [
"text"
],
"readFromDocValues": false,
"name": "message.message",
"count": 0,
"aggregatable": false,
"type": "string",
"scripted": false,
"searchable": true
}
},
"_ingest": {
"timestamp": "2022-11-09T19:38:01.16232Z"
}
}
}
]
}

Related

How to update a text type field in Elasticsearch to a keyword field, where each word becomes a keyword in a list?

I’m looking to update a field in Elasticsearch from text to keyword type.
I’ve tried changing the type from text to keyword in the mapping and then reindexing, but with this method the entire text value is converted into one big keyword. For example, ‘limited time offer’ is converted into one keyword, rather than being broken up into something like ['limited', 'time', 'offer'].
Is it possible to change a text field into a list of keywords, rather than one big keyword? Also, is there a way to do this with only a mapping change and then reindexing?
You need to create a new index and reindex using a pipeline that creates a list of words.
Pipeline
POST _ingest/pipeline/_simulate
{
"pipeline": {
"processors": [
{
"split": {
"field": "items",
"target_field": "new_list",
"separator": " ",
"preserve_trailing": true
}
}
]
},
"docs": [
{
"_index": "index",
"_id": "id",
"_source": {
"items": "limited time offer"
}
}
]
}
Results
{
"docs": [
{
"doc": {
"_index": "index",
"_id": "id",
"_version": "-3",
"_source": {
"items": "limited time offer",
"new_list": [
"limited",
"time",
"offer"
]
},
"_ingest": {
"timestamp": "2022-11-11T14:49:15.9814242Z"
}
}
}
]
}
Steps
1 - Create a new index
2 - Create a pipeline
PUT _ingest/pipeline/split_words_field
{
"processors": [
{
"split": {
"field": "items",
"target_field": "new_list",
"separator": " ",
"preserve_trailing": true
}
}
]
}
3 - Reindex with pipeline
POST _reindex
{
"source": {
"index": "idx_01"
},
"dest": {
"index": "idx_02",
"pipeline": "split_words_field"
}
}
Example:
PUT _ingest/pipeline/split_words_field
{
"processors": [
{
"split": {
"field": "items",
"target_field": "new_list",
"separator": " ",
"preserve_trailing": true
}
}
]
}
POST idx_01/_doc
{
"items": "limited time offer"
}
POST _reindex
{
"source": {
"index": "idx_01"
},
"dest": {
"index": "idx_02",
"pipeline": "split_words_field"
}
}
GET idx_02/_search

How to search for a nested key in kibana

I have kibana documents that look like this
{
"_index": "echo.caspian-test.2020-06-11.idx.2",
"_type": "status",
"_id": "01754abe95fd084495da20646194fdf7",
"_score": 1,
"_source": {
"applicationVersion": "9f80e49dea1c647fa1baf2e70665aba3a74158eb",
"echoClientVersion": "1.5.1",
"echoMetadata": {
"transportType": "echo"
},
"dataCenter": "hdc-digital-non-prod",
"echoLoggerVersion": "EchoLogbackAppender-1.5.1",
"host": "e22ab1e4-9256-438b-5855-ad04",
"type": "INFO",
"message": "AddUpdate process method ends",
"messageDetail": {
"logger": "com.kroger.cxp.app.transformer.processor.AddUpdateTransformerImpl",
"thread": "DispatchThread: [com.ibm.mq.jmqi.remote.impl.RemoteSession[:/1f6e1b6c][connectionId=414D5143514D2E4150504C2E54455354967C7F5F0407B82E]]"
},
"routingKey": "caspian-test",
"timestamp": "1603276805250"
},
"fields": {
"timestamp": [
"2020-10-21T10:40:05.250Z"
]
}
}
I need to search all the docs having a particular connectionId which is present in
"messageDetail": {
"logger": "com.kroger.cxp.app.transformer.processor.AddUpdateTransformerImpl",
"thread": "DispatchThread: [com.ibm.mq.jmqi.remote.impl.RemoteSession[:/1f6e1b6c][connectionId=414D5143514D2E4150504C2E54455354967C7F5F0407B82E]]"
}
How can I do that? I have tried searching for messageDetail.thread=%$CONNECTION_ID% but it didn't work.
You need to add a nested path in your search query to make it work and your messageDetail must be of nested datatype, something like below
{
"query": {
"nested": {
"path": "messageDetail", --> note this
"query": {
"bool": {
"must": [
{
"match": {
"messageDetail.thread": "CONNECTION_ID"
}
}
]
}
}
}
}
}
Adding a working sample with mapping, search query, and result
Index mapping
{
"mappings": {
"properties": {
"messageDetail": {
"type" : "nested"
}
}
}
}
Index sample doc
{
"applicationVersion": "9f80e49dea1c647fa1baf2e70665aba3a74158eb",
"echoClientVersion": "1.5.1",
"echoMetadata": {
"transportType": "echo"
},
"dataCenter": "hdc-digital-non-prod",
"echoLoggerVersion": "EchoLogbackAppender-1.5.1",
"host": "e22ab1e4-9256-438b-5855-ad04",
"type": "INFO",
"message": "AddUpdate process method ends",
"messageDetail": {
"logger": "com.kroger.cxp.app.transformer.processor.AddUpdateTransformerImpl",
"thread": "DispatchThread: [com.ibm.mq.jmqi.remote.impl.RemoteSession[:/1f6e1b6c][connectionId=414D5143514D2E4150504C2E54455354967C7F5F0407B82E]]"
},
"routingKey": "caspian-test",
"timestamp": "1603276805250"
}
And search query
{
"query": {
"nested": {
"path": "messageDetail",
"query": {
"bool": {
"must": [
{
"match": {
"messageDetail.thread": "DispatchThread"
}
}
]
}
}
}
}
}
And search res
"hits": [
{
"_index": "nestedmsg",
"_type": "_doc",
"_id": "1",
"_score": 0.2876821,
"_source": {
"applicationVersion": "9f80e49dea1c647fa1baf2e70665aba3a74158eb",
"echoClientVersion": "1.5.1",
"echoMetadata": {
"transportType": "echo"
},
"dataCenter": "hdc-digital-non-prod",
"echoLoggerVersion": "EchoLogbackAppender-1.5.1",
"host": "e22ab1e4-9256-438b-5855-ad04",
"type": "INFO",
"message": "AddUpdate process method ends",
"messageDetail": {
"logger": "com.kroger.cxp.app.transformer.processor.AddUpdateTransformerImpl",
"thread": "DispatchThread: [com.ibm.mq.jmqi.remote.impl.RemoteSession[:/1f6e1b6c][connectionId=414D5143514D2E4150504C2E54455354967C7F5F0407B82E]]"
},
"routingKey": "caspian-test",
"timestamp": "1603276805250"
}
}
]

search first element of a multivalue text field in elasticsearch

I want to search on the first element of an array field in my Elasticsearch documents, but I can't figure out how to write such a query.
For testing, I created a new index with fielddata=true, but I still didn't get the response I wanted.
Document
"name" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
Values
name : ["John", "Doe"]
My request
{
"query": {
"bool" : {
"must" : {
"script" : {
"script" : {
"source": "doc['name'][0]=params.param1",
"params" : {
"param1" : "john"
}
}
}
}
}
}
}
Incoming Response
"reason": "Fielddata is disabled on text fields by default. Set fielddata=true on [name] in order to load fielddata in memory by uninverting the inverted index. Note that this can however use significant memory. Alternatively use a keyword field instead."
You can use the following script that is used in a search request to return a scripted field:
{
"script_fields": {
"firstElement": {
"script": {
"lang": "painless",
"inline": "params._source.name[0]"
}
}
}
}
Search Result:
"hits": [
{
"_index": "stof_64391432",
"_type": "_doc",
"_id": "1",
"_score": 1.0,
"fields": {
"firstElement": [
"John" <-- note this
]
}
}
]
You can use a Painless script to create a script field to return a customized value for each document in the results of a query.
You need to use equality equals operator '==' to COMPARE two
values where the resultant boolean type value is true if the two
values are equal and false otherwise in the script query.
Adding a working example with index data, mapping, search query, and search result
Index Mapping:
{
"mappings":{
"properties":{
"name":{
"type":"text",
"fielddata":true
}
}
}
}
Index data:
{
"name": [
"John",
"Doe"
]
}
Search Query:
{
"script_fields": {
"my_field": {
"script": {
"lang": "painless",
"source": "params['_source']['name'][0] == params.params1",
"params": {
"params1": "John"
}
}
}
}
}
Search Result:
"hits": [
{
"_index": "test",
"_type": "_doc",
"_id": "1",
"_score": 1.0,
"fields": {
"my_field": [
true <-- note this
]
}
}
]
Arrays of objects do not work as you would expect: you cannot query
each object independently of the other objects in the array. If you
need to be able to do this then you should use the nested data type
instead of the object data type.
You can use the script as shown in my another answer if you want to just compare the value of the first element of the array to some other value. But based on your comments, it looks like your use case is quite different.
If you want to search the first element of the array you need to convert your data into nested form. With arrays of objects, at search time you can't refer to "the first element" or "the last element".
Adding a working example with index data, mapping, search query, and search result
Index Mapping:
{
"mappings": {
"properties": {
"name": {
"type": "nested"
}
}
}
}
Index Data:
{
"booking_id": 2,
"name": [
{
"first": "John Doe",
"second": "abc"
}
]
}
{
"booking_id": 1,
"name": [
{
"first": "Adam Simith",
"second": "John Doe"
}
]
}
{
"booking_id": 3,
"name": [
{
"first": "John Doe",
"second": "Adam Simith"
}
]
}
Search Query:
{
"query": {
"nested": {
"path": "name",
"query": {
"bool": {
"must": [
{
"match_phrase": {
"name.first": "John Doe"
}
}
]
}
}
}
}
}
Search Result:
"hits": [
{
"_index": "test",
"_type": "_doc",
"_id": "2",
"_score": 0.9400072,
"_source": {
"booking_id": 2,
"name": [
{
"first": "John Doe",
"second": "abc"
}
]
}
},
{
"_index": "test",
"_type": "_doc",
"_id": "3",
"_score": 0.9400072,
"_source": {
"booking_id": 3,
"name": [
{
"first": "John Doe",
"second": "Adam Simith"
}
]
}
}
]

How to get value in nested field using Kibana scripted field?

I'm new to Elastic Stack.
Here, I'm trying to get the value for "pressure" and then convert it to numeric value(string⇒numeric) using Kibana scripted field.
I tried scripted field, but it didn't work for me.
Any idea? I really appreciate your support in advance.
One of my data records is as below.
{
"_index": "production",
"_type": "_doc",
"_id": "4570df7a0d4ec1b0e624e868a5861a0f1a9a7f6c35fdsssafafsa734fb152f4bed",
"_version": 1,
"_score": null,
"_source": {
"factorycode": "AM-NY",
"productcode": "STR",
"lastupdatetime": "2020-05-28T04:16:17.590Z",
"@timestamp": "2020-05-28T04:14:48.000Z",
"massproduction": {
"errorcode": null,
"equipment": "P17470110",
"operatorldap": null,
"machinetime": null,
"quantity": "1",
"externalfilename": null,
"errorcomment": null,
"datas": {
"data": [
{
"value": "45.4",
"id": "001",
"name": "pressure"
},
{
"value": "0.45",
"id": "002",
"name": "current"
}
]
},
"ladderver": null,
"eid": null,
"setupid": null,
"model": "0",
"identificationtagid": null,
"workid": "GD606546sf0B002020040800198",
"reuse": {
"num": "0"
},
"registrydate": "2020-05-28T13:14:48",
"product": "GD604564550B00",
"line": "STRS001",
"judge": "1",
"cycletime": null,
"processcode": "OP335",
"registryutcdate": "2020-04-28T04:14:48",
"name": "massproduction"
}
},
"fields": {
"massproduction.registrydate": [
"2020-05-28T13:14:48.000Z"
],
"@timestamp": [
"2020-05-28T04:14:48.000Z"
],
"lastupdatetime": [
"2020-05-28T04:16:17.590Z"
],
"registrydate": [
"2020-05-28T13:14:48.000Z"
],
"massproduction.registryutcdate": [
"2020-05-28T04:14:48.000Z"
],
"registryutcdate": [
"2020-05-28T04:14:48.000Z"
]
},
"sort": [
158806546548000
]
}
This is my "painless" scripted field in Kibana.
for(item in params._source.massproduction.datas.data)
{
if(item.name=='pressure'){
return item.value;
}
}
return 0;
You can use Float.parseFloat(value) to convert a string to a float:
if(params._source.massproduction!=null && params._source.massproduction.datas!=null &&params._source.massproduction.datas.data.size()>0)
{
def data = params._source.massproduction.datas.data;
if(data instanceof ArrayList)
{
for(item in data)
{
if(item.name=='pressure')
{
return Float.parseFloat(item.value);
}
}
}else
{
if(data.name=='pressure')
{
return Float.parseFloat(data.value);
}
}
}
return 0;

How to configure elasticsearch regexp query

I am trying to configure an Elasticsearch request. I use the query DSL and am trying to find documents containing the word "swagger" in the "message" field.
Here is one of correct answer I want to show :
{
"_index": "apiconnect508",
"_type": "audit",
"_id": "AWF1us1T4ztincEzswAr",
"_score": 1,
"_source": {
"consumerOrgId": null,
"headers": {
"http_accept": "application/json",
"content_type": "application/json",
"request_path": "/apim-5a7c34e0e4b02e66c60edbb2-2018.02/auditevent",
"http_version": "HTTP/1.1",
"http_connection": "keep-alive",
"request_method": "POST",
"http_host": "localhost:9700",
"request_uri": "/apim-5a7c34e0e4b02e66c60edbb2-2018.02/auditevent",
"content_length": "533",
"http_user_agent": "Wink Client v1.1.1"
},
"nlsMessage": {
"resource": "messages",
"replacements": [
"test",
"1.0.0",
"ext_mafashagov@rencredit.ru"
],
"key": "swagger.import.notification"
},
"notificationType": "EVENT",
"eventType": "AUDIT",
"source": null,
"envId": null,
"message": "API test version 1.0.0 was created from a Swagger document by ext_mafashagov@rencredit.ru.",
"userId": "ext_mafashagov@rencredit.ru",
"orgId": "5a7c34e0e4b02e66c60edbb2",
"assetType": "api",
"tags": [
"_geoip_lookup_failure"
],
"gateway_geoip": {},
"datetime": "2018-02-08T14:04:32.731Z",
"@timestamp": "2018-02-08T14:04:32.747Z",
"assetId": "5a7c58f0e4b02e66c60edc53",
"@version": "1",
"host": "127.0.0.1",
"id": "5a7c58f0e4b02e66c60edc55",
"client_geoip": {}
}
}
I try to find ths JSON by :
POST myAddress/_search
The following query works without the "regexp" clause. How should I configure the regexp part of my query?
{
"query": {
"filtered": {
"filter": {
"bool": {
"must": [
{
"range": {
"@timestamp" : {"gte" : "now-100d"}
}
},
{
"term": {
"_type": "audit"
}
},
{
"regexp" : {
"message": "*wagger*"
}
}
]
}
}
}
},
"sort": {
"TraceDateTime": {
"order": "desc",
"ignore_unmapped": "true"
}
}
}
If message field is analyzed, this simple match query should work:
"match":{
"message":"*swagger*"
}
However if it is not analyzed, these two queries should also work for you:
These two queries are case sensitive so you should consider lower casing your field if you wish to keep it not analyzed.
"wildcard":{
"message":"*swagger*"
}
or
"regexp":{
"message":"swagger"
}
Be careful as wildcard and regexp queries degrade performance.

Resources