How to deep clone an Object field in a Painless script - Elasticsearch

I want to create a new field to store the old value when updating an object field.
The original doc is:
{
"_index" : "test",
"_id" : "15895_-1",
"_source" : {
"device" : {
"standard": {
"name" : "unknown",
"brand" : "unknown"
},
"other": "other"
}
}
}
This is my _update_by_query request:
GET test/_update_by_query
{
"script": {
"source": """
if (params.deviceStandard != null) {
ctx._source['device_algoed'] = ctx._source['device'];
ctx._source['device']['standard']['series'] = params.deviceStandard.series;
ctx._source['device']['standard']['brand'] = params.deviceStandard.brand;
}
""",
"params": {
"deviceStandard": {
"series" : "unknown",
"brand" : "OPPO"
}
}
},
"query": {
"bool": {
"filter": {
"term": {
"_id": "15895_-1"
}
}
}
}
}
When I change the ctx._source['device']['standard'], the ctx._source['device_algoed'] will change too.
So how to deep clone the ctx._source['device']['standard'] to
ctx._source['device_algoed']?

What you can do is to create a new map out of the origin one:
ctx._source['device_algoed'] = [:];
ctx._source['device_algoed'].putAll(ctx._source['device']);
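And then you can freely add, remove or replace top-level keys of ctx._source['device'] without impacting ctx._source['device_algoed']. Note that putAll makes a shallow copy, so nested maps such as ctx._source['device']['standard'] are still shared between the two fields; copy them the same way (a new map plus putAll) before modifying them.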
That's it!

Related

Search multi field with term query

I have some documents in an index:
"hits" : [
{
"_index" : "siem-referencedata-table-table2d526444eff99b1706053853ef7",
"_type" : "_doc",
"_id" : "0table222cc244b04b59d9ecafb0476e6",
"_score" : 1.0,
"_source" : {
"column-name1" : "10.1.10.1",
"column-name2" : "range(100,200)",
"column-name3" : "nam3",
"create_time" : "2022-05-21 03:30:39",
"last_seen" : "2022-05-21 03:30:39",
"id" : "0table222cc244b04b59d9ecafb0476e6"
}
},...
I want to search documents with three fields column-name1, column-name2 and column-name3.
I use below query with term to search exact considered word:
{
"query": {
"bool": {
"must": [
{
"term": {
"column-name1": {"value":"10.1.10.1"}
}
},
{
"term": {
"column-name2": {"value":"range(100,200)"}
}
},
{
"term": {
"column-name3": {"value":"nam3"}
}
}
]
}
}
}
It works without "column-name2": {"value":"range(100,200)"}, but not with it. What should I do about the range value? Is there another way to handle this?
The query was solved by adding .keyword to the field names, as below:
{
"query": {
"bool": {
"must": [
{
"term": {
"column-name1.keyword": {"value":"10.1.10.1"}
}
},
{
"term": {
"column-name2.keyword": {"value":"range(100,200)"}
}
},
{
"term": {
"column-name3.keyword": {"value":"nam3"}
}
}
]
}
}
}
Thanks to Barkha Jain!

How remove an element in all documents of an index in elasticsearch?

I have list of documents in an index of Elasticsearch like below:
...
{
"_index" : "index-name",
"_type" : "_doc",
"_id" : "table1c7151240c583e60c8e2cbad351",
"_score" : 0.28322574,
"_source" : {
...
"table.tag" : {
"datasources_keys" : [...],
"tags" : [
"6e7358e2bfc84c34af32a01f6d19e9b2",
"ab450ae5c1734fb0aad5fed052b42023",
"f725e3100bba4b5eb8a5199a2b3e62fc"
]
}
}
},
...
I want to delete an element in all documents. For example, a specified tag ID such as "6e7358e2bfc84c34af32a01f6d19e9b2" should be removed from tags. How should I write a script for that? Is there another way to do it in Elasticsearch?
I'm using this script, but it doesn't work:
POST index-name/_update_by_query
{
"query": {
"match":{
"table.tag.tags": "6e7358e2bfc84c34af32a01f6d19e9b2"
}
},
"script": {
"source": "ctx._source['table.tag']['tags'] -= 6e7358e2bfc84c34af32a01f6d19e9b2",
"lang": "painless"
}
}
Here is a more concise way with implicit list iterations and if conditions (+ it's a one-liner 😉):
POST index-name/_update_by_query
{
"query": {
"match": {
"table.tag.tags": "6e7358e2bfc84c34af32a01f6d19e9b2"
}
},
"script": {
"lang": "painless"
"source": "ctx._source['table.tag']['tags'].removeIf(tag -> tag == params.tag);",
"params": {
"tag": "6e7358e2bfc84c34af32a01f6d19e9b2"
}
}
}
UPDATE
You can add your second condition like this:
ctx._source['table.tag']['tags'].removeIf(tag -> tag == params.tag);
if (ctx._source['table.tag']['tags'].size() == 0) {
ctx._source['table.tag'].remove('tags');
}
You can try below script:
POST index-name/_update_by_query
{
"query": {
"match": {
"table.tag.tags": "6e7358e2bfc84c34af32a01f6d19e9b2"
}
},
"script": {
"source": """
for (int i = 0; i < ctx._source['table.tag']['tags'].length; i++)
{
if(ctx._source['table.tag']['tags'][i]=='6e7358e2bfc84c34af32a01f6d19e9b2')
{
ctx._source['table.tag']['tags'].remove(i);
}
}"""
}
}

Trying to update a nested geoip location field in elasticsearch

Here is what I've tried:
POST orders/_update_by_query
{
"script" : "ctx._source.geoip += newElement",
"params": {
"newElement": {
"location" : "[40.730610, -73.935242]"
}
},
"query": {
"term": {
"CITY": {
"value": "nyc"
}
}
}
}
The above throws error Unknown key for a START_OBJECT in [params].
Second Attempt:
POST orders/_update_by_query
{
"script":{
"source":
"for (item in ctx._source.geoip){item.location = '[40.730610, -73.935242]'}",
"lang":"painless"
},
"query": {
"term": {
"CITY": {
"value": "nyc"
}
}
}
}
The above throws null pointer exception, and points to the period at source.geoip
I also tried changing the value of location to just test but receive the same errors.
Here is my mapping:
{
"orders" : {
"mappings" : {
"properties" : {
"geoip" : {
"dynamic" : "true",
"properties" : {
"location" : {
"type" : "geo_point"
}
}
}
}
}
I am using ES v7.2 and Kibana v7.2
A couple of issues in the 1st approach:
params need to be defined within the script object, not below it
newElement needs to be accessed using params.newElement
you cannot append += params.newElement to a nonexistent ctx._source.geoip
you cannot append an object to a single-value field -- you can just assign it
location is of the geo_point type, so either [40.730610, -73.935242] ([lon, lat]) or "-73.935242,40.730610" ("lat,lon"), but not a mixture of both
Working command:
POST orders/_update_by_query
{
"script": {
"inline": "ctx._source.geoip = params.newElement",
"params": {
"newElement": {
"location": [
40.73061,
-73.935242
]
}
}
},
"query": {
"term": {
"CITY": {
"value": "nyc"
}
}
}
}

How can I discard results with empty fields?

This is the structure of my documents:
{
"_index" : "index",
"_type" : "_doc",
"_id" : "4002809",
"_score" : 5.6219883,
"_source" : {
"manufacturer" : "manufacturer of the part",
"shortdesc" : "Description of the part",
"te_param" : "None",
"coverart" : "/partpics/placeholder.jpg",
"has_datasheet" : 0,
"id" : 4002809,
"part" : "437297OBD25"
}
},
I need to discard the results whose "shortdesc" field is empty.
This should work:
GET /_search
{
"query": {
"bool": {
"must_not": {
"exists": {
"field": "shortdesc"
}
}
}
}
}
Referenced from here:
https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-exists-query.html
I like AJ's suggestion:
{
"query": {
"bool": {
"must_not": {
"exists": {
"field": "shortdesc"
}
}
}
}
}
That will return all the docs that DON'T have that field on it.
Or you can use the following:
{
"query": {
"bool": {
"must_not": {
"term": {
"shortdesc": ""
}
}
}
}
}
This will discard all the docs that contain the field but with an empty description. I assume that an empty description is stored as an empty string (represented as "").

How can I find all documents in elasticsearch that contain a number in a certain field?

I have a keyword-typed field that can contain either a number or a string. If the field does not contain any letters, I would like to hit on that document. How can I do this?
My index mapping looks like:
{
"mappings": {
"Entry": {
"properties": {
"testField": {
"type": "keyword"
}
}
}
}
}
My documents look like this:
{
"testField":"123abc"
}
or
{
"testField": "456789"
}
I've tried the query:
{
"query": {
"range": {
"gte": 0,
"lte": 2000000
}
}
}
but it still hits on 123abc. How can I design this so that I only hit on the documents with a number in that particular field?
There is another more optimal option for achieving exactly what you want. You can leverage the ingest API pipelines and using a script processor you can create another numeric field at indexing time that you can then use more efficiently at search time.
The ingestion pipeline below contains a single script processor which will create another field called numField that will only contain numeric values.
POST _ingest/pipeline/_simulate
{
"pipeline": {
"processors": [
{
"script": {
"source": """
ctx.numField = /\D/.matcher(ctx.testField).replaceAll("");
"""
}
}
]
},
"docs": [
{
"_source": {
"testField": "123"
}
},
{
"_source": {
"testField": "abc123"
}
},
{
"_source": {
"testField": "123abc"
}
},
{
"_source": {
"testField": "abc"
}
}
]
}
Simulating this pipeline with 4 different documents having a mix of alphanumeric content, will yield this:
{
"docs" : [
{
"doc" : {
"_index" : "_index",
"_type" : "_type",
"_id" : "_id",
"_source" : {
"numField" : "123",
"testField" : "123"
},
"_ingest" : {
"timestamp" : "2019-05-09T04:14:51.448Z"
}
}
},
{
"doc" : {
"_index" : "_index",
"_type" : "_type",
"_id" : "_id",
"_source" : {
"numField" : "123",
"testField" : "abc123"
},
"_ingest" : {
"timestamp" : "2019-05-09T04:14:51.448Z"
}
}
},
{
"doc" : {
"_index" : "_index",
"_type" : "_type",
"_id" : "_id",
"_source" : {
"numField" : "123",
"testField" : "123abc"
},
"_ingest" : {
"timestamp" : "2019-05-09T04:14:51.448Z"
}
}
},
{
"doc" : {
"_index" : "_index",
"_type" : "_type",
"_id" : "_id",
"_source" : {
"numField" : "",
"testField" : "abc"
},
"_ingest" : {
"timestamp" : "2019-05-09T04:14:51.448Z"
}
}
}
]
}
After indexing your documents using this pipeline, you can run your range query on numField instead of testField. Compared to the other solution (sorry @Kamal), it will shift the scripting burden to run only once per document at indexing time, instead of every time on every document at search time.
{
"query": {
"range": {
"numField": {
"gte": 0,
"lte": 2000000
}
}
}
}
AFAIK, Elasticsearch does not have a direct solution for this.
Instead you would need to write a Script Query. Below is what you are looking for:
POST <your_index_name>/_search
{
"query": {
"bool": {
"must": [
{
"script": {
"script": {
"lang": "painless",
"source": """
try{
String temp = doc['testField'].value;
int a = Integer.parseInt(temp);
if(a instanceof Integer)
return true;
}catch(NumberFormatException e){
return false;
}
"""
}
}
}
]
}
}
}
Hope it helps!

Resources