How to remove an element from all documents of an index in Elasticsearch? - elasticsearch

I have a list of documents in an Elasticsearch index, like below:
...
{
"_index" : "index-name",
"_type" : "_doc",
"_id" : "table1c7151240c583e60c8e2cbad351",
"_score" : 0.28322574,
"_source" : {
...
"table.tag" : {
"datasources_keys" : [...],
"tags" : [
"6e7358e2bfc84c34af32a01f6d19e9b2",
"ab450ae5c1734fb0aad5fed052b42023",
"f725e3100bba4b5eb8a5199a2b3e62fc"
]
}
}
},
...
I want to delete an element from all documents. For example, a specified tag_id such as "6e7358e2bfc84c34af32a01f6d19e9b2" should be removed from tags. How should I write a script for that? Is there another way to do it in Elasticsearch?
I'm using this script, but it doesn't work:
POST index-name/_update_by_query
{
"query": {
"match":{
"table.tag.tags": "6e7358e2bfc84c34af32a01f6d19e9b2"
}
},
"script": {
"source": "ctx._source['table.tag']['tags'] -= 6e7358e2bfc84c34af32a01f6d19e9b2",
"lang": "painless"
}
}

Here is a more concise way with implicit list iterations and if conditions (+ it's a one-liner 😉):
POST index-name/_update_by_query
{
"query": {
"match": {
"table.tag.tags": "6e7358e2bfc84c34af32a01f6d19e9b2"
}
},
"script": {
"lang": "painless"
"source": "ctx._source['table.tag']['tags'].removeIf(tag -> tag == params.tag);",
"params": {
"tag": "6e7358e2bfc84c34af32a01f6d19e9b2"
}
}
}
UPDATE
You can add your second condition like this:
ctx._source['table.tag']['tags'].removeIf(tag -> tag == params.tag);
if (ctx._source['table.tag']['tags'].size() == 0) {
ctx._source['table.tag'].remove('tags');
}

You can try below script:
POST index-name/_update_by_query
{
"query": {
"match": {
"table.tag.tags": "6e7358e2bfc84c34af32a01f6d19e9b2"
}
},
"script": {
"source": """
for (int i = 0; i < ctx._source['table.tag']['tags'].length; i++)
{
if(ctx._source['table.tag']['tags'][i]=='6e7358e2bfc84c34af32a01f6d19e9b2')
{
ctx._source['table.tag']['tags'].remove(i);
}
}"""
}
}

Related

Is there a way to delete elements of an array using elasticsearch update_by_query?

elasticsearch version : 7.8.1
[
...
{
"target" : [
{ "docId" : "operator" },
{ "docId" : "test" },
{ "docId" : "abcde" },
]
}
...
]
Hello?
I want to delete the element whose docId is operator from the above array called target in elasticsearch.
I tried the below but failed.
What part of my code is wrong?
{
"query": {
"terms": {
"target": [
{
"docId": "operator"
}
]
}
},
"script": {
"source": " for (int i = 0; i < ctx._source.target.length(); i++) { if (ctx._source.target[i].docId == params.docId) { ctx._source.target.remove(i);}}",
"params": {
"docId": "operator"
}
}
}
The query was wrong and length() was not a function either.
{
"query": {
"bool": {
"filter": {
"term": {
"target.docId": "operator"
}
}
}
},
"script": {
"lang": "painless",
"source": " for (int i = 0; i < ctx._source.target.length; i++) { if (ctx._source.target[i].docId == params.docId) { ctx._source.target.remove(i);}}",
"params": {
"docId": "operator"
}
}
}

How to deep clone a Object field in painless script

I want to create a new field to store the old value when updating an object field.
The original doc is:
{
"_index" : "test",
"_id" : "15895_-1",
"_source" : {
"device" : {
"standard": {
"name" : "unknown",
"brand" : "unknown"
},
"other": "other"
}
}
}
This is my update_by_query request:
GET test/_update_by_query
{
"script": {
"source": """
if (params.deviceStandard != null) {
ctx._source['device_algoed'] = ctx._source['device'];
ctx._source['device']['standard']['series'] = params.deviceStandard.series;
ctx._source['device']['standard']['brand'] = params.deviceStandard.brand;
}
""",
"params": {
"deviceStandard": {
"series" : "unknown",
"brand" : "OPPO"
}
}
},
"query": {
"bool": {
"filter": {
"term": {
"_id": "15895_-1"
}
}
}
}
}
When I change the ctx._source['device']['standard'], the ctx._source['device_algoed'] will change too.
So how to deep clone the ctx._source['device']['standard'] to
ctx._source['device_algoed']?
What you can do is to create a new map out of the origin one:
ctx._source['device_algoed'] = [:];
ctx._source['device_algoed'].putAll(ctx._source['device']);
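And then you can freely modify the top-level keys of ctx._source['device'] without impacting ctx._source['device_algoed']. Note that putAll makes a shallow copy: nested maps such as ctx._source['device']['standard'] are still shared between the two, so copy them the same way (a new map plus putAll) before modifying them.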
That's it!

How to calculate data in elastic search

I am trying to calculate a value and assign it to the same field in the search results.
{
query: {
"query_string": {
"query": req.body.query
}
}
}
I am getting search result.
"results": [
{
"_index": "test_index",
"_type": "_doc",
"_id": "34",
"_score": 1.8216469,
"_source": {
"pre_function_area": "100",
"property_id": 46,
"max_benchmark": 0,
}
}
]
Here I want to modify max_benchmark during the search, so I am sending a query like this:
{
"query": {
"bool" : {
"must" : {
"query_string": {
"query": "test"
}
},
"filter" : {
"script" : {
"script" : { //Math.round((pow * doc['max_benchmark'].value) * 10) / 10
"lang": "expression",
// "lang": "painless",
"source": "doc['max_benchmark'].value * 5",
}
}
}
}
}
}
But it does not update the field. I don't actually want to update the field value in Elasticsearch; I just want to logically change the value after the search so that it is displayed to the user. Basically, I am trying to calculate the formula below and update the field with the result.
let months = 0;
if(event_date != "") {
let ai_date = moment();
ai_date.month(obj._source.month);
ai_date.year(obj._source.year);
months = ai_date.diff(event_date, 'months');
}
console.log("months "+months);
let pow = Math.pow(1.009,months);
obj._source.max_benchmark_cal = Math.round((pow * obj._source.max_benchmark) * 10) / 10;
obj._source.min_benchmark_cal = Math.round((pow * obj._source.min_benchmark) * 10) / 10;
} else {
obj._source.max_benchmark_cal = "NA";
obj._source.min_benchmark_cal = "NA";
}
Can anyone please help me?
You are close to a good solution.
The answer is to use a script field.
You can find the documentation here:
https://www.elastic.co/guide/en/elasticsearch/reference/7.8/search-fields.html#script-fields
GET /_search
{
"query": {
"match_all": {}
},
"script_fields": {
"test1": {
"script": {
"lang": "painless",
"source": "doc['price'].value * 2"
}
},
"test2": {
"script": {
"lang": "painless",
"source": "doc['price'].value * params.factor",
"params": {
"factor": 2.0
}
}
}
}
}
EDIT: To respond to your comment.
You cannot add a field to _source, but you can get both the _source and the scripted field by specifying the fields you want in _source (wildcards such as * are accepted).
As an example:
GET test/_search
{
"query": {
"match_all": {}
},
"_source": "*",
"script_fields": {
"test1": {
"script": {
"lang": "painless",
"source": "if(doc['title.keyword'].size()>0 ){doc['title.keyword'].value}"
}
}
}
}

How to subtract two values in Elasticsearch

I'm trying to get the difference between two fields. I'm using Elasticsearch 5.5.
I have already tried
{
"query": {
"bool": {
"filter": {
"script": {
"script": "doc['students.total_fee'].value - doc['students.paid_fee'].value"
}
}
}
}
}
but it is returning empty "hits".
I have also tried
{
"query": {
"bool": {
"must": [
{
"nested": {
"path": "students",
"query": {
"script": {
"script": {
"inline": "doc['students.total_fee'].value - doc['students.paid_fee'].value",
"lang": "expression"
}
}
}
}
}
]
}
}
and it is returning "0".
Also, "script_fields" did not work.
{ "script_fields" :
{ "difference" :
{ "script" : "doc['students.total_fee'].value - doc['students.paid_fee'].value"
} } }
suppose I have data in following format.
"_source" : {
"students" : [
{
"name" : "A",
"total_fee" : 12345,
"paid_fee" : 12344.8
},
{
"name" : "B",
"total_fee" : 23456,
"paid_fee" : 23455.6
}
]
}
Now I want to get the difference between "total_fee" and "paid_fee" for each student.
I expect to get an array of differences for all students.
Thanks in advance :D
You need to use the query below; my ES version is 6.2.2 and it gives the correct result. But remember that scripting is generally CPU intensive.
If you have a normal (non-nested) field, then the query below works fine.
{
"size": 10,
"script_fields": {
"fare_diff": {
"script": "doc[\"students.total_fee\"].value - doc[\"students.paid_fee\"].value"
}
}
}
If you are using a nested field parameter then the query would be like below.
{
"script_fields": {
"fare_diff": {
"script": {
"lang": "painless",
"source": "int total = 0; def l = new ArrayList(); for (int i = 0; i < params['_source']['students'].size(); ++i) { l.add(params['_source']['students'][i]['total_fee'] - params['_source']['students'][i]['paid_fee']);} return l.toArray();"
}
}
}
}
Reason: Because nested documents are indexed as separate documents, they can only be accessed within the scope of the nested query, the nested/reverse_nested aggregations, or nested inner hits.

How can I find all documents in elasticsearch that contain a number in a certain field?

I have a keyword type'd field that can contain either a number or a string. If the field does not contain any letters, I would like to hit on that document. How can I do this?
My index mapping looks like:
{
"mappings": {
"Entry": {
"properties": {
"testField": {
"type": "keyword"
}
}
}
}
}
My documents look like this:
{
"testField":"123abc"
}
or
{
"testField": "456789"
}
I've tried the query:
{
"query": {
"range": {
"gte": 0,
"lte": 2000000
}
}
}
but it still hits on 123abc. How can I design this so that I only hit on the documents with a number in that particular field?
There is another more optimal option for achieving exactly what you want. You can leverage the ingest API pipelines and using a script processor you can create another numeric field at indexing time that you can then use more efficiently at search time.
The ingestion pipeline below contains a single script processor which will create another field called numField that will only contain numeric values.
POST _ingest/pipeline/_simulate
{
"pipeline": {
"processors": [
{
"script": {
"source": """
ctx.numField = /\D/.matcher(ctx.testField).replaceAll("");
"""
}
}
]
},
"docs": [
{
"_source": {
"testField": "123"
}
},
{
"_source": {
"testField": "abc123"
}
},
{
"_source": {
"testField": "123abc"
}
},
{
"_source": {
"testField": "abc"
}
}
]
}
Simulating this pipeline with 4 different documents having a mix of alphanumeric content, will yield this:
{
"docs" : [
{
"doc" : {
"_index" : "_index",
"_type" : "_type",
"_id" : "_id",
"_source" : {
"numField" : "123",
"testField" : "123"
},
"_ingest" : {
"timestamp" : "2019-05-09T04:14:51.448Z"
}
}
},
{
"doc" : {
"_index" : "_index",
"_type" : "_type",
"_id" : "_id",
"_source" : {
"numField" : "123",
"testField" : "abc123"
},
"_ingest" : {
"timestamp" : "2019-05-09T04:14:51.448Z"
}
}
},
{
"doc" : {
"_index" : "_index",
"_type" : "_type",
"_id" : "_id",
"_source" : {
"numField" : "123",
"testField" : "123abc"
},
"_ingest" : {
"timestamp" : "2019-05-09T04:14:51.448Z"
}
}
},
{
"doc" : {
"_index" : "_index",
"_type" : "_type",
"_id" : "_id",
"_source" : {
"numField" : "",
"testField" : "abc"
},
"_ingest" : {
"timestamp" : "2019-05-09T04:14:51.448Z"
}
}
}
]
}
After indexing your documents using this pipeline, you can run your range query on numField instead of testField. Compared to the other solution (sorry @Kamal), it will shift the scripting burden to run only once per document at indexing time, instead of every time on every document at search time.
{
"query": {
"range": {
"numField": {
"gte": 0,
"lte": 2000000
}
}
}
}
Afaik, Elasticsearch does not have a direct solution for this.
Instead you would need to write a Script Query. Below is what you are looking for:
POST <your_index_name>/_search
{
"query": {
"bool": {
"must": [
{
"script": {
"script": {
"lang": "painless",
"source": """
try{
String temp = doc['testField'].value;
int a = Integer.parseInt(temp);
if(a instanceof Integer)
return true;
}catch(NumberFormatException e){
return false;
}
"""
}
}
}
]
}
}
}
Hope it helps!

Resources