Is there a way to delete elements of an array using elasticsearch update_by_query? - elasticsearch

elasticsearch version : 7.8.1
[
...
{
"target" : [
{ "docId" : "operator" },
{ "docId" : "test" },
{ "docId" : "abcde" },
]
}
...
]
Hello?
I want to delete the element whose docId is operator from the above array called target in elasticsearch.
I tried the below but failed.
What part of my code is wrong?
{
"query": {
"terms": {
"target": [
{
"docId": "operator"
}
]
}
},
"script": {
"source": " for (int i = 0; i < ctx._source.target.length(); i++) { if (ctx._source.target[i].docId == params.docId) { ctx._source.target.remove(i);}}",
"params": {
"docId": "operator"
}
}
}

The query was wrong and length() was not a function either.
{
"query": {
"bool": {
"filter": {
"term": {
"target.docId": "operator"
}
}
}
},
"script": {
"lang": "painless",
"source": " for (int i = 0; i < ctx._source.target.length; i++) { if (ctx._source.target[i].docId == params.docId) { ctx._source.target.remove(i);}}",
"params": {
"docId": "operator"
}
}
}

Related

Elasticsearch- how to update by query on a nested field that contains a '-'

I have some data in an elasticsearch index that looks like this:
{
"field1" : {
"en-US" : {
"value1" : "example",
"value2" : "example2"
}
},
"field2" : "x"
}
I need to update the value of the nested fields by query and my update script looks like this:
{
"query": {
"bool": {
"must": [
{
"terms": {
"field2": [
"x"
]
}
}
]
}
},
"script": {
"lang": "painless",
"params": {
"value": example
},
"source": "ctx._source.field1.en-US.value1 = params.value"
}
}
This gives an error because the '-' in en-US is parsed as a subtraction operator. I can update the whole object by changing the script like this:
"script": {
"lang": "painless",
"params": {
"value": {
"value1" : "example3"
}
},
"source": "ctx._source.field1['en-US'] = params.value"
}
This works, but it forces me to replace the whole object rather than update one particular key inside it. Is there a way to update just a single key?

How to remove an element in all documents of an index in Elasticsearch?

I have list of documents in an index of Elasticsearch like below:
...
{
"_index" : "index-name",
"_type" : "_doc",
"_id" : "table1c7151240c583e60c8e2cbad351",
"_score" : 0.28322574,
"_source" : {
...
"table.tag" : {
"datasources_keys" : [...],
"tags" : [
"6e7358e2bfc84c34af32a01f6d19e9b2",
"ab450ae5c1734fb0aad5fed052b42023",
"f725e3100bba4b5eb8a5199a2b3e62fc"
]
}
}
},
...
I want to delete an element from all documents. For example, a specified tag id such as "6e7358e2bfc84c34af32a01f6d19e9b2" should be removed from tags. How should I write a script for that? Is there another way to do it in Elasticsearch?
I'm using this script, but it doesn't work:
POST index-name/_update_by_query
{
"query": {
"match":{
"table.tag.tags": "6e7358e2bfc84c34af32a01f6d19e9b2"
}
},
"script": {
"source": "ctx._source['table.tag']['tags'] -= 6e7358e2bfc84c34af32a01f6d19e9b2",
"lang": "painless"
}
}
Here is a more concise way with implicit list iterations and if conditions (+ it's a one-liner 😉):
POST index-name/_update_by_query
{
"query": {
"match": {
"table.tag.tags": "6e7358e2bfc84c34af32a01f6d19e9b2"
}
},
"script": {
"lang": "painless"
"source": "ctx._source['table.tag']['tags'].removeIf(tag -> tag == params.tag);",
"params": {
"tag": "6e7358e2bfc84c34af32a01f6d19e9b2"
}
}
}
UPDATE
You can add your second condition like this:
ctx._source['table.tag']['tags'].removeIf(tag -> tag == params.tag);
if (ctx._source['table.tag']['tags'].size() == 0) {
ctx._source['table.tag'].remove('tags');
}
You can try the script below:
POST index-name/_update_by_query
{
"query": {
"match": {
"table.tag.tags": "6e7358e2bfc84c34af32a01f6d19e9b2"
}
},
"script": {
"source": """
for (int i = 0; i < ctx._source['table.tag']['tags'].length; i++)
{
if(ctx._source['table.tag']['tags'][i]=='6e7358e2bfc84c34af32a01f6d19e9b2')
{
ctx._source['table.tag']['tags'].remove(i);
}
}"""
}
}

Elasticsearch scripting array size

Can anyone help me construct the query below? I get the error below when running it. The ES version is 7.9.0.
In my model there is a field "repliedBy", which is an array field. Its value is always initialized with an empty array, but on some entities it contains one or more objects. I need to write a query that returns only the items where the array is empty.
GET myTable/_search
{
"query": {
"bool": {
"must": [
{
"script": {
"script": {
"source": "doc['repliedBy'].size() == params.val",
"params": {
"val": 0
}
}
}
},
{
"range": {
"receivedDate": {
"gte": "2020-09-15T07:51:21.000Z",
"lte": "2020-12-01T07:51:21.000Z"
}
}
}
]
}
}
}
Error:
"error" : {
"root_cause" : [
{
"type" : "script_exception",
"reason" : "runtime error",
"script_stack" : [
"org.elasticsearch.search.lookup.LeafDocLookup.get(LeafDocLookup.java:90)",
"org.elasticsearch.search.lookup.LeafDocLookup.get(LeafDocLookup.java:41)",
"doc['repliedBy'].size() == params.val",
" ^---- HERE"
],
"script" : "doc['repliedBy'].size() == params.val",
"lang" : "painless",
"position" : {
"offset" : 4,
"start" : 0,
"end" : 37
}
}
],
This is the job for a bool/must_not/exists query combination, like this:
{
"query": {
"bool": {
"must_not": [
{
"exists": {
"field": "repliedBy.id"
}
}
],
"filter": [
{
"range": {
"receivedDate": {
"gte": "2020-09-15T07:51:21.000Z",
"lte": "2020-12-01T07:51:21.000Z"
}
}
}
]
}
}
}

How to subtract two values in Elasticsearch

I'm trying to get the difference between two fields. I'm using Elasticsearch 5.5.
I have already tried
{
"query": {
"bool": {
"filter": {
"script": {
"script": "doc['students.total_fee'].value - doc['students.paid_fee'].value"
}
}
}
}
}
but it is returning empty "hits".
I have also tried
{
"query": {
"bool": {
"must": [
{
"nested": {
"path": "students",
"query": {
"script": {
"script": {
"inline": "doc['students.total_fee'].value - doc['students.paid_fee'].value",
"lang": "expression"
}
}
}
}
}
]
}
}
and it is returning "0".
The "script_fields" approach did not work either:
{ "script_fields" :
{ "difference" :
{ "script" : "doc['students.total_fee'].value - doc['students.paid_fee'].value"
} } }
Suppose I have data in the following format:
"_source" : {
"students" : [
{
"name" : "A",
"total_fee" : 12345,
"paid_fee" : 12344.8
},
{
"name" : "B",
"total_fee" : 23456,
"paid_fee" : 23455.6
}
]
}
Now I want to get the difference between "total_fee" and "paid_fee" for each student.
I expect to get an array of differences for all students.
Thanks in advance :D
You need to use the query below; my ES version is 6.2.2, and it gives the correct result. But remember that scripting is generally CPU intensive.
If it is a normal (non-nested) field, then the query below works fine.
{
"size": 10,
"script_fields": {
"fare_diff": {
"script": "doc[\"students.total_fee\"].value - doc[\"students.paid_fee\"].value"
}
}
}
If you are using a nested field parameter then the query would be like below.
{
"script_fields": {
"fare_diff": {
"script": {
"lang": "painless",
"source": "int total = 0; def l = new ArrayList(); for (int i = 0; i < params['_source']['students'].size(); ++i) { l.add(params['_source']['students'][i]['total_fee'] - params['_source']['students'][i]['paid_fee']);} return l.toArray();"
}
}
}
}
Reason: Because nested documents are indexed as separate documents, they can only be accessed within the scope of the nested query, the nested/reverse_nested aggregations, or nested inner hits.

Elasticsearch querying number of dates in array matching query

I have documents in the following form
PUT test_index/_doc/1
{
"dates" : [
"2018-07-15T14:12:12",
"2018-09-15T14:12:12",
"2018-11-15T14:12:12",
"2019-01-15T14:12:12",
"2019-03-15T14:12:12",
"2019-04-15T14:12:12",
"2019-05-15T14:12:12"],
"message" : "hello world"
}
How do I query for documents such that there are n number of dates within the dates array falling in between two specified dates?
For example: Find all documents with 3 dates in the dates array falling in between "2018-05-15T14:12:12" and "2018-12-15T14:12:12" -- this should return the above document as "2018-07-15T14:12:12", "2018-09-15T14:12:12" and "2018-11-15T14:12:12" fall between "2018-05-15T14:12:12" and "2018-12-15T14:12:12".
I recently faced the same problem and came up with two solutions.
1) If you do not want to change your current mapping, you could query for the documents using query_string. Also note you will have to create the query object according to the range that you have. ("\"2019-04-08\" OR \"2019-04-09\" OR \"2019-04-10\" ")
{
"query": {
"query_string": {
"default_field": "dates",
"query": "\"2019-04-08\" OR \"2019-04-09\" OR \"2019-04-10\" "
}
}
}
However, this type of query only makes sense if the range is short.
2) The second way is the nested method, but you will have to change your current mapping as follows:
{
"properties": {
"dates": {
"type": "nested",
"properties": {
"key": {
"type": "date",
"format": "YYYY-MM-dd"
}
}
}
}
}
So your query will look something like this :-
{
"query": {
"nested": {
"path": "dates",
"query": {
"bool": {
"must": [
{
"range": {
"dates.key": {
"gte": "2018-04-01",
"lte": "2018-12-31"
}
}
}
]
}
}
}
}
}
You can create dates as a nested document and use bucket selector aggregation.
{
"empId":1,
"dates":[
{
"Days":"2019-01-01"
},
{
"Days":"2019-01-02"
}
]
}
Mapping:
"mappings" : {
"properties" : {
"empId" : {
"type" : "keyword"
},
"dates" : {
"type" : "nested",
"properties" : {
"Days" : {
"type" : "date"
}
}
}
}
}
GET profile/_search
{
"query": {
"bool": {
"filter": {
"nested": {
"path": "dates",
"query": {
"range": {
"dates.Days": {
"format": "yyyy-MM-dd",
"gte": "2019-05-01",
"lte": "2019-05-30"
}
}
}
}
}
}
},
"aggs": {
"terms_parent_id": {
"terms": {
"field": "empId"
},
"aggs": {
"availabilities": {
"nested": {
"path": "dates"
},
"aggs": {
"avail": {
"range": {
"field": "dates.Days",
"ranges": [
{
"from": "2019-05-01",
"to": "2019-05-30"
}
]
},
"aggs": {
"count_Total": {
"value_count": {
"field": "dates.Days"
}
}
}
},
"max_hourly_inner": {
"max_bucket": {
"buckets_path": "avail>count_Total"
}
}
}
},
"bucket_selector_page_id_term_count": {
"bucket_selector": {
"buckets_path": {
"children_count": "availabilities>max_hourly_inner"
},
"script": "params.children_count>=19;" ---> give the number of days that should match
}
},
"hits": {
"top_hits": {
"size": 10
}
}
}
}
}
}
I found my own answer to this, although I'm not sure how efficient it is compared to the other answers:
GET test_index/_search
{
"query":{
"bool" : {
"filter" : {
"script" : {
"script" : {"source":"""
int count = 0;
for (int i=0; i<doc['dates'].length; ++i) {
if (params.first_date < doc['dates'][i].toInstant().toEpochMilli() && doc['dates'][i].toInstant().toEpochMilli() < params.second_date) {
count += 1;
}
}
if (count >= 2) {
return true
} else {
return false
}
""",
"lang":"painless",
"params": {
"first_date": 1554818400000,
"second_date": 1583020800000
}
}
}
}
}
}
}
where the parameters are the two dates in epoch time. I've chosen 2 matches here, but obviously you can generalise to any number.

Resources