Elasticsearch: upsert working on older version but not on newer version

I have an upsert query running in bulk. The final document should be stored like this:
{
  "email": "abc@xyz.com",
  "sources": [1, 2]
}
Here is the code:
var doc = {
  "source": parseInt(id),
  "email": email
}
var upsert_query = {
  "script": "if (ctx._source.containsKey(\"sources\")) { if (!ctx._source.sources.contains(source)) { ctx._source.sources += source; } } else {ctx._source.sources = [source] }",
  "params": {
    "source": doc.source
  },
  "upsert": {
    "email": doc.email,
    "sources": [doc.source]
  }
}
bulkRequestBody.push({"update": {"_index": "my_index", "_type": "email", "_id": doc.email, "_retry_on_conflict": 3}});
bulkRequestBody.push(upsert_query);
The code works perfectly fine on Elasticsearch 1.4 but does not work on 2.1.1.
I also tried to restructure my query:
var upsert_query = {
  "script": {
    "inline": "if (ctx._source.containsKey(\"sources\")) { if (!ctx._source.sources.contains(source)) { ctx._source.sources += source; } } else {ctx._source.sources = [source] }",
    "params": {
      "source": doc.source
    }
  },
  "upsert": {
    "email": doc.email,
    "sources": [doc.source]
  }
}
but still no luck. Any help?

Scripting needs to be enabled to run scripts like this.
In the elasticsearch.yml file under config, add the following lines and restart the node:
script.inline: on
script.indexed: on
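Once both settings are in place, the restructured query from the question (script.inline + script.params) should work against 2.x. For completeness, a minimal sketch of sending it with the official Node.js 'elasticsearch' client -- the client setup here is an assumption, and upsert_query is exactly the variable from the second snippet in the question:

var elasticsearch = require('elasticsearch');
var client = new elasticsearch.Client({ host: 'localhost:9200' }); // assumed host

var bulkRequestBody = [];
bulkRequestBody.push({"update": {"_index": "my_index", "_type": "email", "_id": doc.email, "_retry_on_conflict": 3}});
bulkRequestBody.push(upsert_query); // the ES 2.x style script object from the question

client.bulk({ body: bulkRequestBody }, function (err, resp) {
  if (err) return console.error(err);
  // resp.errors is true if any item failed; per-item details sit in resp.items[i].update.error
  console.log('bulk errors:', resp.errors);
});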

Related

How to filter match in top 3 - elasticsearch?

I have the following data in Elasticsearch:
[
{
"_index": "media",
"_type": "information",
"_id": "6838",
"_source": {
"demographics_countries": {
"AE": 0.17543859649122806,
"CA": 0.013157894736842105,
"FR": 0.017543859649122806,
"GB": 0.043859649122807015,
"IT": 0.02631578947368421,
"LB": 0.013157894736842105,
"SA": 0.49122807017543857,
"TR": 0.017543859649122806,
"US": 0.09210526315789472
}
}
},
{
"_index": "media",
"_type": "information",
"_id": "57696",
"_source": {
"demographics_countries": {
"TN": 0.8125,
"MA": 0.034375,
"DZ": 0.032812,
"FR": 0.0125,
"EG": 0.0125,
"IN": 0.009375,
"SA": 0.009375
}
}
}
]
Expected result:
Find documents where a specific country, SA (Saudi Arabia), is among the top 3 in demographics_countries.
For example:
"_id": "6838" (first document) matches because SA (Saudi Arabia) is among the top 3 in demographics_countries in the example document above.
Tried? I have tried to filter using top_hits, but it's not working as expected.
Any suggestion would be appreciated.
With the current data model it's quite difficult to do that. What I'd suggest might not be the easiest way, but it will definitely be the fastest to query in the end.
I'd suggest remodelling your documents to already include the top countries:
[
{
"_index": "media",
"_type": "information",
"_id": "6838",
"_source": {
"top_demographics_countries": ["TN", "MA", "DZ"],
"demographics_countries": {
"AE": 0.17543859649122806,
"CA": 0.013157894736842105,
"FR": 0.017543859649122806,
"GB": 0.043859649122807015,
"IT": 0.02631578947368421,
"LB": 0.013157894736842105,
"SA": 0.49122807017543857,
"TR": 0.017543859649122806,
"US": 0.09210526315789472
}
}
},
{
"_index": "media",
"_type": "information",
"_id": "57696",
"_source": {
"top_demographics_countries": ["TN", "MA", "DZ"],
"demographics_countries": {
"TN": 0.8125,
"MA": 0.034375,
"DZ": 0.032812,
"FR": 0.0125,
"EG": 0.0125,
"IN": 0.009375,
"SA": 0.009375
}
}
}
]
Ignore the values I've picked for top_demographics_countries. With this kind of approach you can always precalculate the top entries, and then a simple term query is enough to check whether a document contains the value or not:
{
  "query": {
    "bool": {
      "filter": {
        "term": {
          "top_demographics_countries": "SA"
        }
      }
    }
  }
}
It's going to be cheaper to compute them once during saving compared to always building that clause dynamically.
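For example, a rough sketch of precomputing the field at index time with the Node.js client (the client setup, index/type names and the topCountries helper are assumptions, not part of the original answer):

var elasticsearch = require('elasticsearch');
var client = new elasticsearch.Client({ host: 'localhost:9200' }); // assumed host

// sort the country codes by their share, descending, and keep the first n
function topCountries(demographics, n) {
  return Object.keys(demographics)
    .sort(function (a, b) { return demographics[b] - demographics[a]; })
    .slice(0, n);
}

var source = {
  "demographics_countries": { "SA": 0.49, "AE": 0.17, "US": 0.09, "GB": 0.04 } // sample values
};
source.top_demographics_countries = topCountries(source.demographics_countries, 3);

client.index({ index: 'media', type: 'information', id: '6838', body: source }, function (err) {
  if (err) console.error(err);
});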
@Evaldas is right -- it's better to extract the top 3 beforehand.
But if you can't help yourself and feel compelled to use Java/Painless, here's one approach:
{
"query": {
"bool": {
"must": [
{
"exists": {
"field": "demographics_countries.SA"
}
},
{
"script": {
"script": {
"source": """
def tuple_list = new ArrayList();
for (def c : params.all_countries) {
def key = 'demographics_countries.'+c;
if (!doc.containsKey(key) || doc[key].size() == 0) {
continue;
}
def val = doc[key].value;
tuple_list.add([c, val]);
}
// sort tuple list by the country values
Collections.sort(tuple_list, (arr1, arr2) -> arr1[1] < arr2[1] ? 1 : -1);
// slice & take only the top 3 (guarding against docs that have fewer than 3 countries)
int top_n = (int) Math.min(3, tuple_list.size());
def top_3_countries = tuple_list.subList(0, top_n).stream().map(arr -> arr[0]).collect(Collectors.toList());
return top_3_countries.size() >= 3 && top_3_countries.contains(params.country_of_interest);
""",
"params": {
"country_of_interest": "SA",
"all_countries": [
"AE",
"CA",
"FR",
"GB",
"IT",
"LB",
"SA",
"TR",
"US",
"TN",
"MA",
"DZ",
"EG",
"IN"
]
}
}
}
}
]
}
}
}
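If you do go the script route, here is a rough sketch of running that search from the Node.js client; scriptQuery stands in for the request body above and the client setup is assumed:

var scriptQuery = { /* the request body shown above */ };

client.search({ index: 'media', type: 'information', body: scriptQuery }, function (err, resp) {
  if (err) return console.error(err);
  resp.hits.hits.forEach(function (hit) {
    console.log(hit._id); // e.g. "6838", where SA is in the top 3
  });
});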

Elasticsearch: Update/upsert an array field inside a document but ignore certain existing fields

GET _doc/1
"_source": {
"documents": [
{
"docid": "ID001",
"added_vals": [
{
"code": "123",
"label": "Abc"
},
{
"code": "113",
"label": "Xyz"
}
]
},
{
"docid": "ID002",
"added_vals": [
{
"code": "123",
"label": "Abc"
}
]
}
],
"id": "1"
}
POST /_bulk
{ "update": { "_id": "1"}}
{ "doc": { "documents": [ { "docid": "ID001", "status" : "cancelled" } ], "id": "1" }, "doc_as_upsert": true }
The problem with the above is that when I run my bulk update, it replaces the whole documents field and removes the existing added_vals list. I want to update an entry (e.g. set its status) while keeping the added_vals that are already there. Would I be able to achieve this using a painless script? Thank you.
Using elasticsearch painless scripting
POST /_bulk
{ "update": { "_id": "1"} }
{ "scripted_upsert":true, "script" :{ "source": "if(ctx._version == null) { ctx._source = params; } else { def param = params; def src = ctx._source; for(s in src.documents) { boolean found = false; for(p in param.documents) { if (p.docid == s.docid) { found = true; if(s.added_vals != null) { p.added_vals = s.added_vals; } } } if(!found) param.documents.add(s); } ctx._source = param; }", "lang": "painless", "params" : { "documents": [ { "docid": "ID001", "status" : "cancelled" } ], "id": "1" } }, "upsert" : { } }
Well, this one worked for me. I still need to tweak a few more things that I require, but I will leave it here for anyone who may need it. I didn't know it was this simple. If there is another answer that might be easier, please do post it. Thanks.
"script" :
if(ctx._version == null)
{
ctx._source = params;
}
else
{
def param = params;
def src = ctx._source;
for(s in src.documents)
{
boolean found = false;
for(p in param.documents)
{
if (p.docid == s.docid)
{
found = true;
if(s.added_vals != null)
{
p.added_vals = s.added_vals;
}
}
}
if(!found) param.documents.add(s);
}
ctx._source = param;
}
I am not sure if I should modify the params directly, so I copied params into the param variable and worked on that instead. I also used scripted_upsert: true together with a check that ctx._version is null.
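For reference, assuming the starting document from the question, the merged _source after this scripted upsert should look roughly like this (ID001 keeps its added_vals and gains status; ID002 is carried over unchanged):

{
  "documents": [
    {
      "docid": "ID001",
      "status": "cancelled",
      "added_vals": [
        { "code": "123", "label": "Abc" },
        { "code": "113", "label": "Xyz" }
      ]
    },
    {
      "docid": "ID002",
      "added_vals": [
        { "code": "123", "label": "Abc" }
      ]
    }
  ],
  "id": "1"
}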

How to upsert nested objects with dynamic properties in Elastic Search?

I have a document in elasticsearch that looks like this:
{
"_index": "stats",
"_type": "_doc",
"_id": "1",
"_score": 1.0,
"_source": {
"publishTime": {
"lastUpdate": 1580991095131,
"h0_4": 0,
"h4_8": 0,
"h8_12": 3,
"h12_16": 5,
"h16_20": 2,
"h20_24": 1
},
"postCategories": {
"lastUpdate": 1580991095131,
"tech": 56,
"lifestyle": 63,
"healthcare": 49,
"finances": 25,
}
}
}
Updating/Incrementing existing property values by sending a POST request to /stats/_update/1 works great! However, if I try to upsert a non-existing property name under postCategories, I get a Bad Request (400) error of type remote_transport_exception/illegal_argument_exception:
"ctx._source.postCategories.relationships += params.postCategories.relationships",
^---- HERE"
Upsert
{
"script": {
"source": "ctx._source.postCategories.relationships += params.postCategories.relationships",
"lang": "painless",
"params": {
"postCategories": {
"relationships": 2
}
}
},
"upsert": {
"postCategories": {
"relationships": 2
}
}
}
I also tried the Scripted Upsert method by following the documentation from here, however, the same error occurs:
Scripted Upsert
{
"scripted_upsert":true,
"script": {
"source": "ctx._source.postCategories.relationships += params.postCategories.relationships",
"params": {
"postCategories": {
"relationships": 2
}
}
},
"upsert": {}
}
Can anyone tell me how I can properly add/upsert new property names under the postCategories object, please?
Thank you!
It's basically saying that you are trying to assign a value to a field that doesn't exist. I think the script below should work (not tested).
Check whether the field exists and, if it does, increment it; otherwise add the new field and assign the value.
"if (ctx._source.postCategories.containsKey(\"relationships\")) { ctx._source.postCategories.relationships += params.postCategories.relationships} else { ctx._source.postCategories[\"relationships\"] = params.postCategories.relationships}",

MongoDB aggregation query using spring

db.getCollection('questionbank').aggregate([
{ "$group": {
"_id": {
"technology": "$technology",
"level":"$level",
"type":"$type"
},
"Count": { "$sum": 1 }
}},
{ "$group": {
"_id": "$_id.technology",
"QuestionCount": {
"$push": {
"level":"$_id.level",
"type":"$_id.type",
"count": "$Count"
},
}
}}
])
I am trying to get the same output structure.
Can anyone please help me write the above query in Spring?
I have tried a lot but failed.
You can use the following (assuming the usual static imports from org.springframework.data.mongodb.core.aggregation.Aggregation):
Aggregation agg = newAggregation(
    group("technology", "level", "type").count().as("count"),
    group("_id.technology")
        .push(new BasicDBObject("level", "$_id.level")
            .append("type", "$_id.type")
            .append("count", "$count"))
        .as("questionCount")
);
AggregationResults<BasicDBObject> results = mongoTemplate.aggregate(agg, "questionbank", BasicDBObject.class);

Looking for Elasticsearch updateByQuery syntax example (Node driver)

You have an Elasticsearch index with two docs:
[
{
"_index": "myIndex",
"_type": "myType",
"_id": "es1472002807930",
"_source": {
"animal": "turtle",
"color": "green",
"weight": 20,
}
},
{
"_index": "myIndex",
"_type": "myType",
"_id": "es1472002809463",
"_source": {
"animal": "bear",
"color": "brown"
"weight": 400,
}
}
]
Later, you get this updated data about the bear:
{
"color": "pink",
"weight": 500,
"diet": "omnivore",
}
So, you want to update the "color" and "weight" values of the bear, and add the "diet" key to the "bear" doc. You know there's only one doc with "animal": "bear" (but you don't know the _id):
Using the Nodejs driver, what updateByQuery syntax would update the "bear" doc with these new values?
(NOTE: this question has been entirely edited to be more useful to the SO community!)
The answer was provided by Val in this other SO:
How to update a document based on query using elasticsearch-js (or other means)?
Here is the answer:
var theScript = {
"inline": "ctx._source.color = 'pink'; ctx._source.weight = 500; ctx._source.diet = 'omnivore';"
}
client.updateByQuery({
index: myindex,
type: mytype,
body: {
"query": { "match": { "animal": "bear" } },
"script": theScript
}
}, function(err, res) {
if (err) {
reportError(err)
}
cb(err, res)
}
)
The other answer is missing the point since it doesn't have any script to carry out the update.
You need to do it like this:
POST /myIndex/myType/_update_by_query
{
"query": {
"term": {
"animal": "bear"
}
},
"script": "ctx._source.color = 'green'"
}
Important notes:
you need to make sure dynamic scripting is enabled in order for this to work.
if you are using ES 2.3 or later, then the update-by-query feature is built-in.
if you are using ES 1.7.x or an earlier release, you need to install the update-by-query plugin.
if you are using anything between ES 2.0 and 2.2, then you have no way to do this in one shot; you need to do it in two operations (see the sketch after these notes).
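For completeness, a rough sketch of that two-step fallback with the same Node.js client (reportError and cb are the helpers from the answer code above; only the first matching hit is updated, which fits the single-bear assumption):

// step 1: find the _id of the doc matching the query
client.search({
  index: 'myIndex',
  type: 'myType',
  body: { "query": { "match": { "animal": "bear" } } }
}, function (err, res) {
  if (err) return reportError(err);
  var id = res.hits.hits[0]._id;
  // step 2: partially update that doc by _id
  client.update({
    index: 'myIndex',
    type: 'myType',
    id: id,
    body: { "doc": { "color": "pink", "weight": 500, "diet": "omnivore" } }
  }, function (err2, res2) {
    if (err2) return reportError(err2);
    cb(err2, res2);
  });
});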
UPDATE
Your Node.js code should look like this; you're missing the body parameter:
client.updateByQuery({
index: index,
type: type,
body: {
"query": { "match": { "animal": "bear" } },
"script": { "inline": "ctx._source.color = 'pink'"}
}
}, function(err, res) {
if (err) {
reportError(err)
}
cb(err, res)
}
)
For Elasticsearch 7.4 you could use:
await client.updateByQuery({
index: "indexName",
body: {
query: {
match: { fieldName: "valueSearched" }
},
script: {
source: "ctx._source.fieldName = params.newValue",
lang: 'painless',
params: {
newValue: "newValue"
}
}
}
});
