Boosting an Elasticsearch result by 'age' if applicable - elasticsearch

I want to search multiple indices in Elasticsearch (news items in search_news and documents in search_documents), and whenever an index has a publishDate field (news items only), I want to use it to boost newer news items in the ranking. I am using Elasticsearch 6.8.
I found the script_scoring example in https://dzone.com/articles/23-useful-elasticsearch-example-queries (the last one), but it throws errors. Based on the documentation, I came up with:
GET /search_*/_search
{
"query": {
"function_score": {
"query": {
"bool": {
"must": {
"query_string": {
"query": "Lorem Ipsum"
}
},
"must_not": {
"exists": {
"field": "some_exlusion_field"
}
}
}
},
"script_score": {
"script": {
"params" : {
"threshold": "2019-04-04"
},
"source": "publishDate = doc['publishDate'].value; if (publishDate > Date.parse('yyyy-MM-dd', threshold).getTime()) { return log(2.5) } return log(1);"
}
}
}
}
}
This results in the error:
{
"error": {
"root_cause": [
{
"type": "script_exception",
"reason": "compile error",
"script_stack": [
"publishDate = doc['publis ...",
"^---- HERE"
],
"script": "publishDate = doc['publishDate'].value; if (publishDate > Date.parse('yyyy-MM-dd', threshold).getTime()) { return log(2.5) } return log(1);",
"lang": "painless"
}
}
I managed to minify the source to:
"source": "if (doc['publishDate'] > '2019-04-04') { return 5 } return 1;"
But no success:
"failures" : [
{
"shard" : 0,
"index" : "search_document_page",
"node" : "c0iLpxiJRqmgwS0KY8OybA",
"reason" : {
"type" : "script_exception",
"reason" : "runtime error",
"script_stack" : [
"org.elasticsearch.search.lookup.LeafDocLookup.get(LeafDocLookup.java:81)",
"org.elasticsearch.search.lookup.LeafDocLookup.get(LeafDocLookup.java:39)",
"if (doc['publishDate'] > '2019-04-04') { ",
" ^---- HERE"
],
"script" : "if (doc['publishDate'] > '2019-04-04') { return 5 } return 1;",
"lang" : "painless",
"caused_by" : {
"type" : "illegal_argument_exception",
"reason" : "No field found for [publishDate] in mapping with types []"
}
}
},
{
"shard" : 0,
"index" : "search_news",
"node" : "c0iLpxiJRqmgwS0KY8OybA",
"reason" : {
"type" : "script_exception",
"reason" : "runtime error",
"script_stack" : [
"if (doc['publishDate'] > '2019-04-04') { ",
" ^---- HERE"
],
"script" : "if (doc['publishDate'] > '2019-04-04') { return 5 } return 1;",
"lang" : "painless",
"caused_by" : {
"type" : "class_cast_exception",
"reason" : "Cannot apply [>] operation to types [org.elasticsearch.index.fielddata.ScriptDocValues.Dates] and [java.lang.String]."
}
}
}
]
}
}
Any suggestion for checking the existence of the field in doc and how to check the date properly?

For the existence check (see the documentation):
if (!doc.containsKey('publishDate')) {
return 1;
}
And for the date comparison, you can try this way
if (Date.parse('yyyy-MM-dd', params.threshold).getMillis() > doc['publishDate'].getMillis()) {
return 5;
} else {
return 1;
}

Related

cannot convert MethodHandle(Dates)JodaCompatibleZonedDateTime to (Object)double

I am trying to add a condition: if one field exists, sort by it; otherwise, sort by another field. One of the two will always exist.
Here is my query:
GET /my_index/_search
{
"query": {
"match_all": {}
},
"sort": {
"_script": {
"type":"number",
"script": "if(doc['contextDates.event.date'].value != 0){ return doc['contextDates.event.date'].value} else { return doc['contextDates.start.date'].value}",
"order": "asc"
}
}
}
When I execute this query, I get the following error:
"failed_shards" : [
{
"shard" : 0,
"index" : "my_inedx",
"node" : "UxKwS8SIR-uIbzo5_0IbcQ",
"reason" : {
"type" : "script_exception",
"reason" : "runtime error",
"script_stack" : [
"return doc['contextDates.event.date'].value} else { ",
" ^---- HERE"
],
"script" : "if(doc['contextDates.event.date'].value != 0){ return doc['contextDates.event.date'].value} else { return doc['contextDates.start.date'].value}",
"lang" : "painless",
"position" : {
"offset" : 84,
"start" : 47,
"end" : 99
},
"caused_by" : {
"type" : "wrong_method_type_exception",
"reason" : "cannot convert MethodHandle(Dates)JodaCompatibleZonedDateTime to (Object)double"
}
}
}
]
I have tried Double.parseDouble method as well but it doesn't work. This is what I have inside document for contextDates
"contextDates" : {
"event" : {
"date" : "2020-06-26T00:00:00.000Z",
"resolution" : "day",
"score" : 0,
"type" : "event"
}
}
The doc value you're getting is of type JodaCompatibleZonedDateTime, which you're trying to compare to a numeric value and return from a sort script of type "number". You need to convert it to a number with getMillis(), so modify your script like this:
if(doc['contextDates.event.date'].value.getMillis() != 0){ return doc['contextDates.event.date'].value.getMillis()} else { return doc['contextDates.start.date'].value.getMillis()}

Elasticsearch Query DSL: Length of field, if field exists

Say I have a field, data.url. Some of our logs contain this field, some do not. I want to return only results where data.url is more than, say, 50 characters long. Really, I just need a list of URLs.
I'm trying:
GET _search
{
"query": {
"bool": {
"filter": {
"script": {
"script": {
"source": "doc['data.url'].value.length() > 50",
"lang": "painless"
}
}
}
}
}
}
But get mixed errors:
{
"error" : {
"root_cause" : [
{
"type" : "script_exception",
"reason" : "runtime error",
"script_stack" : [
"org.elasticsearch.search.lookup.LeafDocLookup.get(LeafDocLookup.java:90)",
"org.elasticsearch.search.lookup.LeafDocLookup.get(LeafDocLookup.java:41)",
"doc['data.url'].value.length() > 50",
" ^---- HERE"
],
"script" : "doc['data.url'].value.length() > 50",
"lang" : "painless",
"position" : {
"offset" : 4,
"start" : 0,
"end" : 35
}
},
or
"type" : "script_exception",
"reason" : "runtime error",
"script_stack" : [
"org.elasticsearch.index.fielddata.ScriptDocValues$Strings.get(ScriptDocValues.java:496)",
"org.elasticsearch.index.fielddata.ScriptDocValues$Strings.getValue(ScriptDocValues.java:503)",
"doc['data.url'].value.length() > 50",
" ^---- HERE"
],
"script" : "doc['data.url'].value.length() > 50",
"lang" : "painless",
"position" : {
"offset" : 15,
"start" : 0,
"end" : 35
}
With
"caused_by" : {
"type" : "illegal_argument_exception",
"reason" : "No field found for [data.url] in mapping with types []"
}
and sometimes
"caused_by" : {
"type" : "illegal_state_exception",
"reason" : "A document doesn't have a value for a field! Use doc[<field>].size()==0 to check if a document is missing a field!"
}
This field definitely exists; I can see it in the logs, search in the search field, and using term works:
GET _search
{
"query": {
"bool": {
"filter": {
"term": {
"data.url": "www.google.com"
}
}
}
}
}
What am I missing?
I'm using Elasticsearch 7.8.
Since you are using version 7.*, you need to use the script query below:
{
"query": {
"bool": {
"filter": {
"script": {
"script": {
"source": "doc['data.url.keyword'].length > 50",
"lang": "painless"
}
}
}
}
}
}
If the data.url field is already of type keyword, omit the ".keyword" suffix from the field name.

Add date field and boolean with ? in name to existing Elasticsearch documents

We need to add two new fields to an existing ElasticSearch (7.9 oss) instance.
Field 1: Date Field
We want to add an optional date field. It shouldn't have a value upon creation.
How to do this with update_by_query?
Tried this:
POST orders/_update_by_query
{
"query": {
"match_all": {}
},
"script": {
"source": "ctx._source.new_d3_field",
"lang": "painless",
"type": "date",
"format": "yyyy/MM/dd HH:mm:ss"
}
}
Field 2: Boolean field with ? in name
We want to keep the ? so that it matches the other fields that we already have in ES.
It is also worth noting that even after removing the ? and running the request below, the field doesn't appear to be a boolean.
Tried this:
POST orders/_update_by_query
{
"query": {
"match_all": {}
},
"script": {
"source": "ctx._source.new_b_field? = false",
"lang": "painless"
}
}
Which gave the error:
{
"error" : {
"root_cause" : [
{
"type" : "script_exception",
"reason" : "compile error",
"script_stack" : [
"ctx._source.new_b_field? = false",
" ^---- HERE"
],
"script" : "ctx._source.new_b_field? = false",
"lang" : "painless",
"position" : {
"offset" : 25,
"start" : 0,
"end" : 32
}
}
],
"type" : "script_exception",
"reason" : "compile error",
"script_stack" : [
"ctx._source.new_b_field? = false",
" ^---- HERE"
],
"script" : "ctx._source.new_b_field? = false",
"lang" : "painless",
"position" : {
"offset" : 25,
"start" : 0,
"end" : 32
},
"caused_by" : {
"type" : "illegal_argument_exception",
"reason" : "invalid sequence of tokens near ['='].",
"caused_by" : {
"type" : "no_viable_alt_exception",
"reason" : null
}
}
},
"status" : 400
}
Also tried:
POST orders/_update_by_query?new_b_field%3F=false
Which gave:
{
"error" : {
"root_cause" : [
{
"type" : "illegal_argument_exception",
"reason" : "request [/orders/_update_by_query] contains unrecognized parameter: [new_b_field?]"
}
],
"type" : "illegal_argument_exception",
"reason" : "request [/orders/_update_by_query] contains unrecognized parameter: [new_b_field?]"
},
"status" : 400
}
If you want to add two new fields to an existing Elasticsearch index, and they should not have a value upon creation, you should update the index mapping using the Put mapping API:
PUT /orders/_mapping
{
"properties": {
"new_d3_field": {
"type": "date",
"format": "yyyy/MM/dd HH:mm:ss"
},
"new_b_field?": {
"type": "boolean"
}
}
}
If you still want to use _update_by_query you should set an initial value, then the field will be added.
POST orders/_update_by_query?wait_for_completion=false&conflicts=proceed
{
"query": {
"match_all": {}
},
"script": {
"source": "ctx._source.new_d3_field=params.date;ctx._source.new_b_field = params.val",
"lang": "painless",
"params": {
"date": "1980/01/01",
"val": false
}
}
}
The Update By Query API is used to update documents, so I guess you can't add a field to your schema without updating at least one document. What you can do is create a dummy document and update only that document. Something like this:
POST orders/_update_by_query
{
"query": {
"match": {
"my-field":"my-value"
}
},
"script": {
"source": "ctx._source.new_d3_field=params.date;ctx._source.new_b_field = params.val",
"lang": "painless",
"params": {
"date": "1980/01/01",
"val": false
}
}
}

How can I use Java's ArrayList class in a Painless script in Elasticsearch?

I want to write a script for script_score in elasticsearch.
In Painless Documentation there is a list of java classes under "Shared api reference".
GET hockey/_search
{
"explain": true,
"query": {
"match_all": {}
},
"script_fields": {
"total_goals": {
"script": {
"lang": "painless",
"source": """
int[] arr = new int[3];
arr[0] = 1;
arr[1] = 2;
arr[2] = 3;
return arr;
""",
"params":{
"last" : "any parameters required"
}
}
}
}
}
The above script works as expected, but I want to use Java's ArrayList or some other class instead.
GET hockey/_search
{
"explain": true,
"query": {
"match_all": {}
},
"script_fields": {
"total_goals": {
"script": {
"lang": "painless",
"source": """
ArrayList<Integer> al = new ArrayList<Integer>();
al.add(1);
al.add(2);
return al;
""",
"params":{
"last" : "any parameters required"
}
}
}
}
}
This throws the following error:
{
"error" : {
"root_cause" : [
{
"type" : "script_exception",
"reason" : "compile error",
"script_stack" : [
"\n ArrayList<Integer> al = new ArrayL ...",
" ^---- HERE"
],
"script" : "\n ArrayList<Integer> al = new ArrayList<Integer>();\n al.add(1);\n al.add(2);\n return al;\n \n ",
"lang" : "painless"
}
],
"type" : "search_phase_execution_exception",
"reason" : "all shards failed",
"phase" : "query",
"grouped" : true,
"failed_shards" : [
{
"shard" : 0,
"index" : "hockey",
"node" : "UIMgEAZNRzmIpRGyQtNk9g",
"reason" : {
"type" : "script_exception",
"reason" : "compile error",
"script_stack" : [
"\n ArrayList<Integer> al = new ArrayL ...",
" ^---- HERE"
],
"script" : "\n ArrayList<Integer> al = new ArrayList<Integer>();\n al.add(1);\n al.add(2);\n return al;\n \n ",
"lang" : "painless",
"caused_by" : {
"type" : "illegal_argument_exception",
"reason" : "invalid sequence of tokens near ['<'].",
"caused_by" : {
"type" : "no_viable_alt_exception",
"reason" : null
}
}
}
}
]
},
"status" : 400
}
If it is possible to use the ArrayList class, do I have to import it from java.util?
You can use either ArrayList al = new ArrayList(); or even simply def al = new ArrayList(); More info in the docs.
FYI: you can make use of Debug.explain(al); to check what's what!

Elasticsearch, can't remove a field inside nested field

I have mappings
{
"candidate-index" : {
"mappings" : {
"properties" : {
"provider_candidates" : {
"type" : "nested",
"properties" : {
"foo" : {
"type" : "object"
},
"group_key" : {
"type" : "keyword"
}
}
}
}
}
}
I want to delete foo field
POST /candidate-index/_update_by_query
{
"script" : "ctx._source.remove(\"provider_candidates.foo\")",
"query": {
"nested": {
"path": "provider_candidates",
"query": {
"bool": {
"must": [
{
"exists": {
"field": "provider_candidates.foo"
}
}
]
}
}
}
}
}
It doesn't work. It doesn't generate an error, but the field is not removed.
I know the query part is correct, because if I turn it into _search it correctly finds documents
I also tried
POST /candidate-index/_update_by_query
{
"script" : "ctx._source.provider_candidates.remove(\"foo\")",
"query": {
"nested": {
"path": "provider_candidates",
"query": {
"bool": {
"must": [
{
"exists": {
"field": "provider_candidates.foo"
}
}
]
}
}
}
}
}
it says
{
"error" : {
"root_cause" : [
{
"type" : "script_exception",
"reason" : "runtime error",
"script_stack" : [
"ctx._source.provider_candidates.remove(\"foo\")",
" ^---- HERE"
],
"script" : "ctx._source.provider_candidates.remove(\"foo\")",
"lang" : "painless"
}
],
"type" : "script_exception",
"reason" : "runtime error",
"script_stack" : [
"ctx._source.provider_candidates.remove(\"foo\")",
" ^---- HERE"
],
"script" : "ctx._source.provider_candidates.remove(\"foo\")",
"lang" : "painless",
"caused_by" : {
"type" : "wrong_method_type_exception",
"reason" : "cannot convert MethodHandle(List,int)Object to (Object,String)Object"
}
},
"status" : 400
}
You need to loop over the provider_candidates array and remove the field from each element:
POST /index51/_update_by_query
{
"script" : "for (int i = 0; i < ctx._source.provider_candidates.length; ++i) { ctx._source.provider_candidates[i].remove(\"foo\") }",
"query": {
"nested": {
"path": "provider_candidates",
"query": {
"bool": {
"must": [
{
"exists": {
"field": "provider_candidates.foo"
}
}
]
}
}
}
}
}

Resources