Order results by smallest absolute difference from input - elasticsearch

Can elasticsearch find the closest number to an input?
Example: I have apartments with 1, 2, 5, 6 and 10 rooms. I want a search for apartments with 5 rooms to order results by absolute difference (e.g. |6-5| = 1, |2-5| = 3 etc)
What I want to see: 5, 6, 2, 1, 10.
GET appartaments/_search
{
"query": {
"bool": {
"must":[
{
"match":{
"properties.id":1
}
},
{
"match":{
"properties.value":"5"
}
}
]
}
}
}

You can probably achieve what you want using script-based sorting:
GET appartaments/_search
{
"sort": {
"_script": {
"type": "number",
"script": {
"lang": "painless",
"source": "Math.abs(params.value - Integer.parseInt(doc['properties.value.keyword'].value))",
"params": {
"value": 5
}
},
"order": "asc"
}
},
"query": {
"bool": {
"must": [
{
"match": {
"properties.id": 1
}
}
]
}
}
}
Results =>
"hits" : [
{
"_index" : "appartaments",
"_type" : "_doc",
"_id" : "5",
"_score" : null,
"_source" : {
"properties" : {
"id" : 1,
"value" : "5"
}
},
"sort" : [
0.0
]
},
{
"_index" : "appartaments",
"_type" : "_doc",
"_id" : "6",
"_score" : null,
"_source" : {
"properties" : {
"id" : 1,
"value" : "6"
}
},
"sort" : [
1.0
]
},
{
"_index" : "appartaments",
"_type" : "_doc",
"_id" : "2",
"_score" : null,
"_source" : {
"properties" : {
"id" : 1,
"value" : "2"
}
},
"sort" : [
3.0
]
},
{
"_index" : "appartaments",
"_type" : "_doc",
"_id" : "1",
"_score" : null,
"_source" : {
"properties" : {
"id" : 1,
"value" : "1"
}
},
"sort" : [
4.0
]
},
{
"_index" : "appartaments",
"_type" : "_doc",
"_id" : "10",
"_score" : null,
"_source" : {
"properties" : {
"id" : 1,
"value" : "10"
}
},
"sort" : [
5.0
]
}
]
}

Related

Date histogram with different Timezones with Elasticsearch v.7.13.3

I have to acquire data from different time zones (for example New York -6.00 and Rome +2.00).
In the document I have a field 'timestamp' defined as "date", and I have created a "date_histogram", for example from 8.00 AM to 9.00 AM. How can I match the USA 8.00-9.00 data and the ITA 8.00-9.00 data in order to compare the two sets of data from the same period?
This is my data from two different time zones, 2 documents from USA and 2 from ITA:
"hits" : [
{
"_index" : "test-data-2021-8-4",
"_type" : "_doc",
"_id" : "9tS4EHsB4Ke8qtFfYqbg",
"_score" : 1.0,
"_source" : {
"id" : "mtKDIsEfSr3I8AwCDE1Gjw_11",
"value" : 87.2,
"timestamp" : "2021-08-04T12:32:04+02:00"
}
},
{
"_index" : "test-data-2021-8-4",
"_type" : "_doc",
"_id" : "99S4EHsB4Ke8qtFfYqbg",
"_score" : 1.0,
"_source" : {
"id" : "mtKDIsEfSr3I8AwCDE1Gjw_5",
"value" : 31.0025,
"timestamp" : "2021-08-04T12:32:04+02:00"
}
},
{
"_index" : "test-data-2021-8-4",
"_type" : "_doc",
"_id" : "wdOREHsB4Ke8qtFfuZAf",
"_score" : 1.0,
"_source" : {
"id" : "mtKDIsEfSr3I8AwCDE1Gjw_11",
"value" : 15.1,
"timestamp" : "2021-08-04T05:49:50-04:00"
}
},
{
"_index" : "test-data-2021-8-4",
"_type" : "_doc",
"_id" : "wtOREHsB4Ke8qtFfuZAg",
"_score" : 1.0,
"_source" : {
"id" : "mtKDIsEfSr3I8AwCDE1Gjw_5",
"value" : 27.9457,
"timestamp" : "2021-08-04T05:49:50-04:00"
}
}
]
This is my date_histogram query:
GET /test-data-*/_search?size=10000
{
"aggs": {
"agg_sum": {
"date_histogram": {
"field": "timestamp",
"fixed_interval": "1h"
},
"aggs": {
"aggregazione": {
"sum": {
"field": "value"
}
}
}
}
},
"size": 0,
"fields": [
{
"field": "timestamp",
"format": "date_time"
}
],
"stored_fields": [
"*"
],
"query": {
"bool": {
"must": [],
"filter": [
{
"range": {
"timestamp": {
"gte": "2021-08-04T08:00:00.000",
"lte": "2021-08-04T09:00:00.000",
"format": "strict_date_optional_time"
}
}
}
],
"should": [],
"must_not": []
}
}
}
Thanks in advance for the response.

Elasticsearch equivalent SQL In Query with multiple fields

I am looking for an Elasticsearch equivalent of an SQL IN query on multiple fields, like the one below
SELECT * from table
WHERE (section , class) in (('a','1'), ('b', '2'))
I know how to use In query for single fields in elasticsearch
SELECT * FROM table WHERE class IN ('1', '2');
Elasticsearch query -
{
"query" : {
"bool" : {
"filter" : {
"terms" : {
"class" : ["1", "2"]
}
}
}
}
}
My actual problem statement -
Sample index data :
[
{
"_index" : "some_index",
"_type" : "_doc",
"_id" : "41",
"_score" : 1.0,
"_source" : {
"class" : "1",
"section" : "a",
"attribute_3" : "hello world"
},
{
"_index" : "some_index",
"_type" : "_doc",
"_id" : "42",
"_score" : 1.0,
"_source" : {
"class" : "2",
"section" : "a",
"attribute_3" : "hello world"
},
{
"_index" : "some_index",
"_type" : "_doc",
"_id" : "43",
"_score" : 1.0,
"_source" : {
"class" : "1",
"section" : "b",
"attribute_3" : "hello world"
},
{
"_index" : "some_index",
"_type" : "_doc",
"_id" : "44",
"_score" : 1.0,
"_source" : {
"class" : "2",
"section" : "b",
"attribute_3" : "hello world"
},
{
"_index" : "some_index",
"_type" : "_doc",
"_id" : "45",
"_score" : 1.0,
"_source" : {
"class" : "3",
"section" : "b",
"attribute_3" : "hello world"
}
]
I want to use a filter on data where (class is 1 AND section is a) OR (class is 2 AND section is b)
Note: I'm preparing this 'OR' combination dynamically and it's going to be more than two combinations.
My expected search result should be -
[{
"_index" : "some_index",
"_type" : "_doc",
"_id" : "41",
"_score" : 1.0,
"_source" : {
"class" : "1",
"section" : "a",
"attribute_3" : "hello world"
},
{
"_index" : "some_index",
"_type" : "_doc",
"_id" : "44",
"_score" : 1.0,
"_source" : {
"class" : "2",
"section" : "b",
"attribute_3" : "hello world"
}]
This would translate to:
{
"query": {
"bool": {
"should": [
{
"bool": {
"must": [
{
"term": {
"a": 0
}
},
{
"term": {
"b": 9
}
}
]
}
},
{
"bool": {
"must": [
{
"term": {
"a": 0
}
},
{
"term": {
"b": 4
}
}
]
}
}
]
}
}
}
but if a is always 0 as you mentioned in the example the query can be rephrased to:
{
"query": {
"bool": {
"must": [
{
"term": {
"a": 0
}
},
{
"terms": {
"b": [
9,
4
]
}
}
]
}
}
}

Find nearest timestamp

I'm using Elasticsearch 6.4.2, and I need to find the previous and next docs considering a specified timestamp.
Kind of like if I ran SELECT TOP 1 * from table WHERE date < 2019-01-01 ORDER BY date DESC and SELECT TOP 1 * from table WHERE date > 2019-01-01 ORDER BY date ASC on a SQL table, to find the previous and next records relative to 2019-01-01.
Any ideas?
Data:
[
{
"_index" : "index25",
"_type" : "_doc",
"_id" : "mceIBm4B1qXGA4PnKzvZ",
"_score" : 1.0,
"_source" : {
"id" : 1,
"date" : "2019-10-01"
}
},
{
"_index" : "index25",
"_type" : "_doc",
"_id" : "mseIBm4B1qXGA4PnRDvs",
"_score" : 1.0,
"_source" : {
"id" : 2,
"date" : "2019-10-02"
}
},
{
"_index" : "index25",
"_type" : "_doc",
"_id" : "m8eIBm4B1qXGA4PncDv9",
"_score" : 1.0,
"_source" : {
"id" : 3,
"date" : "2019-10-03"
}
},
{
"_index" : "index25",
"_type" : "_doc",
"_id" : "nMeIBm4B1qXGA4Pnhjvs",
"_score" : 1.0,
"_source" : {
"id" : 4,
"date" : "2019-10-04"
}
},
{
"_index" : "index25",
"_type" : "_doc",
"_id" : "nceIBm4B1qXGA4Pnmjtm",
"_score" : 1.0,
"_source" : {
"id" : 5,
"date" : "2019-10-05"
}
}
]
Query: I am using two filter aggregations, each with a terms sub-aggregation, to get the first date greater than and the first date less than 2019-10-03
{
"size": 0,
"aggs": {
"above": {
"filter": {
"range": {
"date": {
"gt": "2019-10-03"
}
}
},
"aggs": {
"TopDocument": {
"terms": {
"field": "date",
"size": 1,
"order": {
"_term": "asc"
}
},
"aggs": {
"documents": {
"top_hits": {
"size": 10
}
}
}
}
}
},
"below":{
"filter": {
"range": {
"date": {
"lt": "2019-10-03"
}
}
},
"aggs": {
"TopDocument": {
"terms": {
"field": "date",
"size": 1,
"order": {
"_term": "desc"
}
},
"aggs": {
"documents": {
"top_hits": {
"size": 10
}
}
}
}
}
}
}
}
Response:
{
"below" : {
"doc_count" : 2,
"TopDocument" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 1,
"buckets" : [
{
"key" : 1569974400000,
"key_as_string" : "2019-10-02T00:00:00.000Z",
"doc_count" : 1,
"documents" : {
"hits" : {
"total" : {
"value" : 1,
"relation" : "eq"
},
"max_score" : 1.0,
"hits" : [
{
"_index" : "index25",
"_type" : "_doc",
"_id" : "mseIBm4B1qXGA4PnRDvs",
"_score" : 1.0,
"_source" : {
"id" : 2,
"date" : "2019-10-02"
}
}
]
}
}
}
]
}
},
"above" : {
"doc_count" : 2,
"TopDocument" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 1,
"buckets" : [
{
"key" : 1570147200000,
"key_as_string" : "2019-10-04T00:00:00.000Z",
"doc_count" : 1,
"documents" : {
"hits" : {
"total" : {
"value" : 1,
"relation" : "eq"
},
"max_score" : 1.0,
"hits" : [
{
"_index" : "index25",
"_type" : "_doc",
"_id" : "nMeIBm4B1qXGA4Pnhjvs",
"_score" : 1.0,
"_source" : {
"id" : 4,
"date" : "2019-10-04"
}
}
]
}
}
}
]
}
}
}
You can try this :
SELECT TOP 1 * from table WHERE date < 2019-01-01 ORDER BY date DESC
{
"sort": [
{
"date": {
"order": "desc"
}
}
],
"query": {
"bool": {
"filter": [
{
"range": {
"date": {
"lt": "2019-01-01"
}
}
}
]
}
},
"size": 1
}
SELECT TOP 1 * from table WHERE date > 2019-01-01 ORDER BY date ASC
{
"sort": [
{
"date": {
"order": "asc"
}
}
],
"query": {
"bool": {
"filter": [
{
"range": {
"date": {
"gt": "2019-01-01"
}
}
}
]
}
},
"size": 1
}

Elasticsearch: How to optimize the source parameter in a script function?

I have the following data in an Elasticsearch index called products
{
"took" : 0,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 4,
"relation" : "eq"
},
"max_score" : 1.0,
"hits" : [
{
"_index" : "products",
"_type" : "_doc",
"_id" : "1",
"_score" : 1.0,
"_source" : {
"prod_id" : 1,
"currency" : "USD",
"price" : 1
}
},
{
"_index" : "products",
"_type" : "_doc",
"_id" : "2",
"_score" : 1.0,
"_source" : {
"prod_id" : 2,
"currency" : "INR",
"price" : 60
}
},
{
"_index" : "products",
"_type" : "_doc",
"_id" : "3",
"_score" : 1.0,
"_source" : {
"prod_id" : 3,
"currency" : "EUR",
"price" : 2
}
},
{
"_index" : "products",
"_type" : "_doc",
"_id" : "5",
"_score" : 1.0,
"_source" : {
"prod_id" : 5,
"currency" : "MYR",
"price" : 1
}
}
]
}
}
I am sorting the data based on the price field,
I have the following script to do so -
GET products/_search
{
"query": {
"function_score": {
"query": {
"match_all": {}
},
"functions": [{
"script_score": {
"script": {
"params": {
"USD": 1,
"SGD": 0.72,
"MYR": 0.24,
"INR": 0.014,
"EUR": 1.12
},
"source": "doc['price'].value * (doc.currency.value == 'eur'? params.EUR : doc.currency.value == 'myr' ? params.MYR : doc.currency.value == 'inr' ? params.INR : 1)"
}
}
}]
}
},
"sort": [
{
"_score": {
"order": "desc"
}
}
]
}
Because the field currency in the product index is of type text,
it is indexed with Standard Analyzer, which converts it to lower case.
I wish to optimise this part of the script, as I may end up with 20-30 currencies -
"source": "doc['price'].value * (doc.currency.value == 'eur'? params.EUR : doc.currency.value == 'myr' ? params.MYR : doc.currency.value == 'inr' ? params.INR : 1)"
I was able to optimize the source script with the following working solution -
GET products/_search
{
"query": {
"function_score": {
"query": {
"match_all": {}
},
"functions": [{
"script_score": {
"script": {
"params": {
"USD": 1,
"SGD": 0.72,
"MYR": 0.24,
"INR": 0.014,
"EUR": 1.12
},
"source": "doc['price'].value * params[doc['currency.keyword'].value]"
}
}
}]
}
},
"sort": [
{
"_score": {
"order": "desc"
}
}
]
}

Is there a way in ElasticSearch to get the shortest (closest) word at top?

I have the words inside my index: "Kem, Kemi, Kemah, Kemer, Kemerburgaz, Kemang, Kembs, Kemnay, Kempley, Kempsey, Kemerovo".
When I search for "Kem" I want "Kemi" to come at the top because it is the closest word (Kem + i = Kemi). But it doesn't work the way I want.
Index:
{
"settings": {
"number_of_shards": 1,
"analysis": {
"filter": {
"autocomplete_filter": {
"type": "edge_ngram",
"min_gram": 2,
"max_gram": 15
}
},
"analyzer": {
"autocomplete": {
"type": "custom",
"tokenizer": "standard",
"filter": [
"lowercase",
"autocomplete_filter"
]
}
}
}
},
"mappings": {
"_doc": {
"properties": {
"name": {
"fields": {
"keyword": {
"type": "keyword"
}
},
"type": "text",
"similarity": "classic",
"analyzer": "autocomplete",
"search_analyzer": "standard"
},
"id": {
"type": "keyword"
},
"slug": {
"type": "keyword"
},
"type": {
"type": "keyword"
}
}
}
}
}
Query:
{
"from" : 0, "size" : 10,
"query": {
"bool": {
"must": [
{
"match": {
"name": "Kem"
}
}
],
"should": [
{
"term": {
"name.keyword": {
"value": "Kem"
}
}
}
]
}
}
}
'
Result:
{
"took" : 6,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : 143,
"max_score" : 20.795834,
"hits" : [
{
"_index" : "destinations",
"_type" : "_doc",
"_id" : "lPL8Y2YBqxTX_xwrZlGc",
"_score" : 20.795834,
"_source" : {
"id" : "c6317201",
"name" : "Kem",
"slug" : "yurtdisi/karelya-cumhuriyeti/kem"
}
},
{
"_index" : "destinations",
"_type" : "_doc",
"_id" : "se78Y2YBqxTX_xwrVFIU",
"_score" : 8.61574,
"_source" : {
"id" : "c121023",
"name" : "Kemah",
"slug" : "yurtdisi/houston-ve-civari/kemah"
}
},
{
"_index" : "destinations",
"_type" : "_doc",
"_id" : "ze78Y2YBqxTX_xwrVFo5",
"_score" : 8.61574,
"_source" : {
"id" : "c1783",
"name" : "Kemerovo",
"slug" : "yurtdisi/kemerovo-oblasti/kemerovo"
}
},
{
"_index" : "destinations",
"_type" : "_doc",
"_id" : "xe78Y2YBqxTX_xwrVFs9",
"_score" : 8.61574,
"_source" : {
"id" : "c1786",
"name" : "Kemi",
"slug" : "yurtdisi/rovaniemi/kemi"
}
},
{
"_index" : "destinations",
"_type" : "_doc",
"_id" : "Tu78Y2YBqxTX_xwrVG-X",
"_score" : 8.61574,
"_source" : {
"id" : "c1900",
"name" : "Kempsey",
"slug" : "yurtdisi/new-south-wales/kempsey"
}
},
{
"_index" : "destinations",
"_type" : "_doc",
"_id" : "Bu78Y2YBqxTX_xwrVILt",
"_score" : 8.61574,
"_source" : {
"id" : "c3000010982",
"name" : "Kempley",
"slug" : "yurtdisi/dymock/kempley"
}
},
{
"_index" : "destinations",
"_type" : "_doc",
"_id" : "B-78Y2YBqxTX_xwrVILt",
"_score" : 8.61574,
"_source" : {
"id" : "c3000010983",
"name" : "Kemnay",
"slug" : "yurtdisi/inverurie/kemnay"
}
},
{
"_index" : "destinations",
"_type" : "_doc",
"_id" : "CO78Y2YBqxTX_xwrVIb_",
"_score" : 8.61574,
"_source" : {
"id" : "c3000013079",
"name" : "Kemerburgaz",
"slug" : "eyup/kemerburgaz"
}
},
{
"_index" : "destinations",
"_type" : "_doc",
"_id" : "-fL8Y2YBqxTX_xwrZQxf",
"_score" : 8.61574,
"_source" : {
"id" : "c6190744",
"name" : "Kembs",
"slug" : "yurtdisi/haut-rhin-bolge/kembs"
}
},
{
"_index" : "destinations",
"_type" : "_doc",
"_id" : "xfL8Y2YBqxTX_xwrZSG-",
"_score" : 8.61574,
"_source" : {
"id" : "c6216986",
"name" : "Kemang",
"slug" : "yurtdisi/cakarta/kemang"
}
}
]
}
}
Now they all have the same score, I guess because every one of them contains "Kem". Even if I use "match" or "match_phrase", the outcome is the same.
In your example it seems that you want your results sorted by length. You can do that with a script.
POST your_index/_doc/_search
{
"from": 0,
"size": 10,
"query": {
"bool": {
"must": [
{
"match": {
"name": "Kem"
}
}
],
"should": [
{
"term": {
"name.keyword": {
"value": "Kem"
}
}
}
]
}
},
"sort": [
{
"_score": {"order": "desc"}
},
{
"_script": {
"script": "doc['name.keyword'].value.length()",
"type": "number",
"order": "asc"
}
},
{
"name.keyword": {"order": "asc"}
}
]
}

Resources