Elasticsearch Geospatial giving me exaggerated distance measurements

Problem:
I am using Elasticsearch to index some listings on my site, and I keep getting ridiculously exaggerated distances. Even when I search using the same coordinates my document has, I still get distances upward of 3000 km.
Post:
[{
  "expiryDate": "2014-04-11 02:32:16",
  "geo": {
    "lon": 45.297,
    "lat": 75.0755
  },
  "id": "5571afb7ae2c287f4d54b713",
  "images": [],
  "imagesLinks": []
}]
Mapping:
{
  "listingsell" : {
    "properties" : {
      "geo" : {
        "type" : "geo_point",
        "fielddata" : {
          "format" : "compressed",
          "precision" : "1m"
        }
      }
    }
  }
}
Query:
{
  "query": {
    "filtered" : {
      "query" : {
        "match_all" : {}
      },
      "filter" : {
        "geo_distance" : {
          "distance" : "90000mi",
          "geo" : {
            "lat" : 45.1339,
            "lon" : 75.019
          }
        }
      }
    }
  },
  "sort" : [
    {
      "_geo_distance" : {
        "geo" : {
          "lat" : 45.1339,
          "lon" : 75.019
        },
        "order" : "asc",
        "unit" : "mi"
      }
    }
  ]
}
Result:
{
  "sort": [2247.4929]
}

Either you have the geo_point indexed the wrong way round:
"geo": {
  "lon": 45.297,
  "lat": 75.0755
}
or the sorting is done with the coordinates the wrong way round:
"geo" : {
  "lat" : 45.1339,
  "lon" : 75.019
}
Meaning you mixed up lat with lon. As indexed, the document sits at latitude 75.0755 (far inside the Arctic) while the sort point sits at latitude 45.1339, so Elasticsearch is correctly reporting the roughly 3,600 km (2,247 mi) between two points about 30 degrees of latitude apart.
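For instance, a minimal sketch of the fix, assuming the document's two numbers were simply transposed at indexing time and the sort point already has the right orientation:
"geo": {
  "lat": 45.297,
  "lon": 75.0755
}
With the axes consistent, the stored point and the search point are under 20 km apart, so the reported distance drops from about 2247 mi to roughly 12 mi.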

Related

How to select the last bucket in a date_histogram selector in Elasticsearch

I have a date_histogram and I can use max_bucket to get the bucket with the greatest value, but I want to select the last bucket (i.e. the bucket with the highest timestamp).
Using max_bucket to get the greatest value works OK, but I don't know what to put in the buckets_path to get the last bucket.
My mapping:
{
  "ee-2020-02-28" : {
    "mappings" : {
      "dynamic" : "strict",
      "properties" : {
        "date" : {
          "type" : "date"
        },
        "frequency" : {
          "type" : "long"
        },
        "keyword" : {
          "type" : "keyword"
        },
        "text" : {
          "type" : "text"
        }
      }
    }
  }
}
My working query, which returns the bucket for the day with the highest frequency (it's named last_day because this is a WIP query to get to my goal):
{
  "query": {
    "range": {
      "date": { /* Start away from the beginning of data, so the rolling avg is full */
        "gte": "2019-02-18"/*,
        "lte": "2020-12-14"*/
      }
    }
  },
  "aggs": {
    "palabrejas": {
      "terms": {
        "field": "keyword",
        "size": 100
      },
      "aggs": {
        "nnndiario": {
          "date_histogram": {
            "field": "date",
            "calendar_interval": "day"
          },
          "aggs": {
            "dailyfreq": {
              "sum": {
                "field": "frequency"
              }
            }
          }
        },
        "ventanuco": {
          "avg_bucket": {
            "buckets_path": "nnndiario>dailyfreq",
            "gap_policy": "insert_zeros"
          }
        },
        "last_day": {
          "max_bucket": {
            "buckets_path": "nnndiario>dailyfreq"
          }
        }
      }
    }
  }
}
Its output (notice I replaced long parts with [...]):
{
  "aggregations" : {
    "palabrejas" : {
      "doc_count_error_upper_bound" : 0,
      "sum_other_doc_count" : 0,
      "buckets" : [
        {
          "key" : "rama0",
          "doc_count" : 20400,
          "nnndiario" : {
            "buckets" : [
              {
                "key_as_string" : "2020-01-01T00:00:00.000Z",
                "key" : 1577836800000,
                "doc_count" : 600,
                "dailyfreq" : {
                  "value" : 3000.0
                }
              },
              {
                "key_as_string" : "2020-01-02T00:00:00.000Z",
                "key" : 1577923200000,
                "doc_count" : 600,
                "dailyfreq" : {
                  "value" : 3000.0
                }
              },
              {
                "key_as_string" : "2020-01-03T00:00:00.000Z",
                "key" : 1578009600000,
                "doc_count" : 600,
                "dailyfreq" : {
                  "value" : 3000.0
                }
              },
              [...]
              {
                "key_as_string" : "2020-01-31T00:00:00.000Z",
                "key" : 1580428800000,
                "doc_count" : 600,
                "dailyfreq" : {
                  "value" : 3000.0
                }
              }
            ]
          },
          "ventanuco" : {
            "value" : 3290.3225806451615
          },
          "last_day" : {
            "value" : 12000.0,
            "keys" : [
              "2020-01-13T00:00:00.000Z"
            ]
          }
        },
        {
          "key" : "rama1",
          "doc_count" : 20400,
          "nnndiario" : {
            "buckets" : [
              {
                "key_as_string" : "2020-01-01T00:00:00.000Z",
                "key" : 1577836800000,
                "doc_count" : 600,
                "dailyfreq" : {
                  "value" : 3000.0
                }
              },
              [...]
            ]
          },
          "ventanuco" : {
            "value" : 3290.3225806451615
          },
          "last_day" : {
            "value" : 12000.0,
            "keys" : [
              "2020-01-13T00:00:00.000Z"
            ]
          }
        },
        [...]
      ]
    }
  }
}
I don't know what to put in last_day's buckets_path to obtain the last bucket.
You might consider using a terms aggregation instead of a date_histogram aggregation:
"max_date_bucket_agg": {
"terms": {
"field": "date",
"size": 1,
"order": {"_key": "desc"}
}
}
One issue might be the granularity of your data: consider storing the date value at the expected granularity (e.g. day) in a separate field and using that field in the terms aggregation.
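As a minimal sketch of how that could plug into the query from the question (reusing its field and aggregation names; the dailyfreq sub-aggregation is carried over so the surviving bucket still exposes the summed frequency):
"aggs": {
  "palabrejas": {
    "terms": { "field": "keyword", "size": 100 },
    "aggs": {
      "max_date_bucket_agg": {
        "terms": {
          "field": "date",
          "size": 1,
          "order": { "_key": "desc" }
        },
        "aggs": {
          "dailyfreq": { "sum": { "field": "frequency" } }
        }
      }
    }
  }
}
Ordering the date terms by _key descending with size 1 keeps exactly one bucket per keyword: the one with the highest timestamp.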

Elasticsearch Aggregation sorting

My Elasticsearch mapping is
{
  "mappings" : {
    "loc" : {
      "dynamic": "true",
      "properties" : {
        "geoip" : {
          "properties" : {
            "location" : { "type": "geo_point" }
          }
        },
        "lon" : { "type" : "double" },
        "lat" : { "type" : "double" },
        "altitude" : { "type" : "double" },
        "id" : { "type" : "long" },
        "date" : { "type" : "date", "format" : "epoch_millis" },
        "ip" : { "type" : "string" },
        "port" : { "type" : "string" }
      }
    }
  }
}
And I want to sort by time, so I wrote this query:
{
  "query": {
    "bool" : {
      "must" : {
        "match_all" : {}
      },
      "filter" : {
        "geo_distance" : {
          "distance" : "0.2km",
          "geoip.location" : {
            "lat" : 36.773353,
            "lon" : 126.933847
          }
        }
      }
    }
  },
  "size" : 0,
  "sort" : { "date" : { "order" : "desc" } },
  "aggs" : {
    "ids" : {
      "terms" : {
        "field" : "id"
      },
      "aggs" : {
        "dedup_docs" : {
          "top_hits" : { "size" : 1 }
        }
      }
    }
  }
}
I want to group the results of the GPS filter by id and return the latest time for each group, sorted in chronological order.
However, the date values in the result are unordered.
I do not know how to modify the query.
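One common approach, sketched here on the assumption that the goal is the newest document per id: sort the hits inside top_hits by date, and order the id buckets themselves by a max(date) sub-aggregation (named latest below purely for illustration), since the top-level sort does not reorder aggregation contents:
"aggs" : {
  "ids" : {
    "terms" : {
      "field" : "id",
      "order" : { "latest" : "desc" }
    },
    "aggs" : {
      "latest" : { "max" : { "field" : "date" } },
      "dedup_docs" : {
        "top_hits" : {
          "size" : 1,
          "sort" : [ { "date" : { "order" : "desc" } } ]
        }
      }
    }
  }
}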

Elasticsearch - Conditional nested fetching

I have this index mapping:
{
  "dev.directory.3" : {
    "mappings" : {
      "profile" : {
        "properties" : {
          "email" : {
            "type" : "string",
            "index" : "not_analyzed"
          },
          "events" : {
            "type" : "nested",
            "properties" : {
              "id" : {
                "type" : "integer"
              },
              "name" : {
                "type" : "string",
                "index" : "not_analyzed"
              }
            }
          }
        }
      }
    }
  }
}
with data:
"hits" : [ {
"_index" : "dev.directory.3",
"_type" : "profile",
"_id" : "1",
"_score" : 1.0,
"_source" : {
"email" : "test#dummy.com",
"events" : [
{
"id" : 111,
"name" : "ABC",
},
{
"id" : 222,
"name" : "DEF",
}
],
}
}]
I'd like to get back only the matched nested elements instead of the whole events array - is this possible in ES?
Example query:
{
  "nested" : {
    "path" : "events",
    "query" : {
      "bool" : {
        "filter" : [
          { "match" : { "events.id" : 222 } }
        ]
      }
    }
  }
}
E.g. if I query for events.id=222, only a single element should be returned in the result list.
What strategy would be best to achieve this kind of requirement?
You can use inner_hits to get only the nested records that matched the query.
{
  "query": {
    "nested": {
      "path": "events",
      "query": {
        "bool": {
          "filter": [
            {
              "match": {
                "events.id": 222
              }
            }
          ]
        }
      },
      "inner_hits": {}
    }
  },
  "_source": false
}
I am also excluding the source, to get only the nested hits.
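inner_hits also takes options of its own; a small sketch (the values here are chosen only for illustration) that caps how many matched nested documents come back and trims their source:
"inner_hits": {
  "size": 5,
  "_source": [ "events.id", "events.name" ]
}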

How can I sort by a calculated value in elasticsearch

I want to use Elasticsearch as a search engine. I'm copying records from MySQL to Elasticsearch, and when I query Elasticsearch I want to calculate a value from the data in the index and use it to sort the results.
My index looks like:
{
  "busquedas" : {
    "aliases" : { },
    "mappings" : {
      "coche" : {
        "properties" : {
          "coeff_e" : {
            "type" : "double"
          },
          "coeff_r" : {
            "type" : "double"
          },
          "desc" : {
            "type" : "string"
          }
        }
      }
    },
    "settings" : {
      "index" : {
        "creation_date" : "1460116924258",
        "number_of_shards" : "5",
        "number_of_replicas" : "1",
        "uuid" : "N6jhy_ilQmSb6og16suZ4g",
        "version" : {
          "created" : "2030199"
        }
      }
    },
    "warmers" : { }
  }
}
And I want to compute a value per record, like
myCustomOrder = (coeff_e + coeff_r) * timestamp
and use it to sort the results:
{
  "sort" : [
    { "myCustomOrder" : { "order" : "asc" } },
    "_score"
  ],
  "query" : {
    "term" : { ... }
  }
}
I know I can use Groovy to compute values, but I have only managed to use it for filtering, as shown in the examples:
{
  "from": 10,
  "size": 3,
  "filter": {
    "script": {
      "script": "doc['coeff_e'].value < 0.5"
    }
  }
}
Thanks in advance - I'm a total newbie to Elasticsearch :D
It works the same as with filtering: take a look at this section of the documentation. It should be self-explanatory once you already know about scripts :-).
For completeness' sake:
{
  "query" : {
    ....
  },
  "sort" : {
    "_script" : {
      "type" : "number",
      "script" : {
        "inline": "doc['field_name'].value * factor",
        "params" : {
          "factor" : 1.1
        }
      },
      "order" : "asc"
    }
  }
}
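Adapted to the formula from the question, as a sketch only: it assumes the documents carry a numeric timestamp field, which the mapping shown above does not include:
{
  "query" : {
    "match_all" : {}
  },
  "sort" : {
    "_script" : {
      "type" : "number",
      "script" : {
        "inline" : "(doc['coeff_e'].value + doc['coeff_r'].value) * doc['timestamp'].value"
      },
      "order" : "asc"
    }
  }
}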

How to exclude a filter from a facet?

I have come from a Solr background and am trying to find the equivalent of "tagging" and "excluding" in Elasticsearch.
In the following example, how can I exclude the price filter from the calculation of the prices facet? In other words, the prices facet should take into account all of the filters except for price.
{
  "query" : {
    "filtered" : {
      "query" : {
        "match_all" : {}
      },
      "filter" : {
        "and" : [
          {
            "term" : {
              "colour" : "Red"
            }
          },
          {
            "term" : {
              "feature" : "Square"
            }
          },
          {
            "term" : {
              "feature" : "Shiny"
            }
          },
          {
            "range" : {
              "price" : {
                "from" : "10",
                "to" : "20"
              }
            }
          }
        ]
      }
    }
  },
  "facets" : {
    "colours" : {
      "terms" : {
        "field" : "colour"
      }
    },
    "features" : {
      "terms" : {
        "field" : "feature"
      }
    },
    "prices" : {
      "statistical" : {
        "field" : "price"
      }
    }
  }
}
You can apply the price filter as a top-level filter to your query and add it to all facets except prices as a facet_filter:
{
  "query" : {
    "filtered" : {
      "query" : {
        "match_all" : {}
      },
      "filter" : {
        "and" : [
          {
            "term" : {
              "colour" : "Red"
            }
          },
          {
            "term" : {
              "feature" : "Square"
            }
          },
          {
            "term" : {
              "feature" : "Shiny"
            }
          }
        ]
      }
    }
  },
  "facets" : {
    "colours" : {
      "terms" : {
        "field" : "colour"
      },
      "facet_filter" : {
        "range" : { "price" : { "from" : "10", "to" : "20" } }
      }
    },
    "features" : {
      "terms" : {
        "field" : "feature"
      },
      "facet_filter" : {
        "range" : { "price" : { "from" : "10", "to" : "20" } }
      }
    },
    "prices" : {
      "statistical" : {
        "field" : "price"
      }
    }
  },
  "filter": {
    "range" : { "price" : { "from" : "10", "to" : "20" } }
  }
}
Btw, an important change since ES 1.0.0: the top-level filter was renamed to post_filter (http://www.elasticsearch.org/guide/en/elasticsearch/reference/current/_search_requests.html#_search_requests), and using filtered queries is still preferred, as described here: http://elasticsearch-users.115913.n3.nabble.com/Filters-vs-Queries-td3219558.html
There is also a global option for facets to avoid being narrowed by the query's filter (elasticsearch.org/guide/en/elasticsearch/reference/current/search-facets.html#_scope).
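So on ES 1.0.0 and later, the last block of the query above would be spelled as follows (same body, only the key renamed per the note above):
"post_filter": {
  "range" : { "price" : { "from" : "10", "to" : "20" } }
}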
