Elasticsearch geo_point doesn't work properly - elasticsearch

I want to run geo_point queries on Elasticsearch, but they don't work properly for me: I always get an empty result for geo_polygon queries. Maybe my mapping is wrong, or the way I load the data.
Mapping:
curl -XPUT 'localhost:9200/botanique_localisation/' -d '{
"mappings":{
"botanique_localisation" : {
"_all" : {"enabled" : true},
"_index" : {"enabled" : true},
"_id" : {"index": "not_analyzed", "store" : false},
"properties" : {
"_id" : {"type" : "string", "store" : "no","index": "not_analyzed" } ,
"LOCATION" : { "type" : "geo_point","lat_lon" :true ,"validate":true , "store":"yes" }
}
}
}
}'
Creating the view in Oracle:
create view all_specimens_localisation as select RAWTOHEX( SPECIMENS.occurrenceid ) as "_id" ,
decode(LOCALISATIONS.decimalLatitude ||',' || LOCALISATIONS.decimalLongitude, ',', null ,
'{"lat":' || replace(LOCALISATIONS.decimalLatitude,',' ,'.' ) ||',"lon":' || replace(LOCALISATIONS.decimalLongitude , ',' ,'.' ) || '}'
) as location
from SPECIMENS left outer join ... where rownum < 1000 ;
I build a JSON object in the SQL because sending lat/lon as a string didn't work for me (Elasticsearch doesn't split the string as described here: http://www.elasticsearch.org/guide/en/elasticsearch/reference/current/mapping-geo-point-type.html#_lat_lon_as_string_6 ).
Creating the river from Oracle to Elasticsearch:
curl -XPUT 'localhost:9200/_river/localisation_river/_meta' -d '{
"type" : "jdbc",
"jdbc" : {
"index" : "botanique_localisation",
"bulk_size" : 2000,
"max_bulk_requests" : 10,
"bulk_flush_interval" : "1s",
"type" : "specimens",
"url" : "********",
"user" : "********",
"password" : "********",
"sql" : "select * from all_specimens_localisation"
}
}'
Example of indexed data in Elasticsearch:
{
_index: botanique_localisation
_type: specimens
_id: 38C8F872A449491C881791DE8B501B17
_score: 1.4142135
_source: {
LOCATION: {
lon: 47.05
lat: -19.95
}
}
}
working range query
curl -XGET 'localhost:9200/botanique_localisation/specimens/_search?size=10&pretty' -d '
{ "query": { "bool": { "must": [
{ "range": {
"LOCATION.lon": {
"from": 47.04,
"to": 47.08
}
}
},{ "range": {
"LOCATION.lat": {
"from": -20,
"to": -19.90
}
}
}
]}}}'
and the result :
hits:{[
{ "_index": botanique_localisation,
"_type": specimens,
"_id": 38C8F872A449491C881791DE8B501B17,
"_score": 1.4142135,
"_source": {
"LOCATION": { "lon": 47.05, "lat": -19.95 }
}
},...
Now the fun, non-working part: the geo_polygon query:
curl -XGET 'localhost:9200/botanique_localisation/_search?size=10&pretty' -d '{
"query":{
"filtered" : {
"query" : { "match_all" : {}},
"filter" : {
"geo_polygon" : {
"LOCATION" : {
"points" : [
{ "lat": 100, "lon": -100},
{ "lat": 100, "lon": 100},
{ "lat": -100, "lon": 100 },
{ "lat": -100 , "lon": -100 }
]
}
}
}
}
}
}'
This returns no hits!
What am I missing?
Thank you.

This query works:
curl -XGET 'localhost:9200/botanique_localisation/_search?pretty' -d '{
"query" : {
"filtered" : {
"filter" : {
"geo_bounding_box" : {
"type" : "indexed",
"LOCATION" : {
"top_left" : {
"lat" : 50,
"lon" : -50
},
"bottom_right" : {
"lat" :-50,
"lon" : 50
}
}
}
}
}
}
}'

Related

Using Email tokenizer in elasticsearch

I tried some examples from the Elasticsearch documentation and from Google, but nothing helped me figure this out.
My sample data is just a few blog posts. I am trying to find all posts with a given email address. When I use "email":"someone" I see all the posts matching someone, but when I change it to someone#gmail.com nothing shows up!
"hits": [
{
"_index": "blog",
"_type": "post",
"_id": "2",
"_score": 1,
"_source": {
"user": "sreenath",
"email": "someone#gmail.com",
"postDate": "2011-12-12",
"body": "Trying to figure out this",
"title": "Elastic search testing"
}
}
]
When I use the GET query shown below, I see all posts matching someone#anything.com. But I want to change this:
{ "term" : { "email" : "someone" }} to { "term" : { "email" : "someone#gmail.com" }}
GET blog/post/_search
{
"query" : {
"filtered" : {
"filter" : {
"and" : [
{ "term" :
{ "email" : "someone" }
}
]
}
}
}
}
I ran curl -XPUT with the following, but it did not help:
curl -XPUT localhost:9200/test/ -d '
{
"settings" : {
"analysis" : {
"filter" : {
"email" : {
"type" : "pattern_capture",
"preserve_original" : 1,
"patterns" : [
"([^#]+)",
"(\\p{L}+)",
"(\\d+)",
"#(.+)"
]
}
},
"analyzer" : {
"email" : {
"tokenizer" : "uax_url_email",
"filter" : [ "email", "lowercase", "unique" ]
}
}
}
}
}
'
You have created a custom analyzer for email addresses, but you are not using it. You need to declare the email field in your mapping type to actually use that analyzer, as shown below. Also make sure to create the right index with that analyzer, i.e. blog and not test.
change this
|
v
curl -XPUT localhost:9200/blog/ -d '{
"settings" : {
"analysis" : {
"filter" : {
"email" : {
"type" : "pattern_capture",
"preserve_original" : 1,
"patterns" : [
"([^#]+)",
"(\\p{L}+)",
"(\\d+)",
"#(.+)"
]
}
},
"analyzer" : {
"email" : {
"tokenizer" : "uax_url_email",
"filter" : [ "email", "lowercase", "unique" ]
}
}
}
},
"mappings": { <--- add this
"post": {
"properties": {
"email": {
"type": "string",
"analyzer": "email"
}
}
}
}
}
'

Using geo_distance filter doesn't return any hits using ElasticSearch

I can't get my filter query to work correctly with geo_distance. It seems to return 0 hits. But all of my other queries work if I'm not trying to find a geo position.
I'm using version 2.3.1 of ElasticSearch
{
"name" : "Mar-Vell",
"cluster_name" : "elastic-logs",
"version" : {
"number" : "2.3.1",
"build_hash" : "bd980929010aef404e7cb0843e61d0665269fc39",
"build_timestamp" : "2016-04-04T12:25:05Z",
"build_snapshot" : false,
"lucene_version" : "5.5.0"
},
"tagline" : "You Know, for Search"
}
I've mapped my location key with a type of "geo_point" by making a request with json like so:
curl -XPUT 'http://10.0.7.181:9200/connections/profile/_mapping' -d '
{
"profile" : {
"properties" : {
"location" : {
"type" : "geo_point"
}
}
}
}
And it returns this. I'm assuming my changes are in effect.
{"acknowledged":true}
Here's an example of my data
{
"_index": "connections",
"_type": "profile",
"_id": "92",
"_score": 1,
"_source": {
"profile": {
"location": {
"lon": -111.8909815,
"lat": 40.7607818
},
"age": 44,
"school": {
"undergraduate": {
"universityId": 1814,
"active": true,
"programId": 9
},
"graduate": {
"universityId": 1814,
"active": true,
"programId": 7
}
},
"bio": "Everything is awesome! 👽"
},
"id": 0,
"active": false,
"optIn": true
}
}
My query that I'm sending over to our ElasticSearch. Nothing crazy.
{
"filter" : {
"geo_distance" : {
"distance" : "1000mi",
"distance_type": "plane",
"location" : {
"lon": -111.8391029,
"lat": 40.7607818
}
}
},
"query" : {
"match_all" : {}
}
}
I've tried changing the distance to 1 mile, 100 miles and 1000 miles. It should return something within 100 miles, but nothing shows up. I've also tried using different units of measurement just to see if it would do anything. Same result.
The coordinates are in Salt Lake City, and the order of longitude and latitude should be right. I'm not sure what else I should try.
The name of your location field is location, but in your query you are using geoPoint as the field name. Try this:
{
"filter" : {
"geo_distance" : {
"distance" : "1000mi",
"distance_type": "plane",
"location" : {
"lon": -111.8391029,
"lat": 40.7607818
}
}
},
"query" : {
"match_all" : {}
}
}
Update: OK, that was the obvious mistake I saw, so I didn't look any further. Here is a working query (with your values) from one of my projects:
{
"query": {
"bool": {
"must": {
"match_all": []
},
"filter": {
"geo_distance": {
"distance": "1000mi",
"distance_type": "plane",
"location": {
"lat": "-111.8391029",
"lon": "40.7607818"
}
}
}
}
}
}
That should work.
@Pelmered, thanks for helping me with the filter query. I found out that my mapping was done incorrectly. Now everything works as expected. So make sure you are mapping it correctly:
curl -XPUT 'http://10.0.7.181:9200/connections' -d '
{
"mappings":{
"profile":{
"properties":{
"location":{
"type":"geo_point"
}
}
}
}
}'

Geo query using elasticsearch

I have created an index the same way as in the example tutorial here:
https://www.elastic.co/guide/en/elasticsearch/reference/2.0/geo-point.html
Specifically, I ran the following:
curl -PUT 'localhost:9200/my_index?pretty' -d '
{
"mappings": {
"my_type": {
"properties": {
"location": {
"type": "geo_point"
}
}
}
}
}'
I have also added two points as data
curl -PUT 'localhost:9200/my_index/my_type/1?pretty' -d'
{
"text": "first geo-point",
"location": {
"lat": 41.12,
"lon": -71.34
}
}'
curl -PUT 'localhost:9200/my_index/my_type/1?pretty' -d'
{
"text": "second geo-point",
"location": {
"lat": 41.13,
"lon": -71.35
}
}'
The example geo bounding box query on the page works (i.e):
curl -XGET 'localhost:9200/my_index/_search?pretty' -d'
{
"query": {
"geo_bounding_box": {
"location": {
"top_left": {
"lat": 42,
"lon": -72
},
"bottom_right": {
"lat": 40,
"lon": -74
}
}
}
}
}'
But the example from this page (https://www.elastic.co/guide/en/elasticsearch/reference/2.0/query-dsl-geo-bounding-box-query.html) doesn't work:
What I have tried looks like the following:
curl -XGET 'localhost:9200/my_index/_search?pretty' -d'
{
"bool" : {
"must" : {
"match_all" : {}
},
"filter" : {
"geo_bounding_box" : {
"my_type.location" : {
"top_left" : {
"lat" : 42,
"lon" : -72
},
"bottom_right" : {
"lat" : 40,
"lon" : -74
}
}
}
}
}
}'
The error I get is as follows:
"error" : {
"root_cause" : [ {
"type" : "search_parse_exception",
"reason" : "failed to parse search source. unknown search element [bool]",
"line" : 3,
"col" : 5
} ],
"type" : "search_phase_execution_exception",
"reason" : "all shards failed",
"phase" : "query",
"grouped" : true,
"failed_shards" : [ {
"shard" : 0,
"index" : "my_index",
"node" : "0qfvkynhTRyjHFRurBLJeQ",
"reason" : {
"type" : "search_parse_exception",
"reason" : "failed to parse search source. unknown search element [bool]",
"line" : 3,
"col" : 5
}
} ]
},
"status" : 400
}
I hope it's just a simple error, so I would like to know: what am I doing wrong?
You need to specify that the whole thing is a query:
curl -XGET 'localhost:9200/my_index/_search?pretty' -d'
{
"query": {
"bool" : {
"must" : {
"match_all" : {}
},
"filter" : {
"geo_bounding_box" : {
"my_type.location" : {
"top_left" : {
"lat" : 42,
"lon" : -72
},
"bottom_right" : {
"lat" : 40,
"lon" : -74
}
}
}
}
}
}
}'
However, as far as I understand, using bool with must and filter is the old way of doing things. In previous versions, geo queries were thought of as "filters", so you had to first run a match_all query to return all the results, and then filter using the geo bounding box. In Elasticsearch 2.0+, there is no separation between filters and queries - everything is a query. So you can run the geo query directly:
curl -XGET 'localhost:9200/my_index/_search?pretty' -d'
{
"query": {
"geo_bounding_box": {
"location": {
"top_left": {
"lat": 42,
"lon": -72
},
"bottom_right": {
"lat": 40,
"lon": -74
}
}
}
}
}'

ElasticSearch - searching different doc_types with the same field name but different analyzers

Let's say I make a simple ElasticSearch index:
curl -XPUT 'http://localhost:9200/test/' -d '{
"settings": {
"analysis": {
"char_filter": {
"de_acronym": {
"type": "mapping",
"mappings": [".=>"]
}
},
"analyzer": {
"analyzer1": {
"type": "custom",
"tokenizer": "keyword",
"char_filter": ["de_acronym"]
}
}
}
}
}'
And I make two doc_types that have the same property name but they are analyzed slightly differently from one another:
curl -XPUT 'http://localhost:9200/test/_mapping/docA' -d '{
"docA": {
"properties": {
"name": {
"type": "string",
"analyzer": "simple"
}
}
}
}'
curl -XPUT 'http://localhost:9200/test/_mapping/docB' -d '{
"docB": {
"properties": {
"name": {
"type": "string",
"analyzer": "analyzer1"
}
}
}
}'
Next, let's say I put a document in each doc_type with the same name:
curl -XPUT 'http://localhost:9200/test/docA/1' -d '{ "name" : "U.S. Army" }'
curl -XPUT 'http://localhost:9200/test/docB/1' -d '{ "name" : "U.S. Army" }'
Let's try to search for "U.S. Army" in both doc types at the same time:
curl -XGET 'http://localhost:9200/test/_search?pretty' -d '{
"query": {
"match_phrase": {
"name": {
"query": "U.S. Army"
}
}
}
}'
{
"took" : 2,
"timed_out" : false,
"_shards" : {
"total" : 5,
"successful" : 5,
"failed" : 0
},
"hits" : {
"total" : 1,
"max_score" : 1.5,
"hits" : [ {
"_index" : "test",
"_type" : "docA",
"_id" : "1",
"_score" : 1.5,
"_source":{ "name" : "U.S. Army" }
} ]
}
}
I only get one result! I get the other result when I specify docB's analyzer:
curl -XGET 'http://localhost:9200/test/_search?pretty' -d '
{
"query": {
"match_phrase": {
"name": {
"query": "U.S. Army",
"analyzer": "analyzer1"
}
}
}
}'
{
"took" : 2,
"timed_out" : false,
"_shards" : {
"total" : 5,
"successful" : 5,
"failed" : 0
},
"hits" : {
"total" : 1,
"max_score" : 1.0,
"hits" : [ {
"_index" : "test",
"_type" : "docB",
"_id" : "1",
"_score" : 1.0,
"_source":{ "name" : "U.S. Army" }
} ]
}
}
I was under the impression that ES would search each doc_type with the appropriate analyzer. Is there a way to do this?
The ElasticSearch docs say that precedence for search analyzer goes:
1) The analyzer defined in the query itself, else
2) The analyzer defined in the field mapping, else
...
In this case, is ElasticSearch arbitrarily choosing which field mapping to use?
Take a look at this issue in github, which seems to have started from this post in ES google groups. I believe it answers your question:
if its in a filtered query, we can't infer it, so we simply pick one of those and use its analysis settings

ElasticSearch _Type with geolocation

I have set up an Elasticsearch index which includes a different _type mapping for every country.
So there is a mapping for "us", "au", "uk", etc.
Each mapping includes a location mapping of type "geo_point".
Prior to adding different _types, my query sort would look like:
"sort" : [
{
"_geo_distance" : {
"postcode.location" : [' . $mylocation_long . ',' . $mylocation_lat . '],
"order" : "asc",
"unit" : "km"
}
}
],
After adding _types to the data and mapping, this no longer works; instead I specify it like:
"sort" : [
{
"_geo_distance" : {
"$country.location" : [' . $mylocation_long . ',' . $mylocation_lat . '],
"order" : "asc",
"unit" : "km"
}
}
],
This works fine.
However, there are times when queries need to span more than a single country, so setting it to "us.location" isn't correct and won't work.
In this case, how do I make this sorting work when I don't know the country and I need to sort by a mapped location?
Or is it the case that this cannot be done, and all docs must have the same _type in order for this to work?
Sorry if I am missing something obvious, but why can't you just sort on "location"? It seems to work just fine:
curl -XDELETE localhost:9200/test-idx/ && echo
curl -XPUT localhost:9200/test-idx/ -d '
{
"settings":{
"number_of_shards":1,
"number_of_replicas":0
},
"mappings": {
"us": {
"properties": {
"location": {
"type": "geo_point"
}
}
},
"uk": {
"properties": {
"location": {
"type": "geo_point"
}
}
},
"au": {
"properties": {
"location": {
"type": "geo_point"
}
}
}
}
}' && echo
curl -XPUT localhost:9200/test-idx/us/1 -d '
{
"location": "42.3606402,-71.0674569"
}
' && echo
curl -XPUT localhost:9200/test-idx/uk/2 -d '
{
"location": "51.5286416,-0.1017943"
}
' && echo
curl -XPUT localhost:9200/test-idx/au/3 -d '
{
"location": "-33.8471226,151.0594183"
}
' && echo
curl -XPOST localhost:9200/test-idx/_refresh && echo
curl "localhost:9200/test-idx/_search?pretty" -d '{
"query": {
"match_all": {}
},
"sort" : [
{
"_geo_distance" : {
"location" : "52.3712989,4.8937347",
"order" : "asc",
"unit" : "km"
}
}
]
}' && echo
output:
{"ok":true,"acknowledged":true}
{"ok":true,"acknowledged":true}
{"ok":true,"_index":"test-idx","_type":"us","_id":"1","_version":1}
{"ok":true,"_index":"test-idx","_type":"uk","_id":"2","_version":1}
{"ok":true,"_index":"test-idx","_type":"au","_id":"3","_version":1}
{"ok":true,"_shards":{"total":1,"successful":1,"failed":0}}
{
"took" : 3,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"failed" : 0
},
"hits" : {
"total" : 3,
"max_score" : null,
"hits" : [ {
"_index" : "test-idx",
"_type" : "uk",
"_id" : "2",
"_score" : null, "_source" : {"location": "51.5286416,-0.1017943"},
"sort" : [ 355.2735714686373 ]
}, {
"_index" : "test-idx",
"_type" : "us",
"_id" : "1",
"_score" : null, "_source" : {"location": "42.3606402,-71.0674569"},
"sort" : [ 5563.606078215864 ]
}, {
"_index" : "test-idx",
"_type" : "au",
"_id" : "3",
"_score" : null, "_source" : {"location": "-33.8471226,151.0594183"},
"sort" : [ 16650.926847312003 ]
} ]
}
}
What happens when you point the working query at /index/_search instead of /index/type/_search ?

Resources