elasticsearch not attaching a default - elasticsearch

I have tried a lot of iterations of this but it simply won't work. I am looking to have _timestamp or #timestamp attached to each document automatically.
I can't seem to get this to work, although the documents are being ingested properly:
curl -XPOST 'http://X.X.X.X:9200/associations' -d '{
"settings" : {
"number_of_shards" : 1
},
"mappings" : {
"_default_":{
"_timestamp" : {
"enabled" : true,
"store" : true,
"path" : "post_date"
}
}
}
}'
I have also tried setting this directly within my index:
curl -XPUT 'http://X.X.X.X:9200/associations' -d '{
"mappings": {
"_timestamp" :
{
"enabled":true,
"store": "yes",
"path" : "post_date",
"type": "date",
"format" : "yyyy-MM-dd HH:mm:ss"
}
}
}'

The first code block you have seems to work for me. If I use it to define an index:
DELETE /test_index
PUT /test_index
{
"settings": {
"number_of_shards": 1,
"number_of_replicas": 0
},
"mappings": {
"_default_": {
"_timestamp": {
"enabled": true,
"store": true,
"path": "post_date"
}
}
}
}
Then add a couple of docs to a new type:
PUT /test_index/doc/1
{
"post_date": "2015-1-25"
}
PUT /test_index/doc/2
{
"post_date": "2015-1-15"
}
I can see the timestamp in the new mapping now:
GET /test_index/_mapping
...
{
"test_index": {
"mappings": {
"_default_": {
"_timestamp": {
"enabled": true,
"store": true,
"path": "post_date"
},
"properties": {}
},
"doc": {
"_timestamp": {
"enabled": true,
"store": true,
"path": "post_date"
},
"properties": {
"post_date": {
"type": "date",
"format": "dateOptionalTime"
}
}
}
}
}
}
and if I search against the type and ask for "_timestamp" in my fields, I get back the timestamps in the results:
POST /test_index/doc/_search
{
"fields": [
"_timestamp",
"post_date"
]
}
...
{
"took": 1,
"timed_out": false,
"_shards": {
"total": 1,
"successful": 1,
"failed": 0
},
"hits": {
"total": 2,
"max_score": 1,
"hits": [
{
"_index": "test_index",
"_type": "doc",
"_id": "1",
"_score": 1,
"fields": {
"post_date": [
"2015-1-25"
],
"_timestamp": 1422144000000
}
},
{
"_index": "test_index",
"_type": "doc",
"_id": "2",
"_score": 1,
"fields": {
"post_date": [
"2015-1-15"
],
"_timestamp": 1421280000000
}
}
]
}
}
Here is the code I used:
http://sense.qbox.io/gist/1ab1ecb73d3e87cffe0052ce1706e7985d197fad
I'm running Elasticsearch version 1.3.4, by the way.

Related

elasticsearch highlight not working

Elasticsearch 5.5
Using the examples from the documentation, I cannot get the highlight field in the result.
The documentation says that store is required, but the 'title' field is already stored.
mapping
PUT my_index
{
"mappings": {
"user": {
"properties": {
"title": {
"type": "text",
"store": true
},
"date": {
"type": "date",
"store": true
},
"content": {
"type": "text"
}
}
}
}
}
indexing
PUT my_index/user/1
{
"title": "Some short title",
"date": "2015-01-01",
"content": "A very long content field..."
}
query
GET my_index/_search
{
"query" : {
"match" : {
"_all" : "short"
}
},
"highlight": {
"fields" : {
"_all" : {}
}
}
}
output
{
"took": 11,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"failed": 0
},
"hits": {
"total": 1,
"max_score": 0.24257512,
"hits": [
{
"_index": "my_index",
"_type": "user",
"_id": "1",
"_score": 0.24257512,
"_source": {
"title": "Some short title",
"date": "2015-01-01",
"content": "A very long content field..."
}
}
]
}
}
There is no highlight field in the output JSON.
There must be something wrong. Please point it out; thanks in advance.

Elastic Search : Restricting the search result in array

My index metadata :
{
"never": {
"aliases": {},
"mappings": {
"userDetails": {
"properties": {
"Residence_address": {
"type": "nested",
"include_in_parent": true,
"properties": {
"Address_type": {
"type": "string",
"analyzer": "standard"
},
"Pincode": {
"type": "string",
"analyzer": "standard"
},
"address": {
"type": "string",
"analyzer": "standard"
}
}
}
}
}
},
"settings": {
"index": {
"creation_date": "1468850158519",
"number_of_shards": "5",
"number_of_replicas": "1",
"version": {
"created": "1060099"
},
"uuid": "v2njuC2-QwSau4DiwzfQ-g"
}
},
"warmers": {}
}
}
My setting :
POST never
{
"settings": {
"number_of_shards" : 5,
"analysis": {
"analyzer": {
"standard": {
"tokenizer": "keyword",
"filter" : ["lowercase","reverse"]
}
}
}
}
}
My data :
{
"took": 1,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"failed": 0
},
"hits": {
"total": 1,
"max_score": 0.375,
"hits": [
{
"_index": "never",
"_type": "userDetails",
"_id": "1",
"_score": 0.375,
"_source": {
"Residence_address": [
{
"address": "Omega Residency",
"Address_type": "Owned",
"Pincode": "500004"
},
{
"address": "Collage of Engineering",
"Address_type": "Rented",
"Pincode": "411005"
}
]
}
}
]
}
}
My query :
POST /never/_search?pretty
{
"query": {
"match": {
"Residence_address.address": "Omega"
}
}
}
My Result :
{
"took": 1,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"failed": 0
},
"hits": {
"total": 1,
"max_score": 0.375,
"hits": [
{
"_index": "never",
"_type": "userDetails",
"_id": "1",
"_score": 0.375,
"_source": {
"Residence_address": [
{
"address": "Omega Residency",
"Address_type": "Owned",
"Pincode": "500004"
},
{
"address": "Collage of Engineering",
"Address_type": "Rented",
"Pincode": "411005"
}
]
}
}
]
}
}
Is there any way to restrict my result to only object containing address = Omega Residency and NOT the other object having address = Collage of Engineering?
You can only do it with a nested query and inner_hits. I see that you have include_in_parent: true and are not using nested queries, though. If you only want to get the matched nested objects, you need to use inner_hits in a nested query:
GET /never/_search?pretty
{
"_source": false,
"query": {
"nested": {
"path": "Residence_address",
"query": {
"match": {
"Residence_address.address": "Omega Residency"
}
},
"inner_hits" : {}
}
}
}

Elasticsearch generate suggestion fields

I've been reading in to the suggestion in elasticsearch in blogs like: https://www.elastic.co/blog/you-complete-me
But there you have to put in the name_suggest data yourself. Isn't there a way to automatically add the data to name_suggest when you map the object?
so update this mapping:
curl -X PUT localhost:9200/hotels -d '
{
"mappings": {
"hotel" : {
"properties" : {
"name" : { "type" : "string" },
"city" : { "type" : "string" },
"name_suggest" : {
"type" : "completion"
}
}
}
}
}'
and with these puts:
curl -X PUT localhost:9200/hotels/hotel/1 -d '
{
"name" : "Mercure Hotel Munich",
"city" : "Munich",
"name_suggest" : "Mercure Hotel Munich"
}'
curl -X PUT localhost:9200/hotels/hotel/2 -d '
{
"name" : "Hotel Monaco",
"city" : "Munich",
"name_suggest" : "Hotel Monaco"
}'
curl -X PUT localhost:9200/hotels/hotel/3 -d '
{
"name" : "Courtyard by Marriot Munich City",
"city" : "Munich",
"name_suggest" : "Courtyard by Marriot Munich City"
}'
so that we can drop the name_suggest field.
The ultimate goal is that when you start typing "Ho", the first result would be "Hotel".
You can do it with ngrams if you want partial matches within words, or edge ngrams if you just want to match from the beginning of words.
Here's an example. I set up an index like this:
PUT /test_index
{
"settings": {
"analysis": {
"filter": {
"edge_ngram_filter": {
"type": "edge_ngram",
"min_gram": 2,
"max_gram": 20
}
},
"analyzer": {
"edge_ngram_analyzer": {
"type": "custom",
"tokenizer": "standard",
"filter": [
"lowercase",
"edge_ngram_filter"
]
}
}
}
},
"mappings": {
"doc": {
"properties": {
"name": {
"type": "string",
"index_analyzer": "edge_ngram_analyzer",
"search_analyzer": "standard"
},
"city": {
"type": "string"
}
}
}
}
}
Then added your docs:
POST /test_index/doc/_bulk
{"index":{"_id":1}}
{"name":"Mercure Hotel Munich","city":"Munich"}
{"index":{"_id":2}}
{"name":"Hotel Monaco","city":"Munich"}
{"index":{"_id":3}}
{"name":"Courtyard by Marriot Munich City","city":"Munich"}
Now I can query for documents with "hot" in the name like this:
POST /test_index/_search
{
"query": {
"match": {
"name": "hot"
}
}
}
and I get back the correct docs:
{
"took": 41,
"timed_out": false,
"_shards": {
"total": 1,
"successful": 1,
"failed": 0
},
"hits": {
"total": 2,
"max_score": 0.625,
"hits": [
{
"_index": "test_index",
"_type": "doc",
"_id": "2",
"_score": 0.625,
"_source": {
"name": "Hotel Monaco",
"city": "Munich"
}
},
{
"_index": "test_index",
"_type": "doc",
"_id": "1",
"_score": 0.5,
"_source": {
"name": "Mercure Hotel Munich",
"city": "Munich"
}
}
]
}
}
There are various ways this can be tweaked or generalized. For example, you can apply the ngram analyzer to the _all field if you want to match on more than one field.
Here is the code I used to test it:
http://sense.qbox.io/gist/3583de02c4f7d33e07ba4c2def9badf90692a290

How to return nested documents and some of their fields via a query over the main document?

I have the following index on elasticsearch:
PUT /blog
{
"mappings": {
"threadQ":{
"properties": {
"title" : {
"type" : "string",
"analyzer" : "standard"
},
"body" : {
"type" : "string",
"analyzer" : "standard"
},
"posts":{
"type": "nested",
"properties": {
"comment": {
"type": "string",
"analyzer": "standard"
},
"prototype": {
"type": "string",
"analyzer": "standard"
},
"customScore":{
"type": "long"
}
}
}
}
}
}
}
And I added one document:
PUT /blog/threadQ/1
{
"title": "What is c#?",
"body": "C# is a good programming language, makes it easy to develop!",
"posts": [{
"comment": "YEP!",
"prototype": "Hossein Bakhtiari",
"customScore": 2
},
{
"comment": "NEVER EVER :O",
"prototype": "Garpizio En Larri",
"customScore": 3
}]
}
So the following query works:
POST /blog/threadQ/_search
{
"query": {
"bool": {
"must": [{
"nested": {
"query": {
"query_string": {
"fields": ["posts.comment"],
"query": "YEP"
}
},
"path": "posts"
}
}]
}
}
}
And the result is the document.
Now I want to make a query like this:
SELECT threadQ.posts.customScore FROM threadQ WHERE threadQ.posts.comment = "YEP!"
Please tell me how I can implement it.
To return a specific field in the document, use either the fields or the _source parameter.
Here _source is used:
curl -XGET http://localhost:9200/blog/threadQ/_search -d '
{
"_source" : "posts.customScore",
"query": {
"bool": {
"must": [{
"nested": {
"query": {
"query_string": {
"fields": ["posts.comment"],
"query": "YEP"
}
},
"path": "posts"
}
}]
}
}
}'
it will return:
"hits" : {
"total" : 1,
"max_score" : 2.252763,
"hits" : [ {
"_index" : "myindex",
"_type" : "threadQ",
"_id" : "1",
"_score" : 2.252763,
"_source":{"posts":[{"customScore":2},{"customScore":3}]}
} ]
}
}
Finally the problem has been solved by dynamic templates. So the new index structure is like this:
PUT /my_index
{
"mappings": {
"my_type": {
"properties": {
"Id":{
"type": "integer",
"analyzer": "standard"
},
"name":{
"type": "string",
"analyzer": "english"
}
},
"dynamic_templates": [
{ "en": {
"match": "*",
"match_mapping_type": "string",
"mapping": {
"type": "string",
"analyzer": "english"
}
}}
]
}}}
And the query:
POST /my_index/my_type/_search
{
"query": {
"function_score": {
"query": {"match_all": {}},
"functions": [
{
"script_score": {
"script": "doc.apple.value * _score"
}
}
]
}
}
}
And the result looks like this:
{
"took": 3,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"failed": 0
},
"hits": {
"total": 3,
"max_score": 14,
"hits": [
{
"_index": "my_index",
"_type": "my_type",
"_id": "2",
"_score": 14,
"_source": {
"Id": 2,
"name": "Second One",
"iphone": 20,
"apple": 14
}
},
{
"_index": "my_index",
"_type": "my_type",
"_id": "3",
"_score": 14,
"_source": {
"Id": 3,
"name": "Third One",
"apple": 14
}
},
{
"_index": "my_index",
"_type": "my_type",
"_id": "1",
"_score": 1,
"_source": {
"Id": 1,
"name": "First One",
"iphone": 2,
"apple": 1
}
}
]
}
}

Not able to search for string within a string in elasticsearch index

I'm trying to set up the mapping for my elasticsearch instance with full name matching and partial name matching:
curl -XPUT 'http://127.0.0.1:9200/test/?pretty=1' -d '{
"mappings": {
"venue": {
"properties": {
"location": {
"type": "geo_point"
},
"name": {
"fields": {
"name": {
"type": "string",
"analyzer": "full_name"
},
"partial": {
"search_analyzer": "full_name",
"index_analyzer": "partial_name",
"type": "string"
}
},
"type": "multi_field"
}
}
}
},
"settings": {
"analysis": {
"filter": {
"swedish_snow": {
"type": "snowball",
"language": "Swedish"
},
"name_synonyms": {
"type": "synonym",
"synonyms_path": "name_synonyms.txt"
},
"name_ngrams": {
"side": "front",
"min_gram": 2,
"max_gram": 50,
"type": "edgeNGram"
}
},
"analyzer": {
"full_name": {
"filter": [
"standard",
"lowercase"
],
"type": "custom",
"tokenizer": "standard"
},
"partial_name": {
"filter": [
"swedish_snow",
"lowercase",
"name_synonyms",
"name_ngrams",
"standard"
],
"type": "custom",
"tokenizer": "standard"
}
}
}
}
}'
I fill it with some data:
curl -XPOST 'http://127.0.0.1:9200/_bulk?pretty=1' -d '
{"index" : {"_index" : "test", "_type" : "venue"}}
{"location" : [59.3366, 18.0315], "name" : "johnssons"}
{"index" : {"_index" : "test", "_type" : "venue"}}
{"location" : [59.3366, 18.0315], "name" : "johnsson"}
{"index" : {"_index" : "test", "_type" : "venue"}}
{"location" : [59.3366, 18.0315], "name" : "jöhnsson"}
'
Perform some searches to test,
Full name:
curl -XGET 'http://127.0.0.1:9200/test/venue/_search?pretty=1' -d '{
"query": {
"bool": {
"should": [
{
"text": {
"name": {
"boost": 1,
"query": "johnsson"
}
}
},
{
"text": {
"name.partial": "johnsson"
}
}
]
}
}
}'
Result:
{
"took": 3,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"failed": 0
},
"hits": {
"total": 2,
"max_score": 0.29834434,
"hits": [
{
"_index": "test",
"_type": "venue",
"_id": "CAO-dDr2TFOuCM4pFfNDSw",
"_score": 0.29834434,
"_source": {
"location": [
59.3366,
18.0315
],
"name": "johnsson"
}
},
{
"_index": "test",
"_type": "venue",
"_id": "UQWGn8L9Squ5RYDMd4jqKA",
"_score": 0.14663845,
"_source": {
"location": [
59.3366,
18.0315
],
"name": "johnssons"
}
}
]
}
}
Partial name:
curl -XGET 'http://127.0.0.1:9200/test/venue/_search?pretty=1' -d '{
"query": {
"bool": {
"should": [
{
"text": {
"name": {
"boost": 1,
"query": "johns"
}
}
},
{
"text": {
"name.partial": "johns"
}
}
]
}
}
}'
Result:
{
"took": 3,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"failed": 0
},
"hits": {
"total": 2,
"max_score": 0.14663845,
"hits": [
{
"_index": "test",
"_type": "venue",
"_id": "UQWGn8L9Squ5RYDMd4jqKA",
"_score": 0.14663845,
"_source": {
"location": [
59.3366,
18.0315
],
"name": "johnssons"
}
},
{
"_index": "test",
"_type": "venue",
"_id": "CAO-dDr2TFOuCM4pFfNDSw",
"_score": 0.016878016,
"_source": {
"location": [
59.3366,
18.0315
],
"name": "johnsson"
}
}
]
}
}
Name within name:
curl -XGET 'http://127.0.0.1:9200/test/venue/_search?pretty=1' -d '{
"query": {
"bool": {
"should": [
{
"text": {
"ame": {
"boost": 1,
"query": "johnssons"
}
}
},
{
"text": {
"name.partial": "johnssons"
}
}
]
}
}
}'
Result:
{
"took": 2,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"failed": 0
},
"hits": {
"total": 1,
"max_score": 0.39103588,
"hits": [
{
"_index": "test",
"_type": "venue",
"_id": "UQWGn8L9Squ5RYDMd4jqKA",
"_score": 0.39103588,
"_source": {
"location": [
59.3366,
18.0315
],
"name": "johnssons"
}
}
]
}
}
As you can see I'm only getting one venue back which is johnssons. Shouldn't I get both johnssons and johnsson back? What am I doing wrong in my settings?
You are using the full_name analyzer as the search analyzer for the name.partial field. As a result, your query is translated into a query for the term johnssons, which doesn't match anything.
You can use the Analyze API to see how your records are indexed. For example, this command
curl -XGET 'http://127.0.0.1:9200/test/_analyze?analyzer=partial_name&pretty=1' -d 'johnssons'
will show you that during indexing the string "johnssons" is getting translated into the following terms: "jo", "joh", "john", "johns", "johnss", "johnsso", "johnsson". While this command
curl -XGET 'http://127.0.0.1:9200/test/_analyze?analyzer=full_name&pretty=1' -d 'johnssons'
will show you that during searching the string "johnssons" is getting translated into term "johnssons". As you can see there is no match between your search term and your data here.

Resources