elasticsearch - complex querying - elasticsearch

I am using the jdbc river and I can create the following index:
curl -XPUT 'localhost:9201/_river/email/_meta' -d '{
"type" : "jdbc",
"jdbc" : {
"strategy":"simple",
"poll":"10",
"driver" : "org.postgresql.Driver",
"url" : "jdbc:postgresql://localhost:5432/api_development",
"username" : "paulcowan",
"password" : "",
"sql" : "SELECT id, subject, body, personal, sent_at, read_by, account_id, sender_user_id, sender_contact_id, html, folder, draft FROM emails"
},
"index" : {
"index" : "email",
"type" : "jdbc"
},
"mappings" : {
"email" : {
"properties" : {
"account_id" : { "type" : "integer" },
"subject" : { "type" : "string" },
"body" : { "type" : "string" },
"html" : { "type" : "string" },
"folder" : { "type" : "string", "index" : "not_analyzed" },
"id" : { "type" : "integer" }
}
}
}
}'
I can run basic queries using curl like this:
curl -XGET 'http://localhost:9201/email/jdbc/_search?pretty&q=fullcontact'
I get back results
But what I want to do is restrict the results to a particular email account_id and a particular email, I run the following query:
curl -XGET 'http://localhost:9201/email/jdbc/_search' -d '{
"query": {
"filtered": {
"filter": {
"and": [
{
"term": {
"folder": "INBOX"
}
},
{
"term": {
"account_id": 1
}
}
]
},
"query": {
"query_string": {
"query": "fullcontact*"
}
}
}
}
}'
I get the following results:
{
"took": 3,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"failed": 0
},
"hits": {
"total": 0,
"max_score": null,
"hits": []
}
}
Can anyone tell me what is wrong with my query?

It turns out that you need to use the type_mapping section to specify that a field is not_analyzed in the jdbc river; the normal mappings node is ignored.
Below is how it turned out:
curl -XPUT 'localhost:9200/_river/your_index/_meta' -d '{
"type" : "jdbc",
"jdbc" : {
"strategy":"simple",
"poll":"10",
"driver" : "org.postgresql.Driver",
"url" : "jdbc:postgresql://localhost:5432/api_development",
"username" : "user",
"password" : "your_password",
"sql" : "SELECT field_one, field_two, field_three, the_rest FROM blah"
},
"index" : {
"index" : "your_index",
"type" : "jdbc",
"type_mapping": "{\"your_index\" : {\"properties\" : {\"field_two\":{\"type\":\"string\",\"index\":\"not_analyzed\"}}}}"
}
}'
Strangely or annoyingly, the type_mapping section takes a JSON-encoded string and not a normal JSON node:
I can check the mappings by running:
# check mappings
curl -XGET 'http://localhost:9200/your_index/jdbc/_mapping?pretty=true'
Which should give something like:
{
"jdbc" : {
"properties" : {
"field_one" : {
"type" : "long"
},
"field_two" : {
"type" : "string",
"index" : "not_analyzed",
"omit_norms" : true,
"index_options" : "docs"
},
"field_three" : {
"type" : "string"
}
}
}
}

Related

Not able to get any results on using bucket aggregations

I have some PR data in my ES. This is how the documents are modelled
{
"Author" : "dheerajrav",
"Date" : "2012-10-05T10:16:49Z",
"Number" : 2554441,
"IsMerged" : false,
"MergedBy" : "",
"Body" : ""
},
{
"Author" : "dheerajrav",
"Date" : "2012-10-05T09:11:35Z",
"Number" : 2553883,
"IsMerged" : false,
"MergedBy" : "",
"Body" : ""
},
{
"Author" : "crodjer",
"Date" : "2012-10-04T15:40:22Z",
"Number" : 2544540,
"IsMerged" : false,
"MergedBy" : "",
"Body" : ""
},
{
"Author" : "crodjer",
"Date" : "2012-10-04T07:52:20Z",
"Number" : 2539410,
"IsMerged" : false,
"MergedBy" : "",
"Body" : ""
}
.
.
.
]
}
I am trying the following terms agg on my index but I get no results
curl -X GET "localhost:9200/newidx/_search?pretty" -H 'Content-Type: application/json' -d'
{
"aggs" : {
"contributors" : {
"terms" : {
"field" : "Author",
"size" : 100
}
}
}
}
'
The desired result would have been separate buckets for each PR author. This is the response
"aggregations" : {
"contributors" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [ ]
}
}
Am I modeling my data wrong?
This is the mapping for my index
{
"newidx" : {
"mappings" : {
"properties" : {
"Stats" : {
"properties" : {
"Author" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"Body" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"Date" : {
"type" : "date"
},
"IsMerged" : {
"type" : "boolean"
},
"MergedBy" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"Number" : {
"type" : "long"
}
}
}
}
}
}
}
I generate a json file in my code and index it to elasticsearch using elasticsearch_loader, here is the command
elasticsearch_loader --es-host 'localhost' --index org-skills --type incident json --lines processed.json
Based on your mapping:
Author field is declared as text (used for full-text search) and keyword (used for matching whole values).
Read about the difference between text vs keyword.
The parent mapping name is Stats.
You should therefore use Stats.Author.keyword in your aggregation query i.e:
curl -X GET "localhost:9200/newidx/_search?pretty" -H 'Content-Type: application/json' -d'
{
"aggs" : {
"contributors" : {
"terms" : {
"field" : "Stats.Author.keyword",
"size" : 100
}
}
}
}
'
It needs to be
curl -X GET "localhost:9200/newidx/_search?pretty" -H 'Content-Type: application/json' -d'
{
"aggs" : {
"contributors" : {
"terms" : {
"field" : "Stats.Author.keyword",
"size" : 100
}
}
}
}
'
Your field Stats.Author is of type text. For the use of aggregations, text-based fields have also to be keyword-fields. Therefore you need to use the field Stats.Author.keyword

How to index percolator queries containing filters on inner objects?

Using Elasticsearch 2.1.1
I have documents with inner objects:
{
"level1": {
"level2": 42
}
}
I want to register percolator queries applying filters on the inner property:
$ curl -XPUT http://localhost:9200/myindex/.percolator/myquery?pretty -d '{
"query": {
"filtered": {
"filter": {
"range": {
"level1.level2": {
"gt": 10
}
}
}
}
}
}'
It fails because I don't have a mapping:
{
"error" : {
"root_cause" : [ {
"type" : "query_parsing_exception",
"reason" : "Strict field resolution and no field mapping can be found for the field with name [level1.level2]",
"index" : "myindex",
"line" : 1,
"col" : 58
} ],
"type" : "percolator_exception",
"reason" : "failed to parse query [myquery]",
"index" : "myindex",
"caused_by" : {
"type" : "query_parsing_exception",
"reason" : "Strict field resolution and no field mapping can be found for the field with name [level1.level2]",
"index" : "myindex",
"line" : 1,
"col" : 58
}
},
"status" : 500
}
So I start again, but this time I add a mapping template before:
curl -XDELETE http://localhost:9200/_template/myindex
curl -XDELETE http://localhost:9200/myindex
curl -XPUT http://localhost:9200/_template/myindex?pretty -d '
{
"template": "myindex",
"mappings" : {
"mytype" : {
"properties" : {
"level1" : {
"properties" : {
"level2" : {
"type" : "long"
}
}
}
}
}
}
}
'
I try to register my percolator query again:
curl -XPUT http://localhost:9200/myindex/.percolator/myquery?pretty -d '{
"query": {
"filtered": {
"filter": {
"range": {
"level1.level2": {
"gt": 10
}
}
}
}
}
}'
And now it succeeds:
{
"_index" : "myindex",
"_type" : ".percolator",
"_id" : "myquery",
"_version" : 1,
"_shards" : {
"total" : 1,
"successful" : 1,
"failed" : 0
},
"created" : true
}
And I can see the mapping that has been created:
curl http://localhost:9200/myindex/_mapping?pretty
{
"myindex" : {
"mappings" : {
".percolator" : {
"properties" : {
"query" : {
"type" : "object",
"enabled" : false
}
}
},
"mytype" : {
"properties" : {
"level1" : {
"properties" : {
"level2" : {
"type" : "long"
}
}
}
}
}
}
}
}
Now my problem is that I also need to perform searches on my percolator queries and the default percolate mapping doesn’t index the query field.
So I start again, this time specifying in my mapping template that I want percolator queries to be indexed (note "enabled": true):
curl -XPUT http://localhost:9200/_template/myindex?pretty -d '
{
"template": "myindex",
"mappings" : {
".percolator" : {
"properties" : {
"query" : {
"type" : "object",
"enabled" : true
}
}
},
"mytype" : {
"properties" : {
"level1" : {
"properties" : {
"level2" : {
"type" : "long"
}
}
}
}
}
}
}
'
I try to register my percolator query again:
curl -XPUT http://localhost:9200/myindex/.percolator/myquery?pretty -d '{
"query": {
"filtered": {
"filter": {
"range": {
"level1.level2": {
"gt": 10
}
}
}
}
}
}'
But now I get an error:
{
"error" : {
"root_cause" : [ {
"type" : "mapper_parsing_exception",
"reason" : "Field name [level1.level2] cannot contain '.'"
} ],
"type" : "mapper_parsing_exception",
"reason" : "Field name [level1.level2] cannot contain '.'"
},
"status" : 400
}
How can I create and index a percolator query matching an inner property?

Why isn't my elastic search query returning the text analyzed by english analyzer?

I have an index named test_blocks
{
"test_blocks" : {
"aliases" : { },
"mappings" : {
"block" : {
"dynamic" : "false",
"properties" : {
"content" : {
"type" : "string",
"fields" : {
"content_en" : {
"type" : "string",
"analyzer" : "english"
}
}
},
"id" : {
"type" : "long"
},
"title" : {
"type" : "string",
"fields" : {
"title_en" : {
"type" : "string",
"analyzer" : "english"
}
}
},
"user_id" : {
"type" : "long"
}
}
}
},
"settings" : {
"index" : {
"creation_date" : "1438642440687",
"number_of_shards" : "5",
"number_of_replicas" : "1",
"version" : {
"created" : "1070099"
},
"uuid" : "45vkIigXSCyvHN6g-w5kkg"
}
},
"warmers" : { }
}
}
When I do a search for killing, a word in the content, the search results return as expected.
http://localhost:9200/test_blocks/_search?q=killing&pretty=1
{
"took" : 1,
"timed_out" : false,
"_shards" : {
"total" : 5,
"successful" : 5,
"failed" : 0
},
"hits" : {
"total" : 2,
"max_score" : 0.07431685,
"hits" : [ {
"_index" : "test_blocks",
"_type" : "block",
"_id" : "218",
"_score" : 0.07431685,
"_source":{"block":{"id":218,"title":"The \u003ci\u003eparticle\u003c/i\u003e streak","content":"Barry Allen is a Central City police forensic scientist\n with a reasonably happy life, despite the childhood\n trauma of a mysterious red and yellow being killing his\n mother and framing his father. All that changes when a\n massive \u003cb\u003eparticle\u003c/b\u003e accelerator accident leads to Barry\n being struck by lightning in his lab.","user_id":82}}
}, {
"_index" : "test_blocks",
"_type" : "block",
"_id" : "219",
"_score" : 0.07431685,
"_source":{"block":{"id":219,"title":"The \u003ci\u003eparticle\u003c/i\u003e streak","content":"Barry Allen is a Central City police forensic scientist\n with a reasonably happy life, despite the childhood\n trauma of a mysterious red and yellow being killing his\n mother and framing his father. All that changes when a\n massive \u003cb\u003eparticle\u003c/b\u003e accelerator accident leads to Barry\n being struck by lightning in his lab.","user_id":83}}
} ]
}
}
However given that I have an english analyzer for the content field (content_en), I would have expected it to return me the same document for the query kill. But it doesn't. I get 0 hits.
http://localhost:9200/test_blocks/_search?q=kill&pretty=1
{
"took" : 1,
"timed_out" : false,
"_shards" : {
"total" : 5,
"successful" : 5,
"failed" : 0
},
"hits" : {
"total" : 0,
"max_score" : null,
"hits" : [ ]
}
}
My understanding through this analyze query is that "killing" would have got broken down in to "kill"
http://localhost:9200/_analyze?analyzer=english&text=killing
{
"tokens" : [ {
"token" : "kill",
"start_offset" : 0,
"end_offset" : 7,
"type" : "<ALPHANUM>",
"position" : 1
} ]
}
So why isn't the query "kill" match that document ? Are my mappings incorrect or is it my search that is incorrect?
I am using elasticsearch v1.7.0
You need to use fuzzy search (some introduction available here):
curl -XPOST 'http://localhost:9200/test_blocks/_search' -d '
{
"query": {
"match": {
"title": {
"query": "kill",
"fuzziness": 2,
"prefix_length": 1
}
}
}
}'
UPD: Since the content_en field contains the stemmer-processed content, it makes sense to query that field directly:
curl -XPOST 'http://localhost:9200/test_blocks/_search' -d '
{
"query": {
"multi_match": {
"type": "most_fields",
"query": "kill",
"fields": ["block.title", "block.title.title_en"]
}
}
}'
The following queries (http://localhost:9200/_search?q=kill) end up searching across the
_all field.
_all field uses the default analyzer which unless overridden happens to be standard analyzer and not english analyzer .
For making the above query work you would need to add english analyzer to _all field and re-index
Example:
{
"mappings": {
"block": {
"_all" : {"analyzer" : "english"}
}
}
Also, I would point out that the mapping in the OP doesn't seem consistent with the document structure. As @EugZol pointed out, the content is within the block object, so the mapping should be something along these lines:
{
"mappings": {
"block": {
"properties": {
"block": {
"properties": {
"content": {
"type": "string",
"analyzer": "standard",
"fields": {
"content_en": {
"type": "string",
"analyzer": "english"
}
}
},
"id": {
"type": "long"
},
"title": {
"type": "string",
"analyzer": "standard",
"fields": {
"title_en": {
"type": "string",
"analyzer": "english"
}
}
},
"user_id": {
"type": "long"
}
}
}
}
}
}
}

Boolean query does not return expected data in Elasticsearch

I have the following document in Elasticsearch as reported by Kibana:
{"deviceId":"C1976429369BFE063ED8B3409DB7C7E7D87196D9","appId":"DisneyDigitalBooks.PlanesAdventureAlbum","ostype":"iOS"}
Why the following query does not return success?
[root@myvm elasticsearch-1.0.0]# curl -XGET 'http://localhost:9200/unique_app_install/_search?pretty=1' -d '
{
"query" : {
"bool" : {
"must" : [ {
"term" : {
"deviceId" : "C1976429369BFE063ED8B3409DB7C7E7D87196D9"
}
}, {
"term" : {
"appId" : "DisneyDigitalBooks.PlanesAdventureAlbum"
}
}, {
"term" : {
"ostype" : "iOS"
}
} ]
}
}
}'
Here is the response from Elasticsearch:
{
"took" : 1,
"timed_out" : false,
"_shards" : {
"total" : 5,
"successful" : 5,
"failed" : 0
},
"hits" : {
"total" : 0,
"max_score" : null,
"hits" : [ ]
}
}
As a side question, is this the fastest way to query the data in my case?
Thx in advance.
UPDATE:
Could it be related to the fact that I used the following mapping for this index?
curl -XPOST localhost:9200/unique_app_install -d '{
"settings" : {
"number_of_shards" : 5
},
"mappings" : {
"sdk_sync" : {
"properties" : {
"deviceId" : { "type" : "string" , "index": "not_analyzed"},
"appId" : { "type" : "string" , "index": "not_analyzed"},
"ostype" : { "type" : "string" , "index": "not_analyzed"}
}
}
}
}'
Check if the type of your document was right while inserting: sdk_sync.
I have used your items and for me it works. Using the following curl request give the right response for me:
curl -XPOST localhost:9200/unique_app_install/sdk_sync/1 -d '{
"settings" : {
"number_of_shards" : 5
},
"mappings" : {
"sdk_sync" : {
"properties" : {
"deviceId" : { "type" : "string" , "index": "not_analyzed"},
"appId" : { "type" : "string" , "index": "not_analyzed"},
"ostype" : { "type" : "string" , "index": "not_analyzed"}
}
}
}
}'
curl -XPOST localhost:9200/unique_app_install/sdk_sync/1 -d '{
"deviceId":"C1976429369BFE063ED8B3409DB7C7E7D87196D9",
"appId":"DisneyDigitalBooks.PlanesAdventureAlbum",
"ostype":"iOS"
}'
curl -XGET 'http://localhost:9200/unique_app_install/_search?pretty=1' -d '
{
"query" : {
"bool" : {
"must" : [ {
"term" : {
"deviceId" : "C1976429369BFE063ED8B3409DB7C7E7D87196D9"
}
}, {
"term" : {
"appId" : "DisneyDigitalBooks.PlanesAdventureAlbum"
}
}, {
"term" : {
"ostype" : "iOS"
}
} ]
}
}
}'
Unless you specify that a field is NOT to be analyzed, every field is analyzed by default.
It means that deviceId "C1976429369BFE063ED8B3409DB7C7E7D87196D9" will be indexed as "c1976429369bfe063ed8b3409db7c7e7d87196d9" (lower case).
You have to use term query or term filter with string in LOWER CASE.
That is the reason why you should specify {"index": "not_analyzed"}
for the mapping.

How to get a detailed results output from ElasticSearch

When I do a query like this:
curl 'http://localhost:9200/xenforo/_search?q=message:test'
I get the following result:
{
"took": 3,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"failed": 0
},
"hits": {
"total": 1,
"max_score": 12.816886,
"hits": [
{
"_index": "xenforo",
"_type": "post",
"_id": "1778114",
"_score": 12.816886
}
]
}
}
The important _id is shown, but how would I get more information like the date, user and node information?
Here is some of my mapping information, I think the important part is shown:
curl -X GET 'http://localhost:9200/xenforo/_mapping?pretty=true'
{
"xenforo113" : {
"post" : {
"_source" : {
"enabled" : false
},
"properties" : {
"date" : {
"type" : "long",
"store" : "yes"
},
"discussion_id" : {
"type" : "long",
"store" : "yes"
},
"message" : {
"type" : "string"
},
"node" : {
"type" : "long"
},
"thread" : {
"type" : "long"
},
"title" : {
"type" : "string"
},
"user" : {
"type" : "long",
"store" : "yes"
}
}
},
I assume I will need to do a DSL query but I don't know which command would show the other information I'm after in the results.
As you have disabled _source, you have to ask for explicit fields:
curl 'http://localhost:9200/xenforo/_search' -d '{
"fields" : ["user", "date", "node"],
"query" : {
"match" : { "message" : "test" }
}
}'
See documentation.

Resources