elastic search 5 - how to query Object datatype and nested array of json - elasticsearch

I want to query against nested data already loaded into Elasticsearch 5 but every query returns nothing. The data is of object datatype and nested array of json.
This is the nested datatype, i.e. the team_members array of JSON:
[{
"id": 6,
"name": "mike",
"priority": 1
}, {
"id": 7,
"name": "james",
"priority": 2
}]
This is the object datatype, i.e. the availability_slot JSON:
{
"monday": {
"on": true,
"end_time": "15",
"start_time": "9",
"end_time_unit": "pm",
"start_time_unit": "am",
"events_starts_every": 10
}
}
This is my elasticsearch mapping:
{
"meetings_development_20170716013030509": {
"mappings": {
"meeting": {
"properties": {
"account": {"type": "integer"},
"availability_slot": {
"properties": {
"monday": {
"properties": {
"end_time": {"type": "text"},
"end_time_unit": {"type": "text"},
"events_starts_every": {
"type":"integer"
},
"on": {"type": "boolean"},
"start_time": {"type": "text"},
"start_time_unit": {
"type": "text"
}
}
}
}
},
"team_members": {
"type": "nested",
"properties": {
"id": {"type": "integer"},
"name": {"type": "text"},
"priority": {"type": "integer"}
}
}
}
}
}
}
}
I have two queries which are failing for different reasons:
query 1
This query returns a count of zero despite the records existing in Elasticsearch. I discovered that the query is failing because of the filter:
curl -u elastic:changeme http://172.19.0.4:9200/meetings_development/_search?pretty -d '{"query":{"nested":{"path":"team_members","score_mode":"avg","query":{"bool":{"must":[{"match":{"team_members.name":"mike"}},{"match":{"team_members.priority":1}}],"filter":[{"match":{"account":1}}]}}}}}'
This returns zero result:
{
"took" : 8,
"timed_out" : false,
"_shards" : {
"total" : 5,
"successful" : 5,
"failed" : 0
},
"hits" : {
"total" : 0,
"max_score" : null,
"hits" : [ ]
}
}
query 1 without filter
The same query from above without the filter works:
curl -u elastic:changeme http://172.19.0.4:9200/meetings_development/_search?pretty -d '{"query":{"nested":{"path":"team_members","score_mode":"avg","query":{"bool":{"must":[{"match":{"team_members.name":"mike"}},{"match":{"team_members.priority":1}}]}}}}}'
The query above returns 3 hits:
{
"took" : 312,
"timed_out" : false,
"_shards" : {
"total" : 5,
"successful" : 5,
"failed" : 0
},
"hits" : {
"total" : 3,
"max_score" : 2.1451323,
"hits" : [{**results available here**} ]
}
}
query 2 for the object datatype
curl -u elastic:changeme http://172.19.0.4:9200/meetings_development/_search?pretty -d '{"query":{"bool":{"must":{"match":{"availability_slot.start_time":1}},"filter":[{"match":{"account":1}}]}}}'
The query returns a hit of zero but the data is in elasticsearch:
{
"took" : 172,
"timed_out" : false,
"_shards" : {
"total" : 5,
"successful" : 5,
"failed" : 0
},
"hits" : {
"total" : 0,
"max_score" : null,
"hits" : [ ]
}
}
How do I get both queries to work while filtering by account? Thanks

This elasticsearch guide link was very helpful in coming up with the correct elasticsearch queries shown below:
query 1 for the nested array of json
{
"query" => {
"bool": {
"must": [
{
"match": {
"name": "sales call"
}
},
{"nested" => {
"path" => "team_members",
"score_mode" => "avg",
"query" => {
"bool" => {
"must" => {
"match" => {"team_members.name" => "mike"}
}
}
}
}
}
],
"filter": {
"term": {
"account": 1
}
}
},
}
}
Just pass the query to elastic search like this:
curl http://172.19.0.4:9200/meetings_development/_search?pretty -d '{"query":{"bool":{"must":[{"match":{"name":"sales call"}},{"nested":{"path":"team_members","score_mode":"avg","query":{"bool":{"must":{"match":{"team_members.name":"mike"}}}}}}],"filter":{"term":{"account":1}}}}}'
correct syntax for query 2 for the object datatype ie json
{
"query": {
"bool": {
"must": {
"match": {'availability_slot.monday.start_time' => '9'}
},
"filter": [{
"match": {'account': 1}
}]
}
}
}
You then pass this to Elasticsearch like this:
curl http://172.19.0.4:9200/meetings_development/_search?pretty -d '{"query":{"bool":{"must":{"match":{"availability_slot.monday.start_time":"9"}},"filter":[{"match":{"account":1}}]}}}'

Related

match_bool_prefix query doesn't return the "exact match"

I'm using a match_bool_prefix query, but I can't get the exact match of the query. I can't use prefix queries because I also need "not exact match" results, and I also need fuzziness and word completion. I get everything I need from the match_bool_prefix query (the fuzziness doesn't work that well, though), but my problem is that when I'm looking for an exact match like "apple", it shows everything that includes "apple". I need the exact match to get a higher ranking than the others.
GET /_search
{
"query": {
"bool": {
"must": [
{
"match_bool_prefix": {
"name": {
"query": "apple",
"fuzziness": "auto"
}
}
},
{
"bool": {
"must_not": [
{
"match": {
"type": "3"
}
},
{
"match": {
"type": "4"
}
}
]
}
},
{
"match": {
"status": "A"
}
}
],
"should": [
{
"exists": {
"field": "",
"boost": 10
}
}
]
}
},
"indices_boost": [
{
"index1": 3
},
{
"index2": 1.3
},
{
"index3": 1.5
}
],
"size": 20
}
the result I'm getting with this query is :
{
"took" : 6,
"timed_out" : false,
"_shards" : {
"total" : 20,
"successful" : 20,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 4970,
"relation" : "eq"
},
"max_score" : 14.451834,
"hits" : [
{
"_index" : "index",
"_id" : "11434",
"_score" : 14.451834,
"_source" : {
"name" : "Apple Slices With Peanut Butter".
is there any solution for this?

Elasticsearch _search not providing results

I'm trying to return all name fields and count fields from my index however when I try to search for data no data is returned (as shown in last code stub). I definitely have data in my index. What am I doing wrong in my _search command?
My mappings:
PUT /visual
{
"mappings": {
"properties": {
"#timestamp": {"type": "date"},
"name": {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword"
}
}
},
"count": {"type": "integer"},
"err": {"type": "integer"},
"delta1": {"type": "integer"},
"str_list": {"type": "text"}
}
}
}
My search command where I have tried to return the name field, count field and timestamp:
POST visual/_search
{
"query":{
"range":{
"order_date":{
"gte":"now-80d"
}
}
},
"aggs": {
"names":{
"terms":{"field":"name.keyword"},
"aggs": {
"counts":{
"terms":{"field":"count"},
"aggs": {
"time_buckets": {
"date_histogram": {
"field": "#timestamp",
"fixed_interval": "1h",
"extended_bounds": {
"min": "now-80d"
},
"min_doc_count": 0
}
}
}
}
}
}
},"size":100
}
The Response where no data has been returned:
{
"took" : 0,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 0,
"relation" : "eq"
},
"max_score" : null,
"hits" : [ ]
},
"aggregations" : {
"names" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [ ]
}
}
}
In your range query, you're using the field order_date, which doesn't exist in your mappings. So maybe using #timestamp instead will already solve the problem?
"query":{
"range":{
"#timestamp":{
"gte":"now-80d"
}
}
}
Check the range query doc for more information.

How to get multiple fields returned in elasticsearch query?

How to get multiple fields returned that are unique using elasticsearch query?
All of my documents have duplicate name and job fields. I would like to use an es query to get all the unique values which include the name and job in the same response, so they are tied together.
[
{
"name": "albert",
"job": "teacher",
"dob": "11/22/91"
},
{
"name": "albert",
"job": "teacher",
"dob": "11/22/91"
},
{
"name": "albert",
"job": "teacher",
"dob": "11/22/91"
},
{
"name": "justin",
"job": "engineer",
"dob": "1/2/93"
},
{
"name": "justin",
"job": "engineer",
"dob": "1/2/93"
},
{
"name": "luffy",
"job": "rubber man",
"dob": "1/2/99"
}
]
Expected result in any format -> I was trying to use aggs but I only get one field
[
{
"name": "albert",
"job": "teacher"
},
{
"name": "justin",
"job": "engineer"
},
{
"name": "luffy",
"job": "rubber man"
},
]
This is what I tried so far
GET name.test.index/_search
{
"size": 0,
"aggs" : {
"name" : {
"terms" : { "field" : "name.keyword" }
}
}
}
Using the above query gets me this, which is good in that the names are unique:
{
"took" : 1,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 95,
"relation" : "eq"
},
"max_score" : null,
"hits" : [ ]
},
"aggregations" : {
"name" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : "Justin",
"doc_count" : 56
},
{
"key" : "Luffy",
"doc_count" : 31
},
{
"key" : "Albert",
"doc_count" : 8
}
]
}
}
}
I tried doing nested aggregation but that did not work. Is there an alternative solution for getting multiple unique values or am I missing something?
That's a good start! There are a few ways to achieve what you want, each provides a different response format, so you can decide which one you prefer.
The first option is to leverage the top_hits sub-aggregation and return the two fields for each name bucket:
GET name.test.index/_search
{
"size": 0,
"aggs": {
"name": {
"terms": {
"field": "name.keyword"
},
"aggs": {
"top": {
"top_hits": {
"_source": [
"name",
"job"
],
"size": 1
}
}
}
}
}
}
The second option is to use a script in your terms aggregation instead of a field to return a compound value:
GET name.test.index/_search
{
"size": 0,
"aggs": {
"name": {
"terms": {
"script": "doc['name'].value + ' - ' + doc['job'].value"
}
}
}
}
The third option is to use two levels of field collapsing:
GET name.test.index/_search
{
"collapse": {
"field": "name",
"inner_hits": {
"name": "by_job",
"collapse": {
"field": "job"
},
"size": 1
}
}
}

Elasticsearch Filtering Parents by Filtered Child Document Count

I'm attempting to do some elasticsearch query fu on a set of data I have.
I have a user document that is the parent to many child page view documents. I'm looking to return all users that have viewed a specific page an arbitrary amount of times (defined by user input box). So far, I've got a has_child query that will return me all the users that have a page view with certain ids. However, this will return those parents with all their children. Next, I've tried to write an aggregation on those query results, that will essentially do the same has_child query in aggregation form. Now, I have the right document count for my filtered child documents. I need to use this document count to go back and filter the parents. To explain the query in words, "return to me all the users that have viewed a specific page more than 4 times". It's possible that I may need to restructure my data. Any thoughts?
Here is my query thus far:
curl -XGET 'http://localhost:9200/development_users/_search?pretty=true' -d '
{
"query" : {
"has_child" : {
"type" : "page_view",
"query" : {
"terms" : {
"viewed_id" : [175,180]
}
}
}
},
"aggs" : {
"to_page_view": {
"children": {
"type" : "page_view"
},
"aggs" : {
"page_views_that_match" : {
"filter" : { "terms": { "viewed_id" : [175,180] } }
}
}
}
}
}'
This returns me a response like:
{
"took" : 3,
"timed_out" : false,
"_shards" : {
"total" : 5,
"successful" : 5,
"failed" : 0
},
"hits" : {
"total" : 1,
"max_score" : 1.0,
"hits" : [ {
"_index" : "development_users",
"_type" : "user",
"_id" : "22548",
"_score" : 1.0,
"_source":{"id":22548,"account_id":1009}
} ]
},
"aggregations" : {
"to_page_view" : {
"doc_count" : 53,
"page_views_that_match" : {
"doc_count" : 2
}
}
}
}
Associated Mappings:
{
"development_users" : {
"mappings" : {
"page_view" : {
"dynamic" : "false",
"_parent" : {
"type" : "user"
},
"_routing" : {
"required" : true
},
"properties" : {
"created_at" : {
"type" : "date",
"format" : "date_time"
},
"id" : {
"type" : "integer"
},
"viewed_id" : {
"type" : "integer"
},
"time_on_page" : {
"type" : "integer"
},
"title" : {
"type" : "string"
},
"type" : {
"type" : "string"
},
"updated_at" : {
"type" : "date",
"format" : "date_time"
},
"url" : {
"type" : "string"
}
}
},
"user" : {
"dynamic" : "false",
"properties" : {
"account_id" : {
"type" : "integer"
},
"id" : {
"type" : "integer"
}
}
}
}
}
}
Okay, so this is kind of involved. I made a few simplifications to keep it straight in my head. First, I used this mapping:
PUT /test_index
{
"mappings": {
"page_view": {
"_parent": {
"type": "development_user"
},
"properties": {
"viewed_id": {
"type": "string"
}
}
},
"development_user": {
"properties": {
"id": {
"type": "string"
}
}
}
}
}
Then I added some data. In this little universe, I have three users and two pages. I want to find users who have viewed "page_a" at least twice, so if I construct the correct query only user 3 will be returned.
POST /test_index/development_user/_bulk
{"index":{"_type":"development_user","_id":1}}
{"id":"user_1"}
{"index":{"_type":"page_view","_parent":1}}
{"viewed_id":"page_a"}
{"index":{"_type":"development_user","_id":2}}
{"id":"user_2"}
{"index":{"_type":"page_view","_parent":2}}
{"viewed_id":"page_b"}
{"index":{"_type":"development_user","_id":3}}
{"id":"user_3"}
{"index":{"_type":"page_view","_parent":3}}
{"viewed_id":"page_a"}
{"index":{"_type":"page_view","_parent":3}}
{"viewed_id":"page_a"}
{"index":{"_type":"page_view","_parent":3}}
{"viewed_id":"page_b"}
To get that answer we'll use aggregations. Notice that I don't want documents returned (the normal way), but I do want to filter down the documents we analyze, because it will make things more efficient. So I use the same basic filter you had before.
So the aggregation tree starts with terms_parent_id, which will just separate parent documents. Inside that I have children_page_view, which filters the child documents down to the ones I want ("page_a"). Next to it in the hierarchy is bucket_selector_page_id_term_count, which uses a bucket selector (you'll need ES 2.x) to filter the parent documents down to those meeting the criterion, and then finally a top hits aggregation, which shows us the documents that match the requirements.
POST /test_index/development_user/_search
{
"size": 0,
"query": {
"has_child": {
"type": "page_view",
"query": {
"terms": {
"viewed_id": [
"page_a"
]
}
}
}
},
"aggs": {
"terms_parent_id": {
"terms": {
"field": "id"
},
"aggs": {
"children_page_view": {
"children": {
"type": "page_view"
},
"aggs": {
"filter_page_ids": {
"filter": {
"terms": {
"viewed_id": [
"page_a"
]
}
}
}
}
},
"bucket_selector_page_id_term_count": {
"bucket_selector": {
"buckets_path": {
"children_count": "children_page_view>filter_page_ids._count"
},
"script": "children_count >= 2"
}
},
"top_hits_users": {
"top_hits": {
"_source": {
"include": [
"id"
]
}
}
}
}
}
}
}
which returns:
{
"took": 14,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"failed": 0
},
"hits": {
"total": 2,
"max_score": 0,
"hits": []
},
"aggregations": {
"terms_parent_id": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "user_3",
"doc_count": 1,
"children_page_view": {
"doc_count": 3,
"filter_page_ids": {
"doc_count": 2
}
},
"top_hits_users": {
"hits": {
"total": 1,
"max_score": 1,
"hits": [
{
"_index": "test_index",
"_type": "development_user",
"_id": "3",
"_score": 1,
"_source": {
"id": "user_3"
}
}
]
}
}
}
]
}
}
}
Here's all the code I used:
http://sense.qbox.io/gist/43f24461448519dc884039db40ebd8e2f5b7304f

Unexpected Geo_shape query behaviour

My documents have geo_shapes to associate them to an area. If I give ES (1.7) a geo_point I'm wanting it to give me back the documents where the point falls within that area.
I've recreated with the following toy example:-
# create the index
put location_test
put location_test/_mapping/place
{
"place": {
"properties": {
"message": {"type": "string"},
"coverage": {"type": "geo_shape"}
}
}
}
# check the mapping is correct
get location_test/place/_mapping
# location 1
put location_test/place/1
{
"message": "we will be in this box",
"coverage": {
"type" : "envelope",
"coordinates" : [[1, 0], [0, 1] ]
}
}
# location 2
put location_test/place/2
{
"message": "we will be outside this box",
"coverage": {
"type" : "envelope",
"coordinates" : [[2, 1], [1, 2] ]
}
}
# all documents returned - OK
get location_test/place/_search
{
"query": { "match_all": {}}
}
# should only get document 1, but get both.
get location_test/place/_search
{
"query": {
"geo_shape": {
"coverage": {
"shape": {
"type": "point"
"coordinates": [0.1,0.1]
}
}
}
}
}
Besides the fact that you're missing a comma after "type": "point" in your last query, I do get a single document when POSTing the query to the _search endpoint:
curl -XPOST localhost:9200/location_test/place/_search -d '{
"query": {
"geo_shape": {
"coverage": {
"shape": {
"type": "point", <---- comma missing here
"coordinates": [0.1,0.1]
}
}
}
}
}'
Results:
{
"took" : 1,
"timed_out" : false,
"_shards" : {
"total" : 5,
"successful" : 5,
"failed" : 0
},
"hits" : {
"total" : 1,
"max_score" : 1.0,
"hits" : [ {
"_index" : "location_test",
"_type" : "place",
"_id" : "1",
"_score" : 1.0,
"_source":{"message":"we will be in this box","coverage":{"type":"envelope","coordinates":[[1,0],[0,1]]}}
} ]
}
}
When sending a payload you should use POST instead of GET, as not all HTTP clients send a payload when using GET.

Resources