elasticsearch retrieving nested objects - not individual fields - elasticsearch

When I use the "fields" option of a query I get a separate array for each field. Is it possible to get back the "complete" nested objects rather than just the field?
In the following example if I try to do "fields": ["cast"] it tells me that cast is not a leaf node. And if I do "fields": ["cast.firstName", "cast.middleName", "cast.lastName"] it returns 3 arrays.
Is there another way of retrieving just a partial amount of the document? Or is there a way to "reassemble" the separate fields into a complete "cast" object?
Example Index and Data:
POST /movies
{
"mappings": {
"movie": {
"properties": {
"cast": {
"type": "nested"
}
}
}
}
}
POST /movies/movie
{
"title": "The Matrix",
"cast": [
{
"firstName": "Keanu",
"lastName": "Reeves",
"address": {
"street": "somewhere",
"city": "LA"
}
},
{
"firstName": "Laurence",
"middleName": "John",
"lastName": "Fishburne",
"address": {
"street": "somewhere else",
"city": "NYC"
}
}
]
}
Example Query:
GET /movies/_search
{
"query": {
"filtered": {
"query": {
"match_all": {}
},
"filter": {
"nested": {
"path": "cast",
"filter": {
"bool": {
"must": [
{ "term": { "firstName": "laurence"} },
{ "term": { "lastName": "fishburne"} }
]
}
}
}
}
}
},
"fields": [
"cast.address.city",
"cast.firstName",
"cast.middleName",
"cast.lastName"
]
}
Result of example query:
{
"took": 1,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"failed": 0
},
"hits": {
"total": 1,
"max_score": 1,
"hits": [
{
"_index": "movies",
"_type": "movie",
"_id": "AU1JeyBseLgwMCOuOLsZ",
"_score": 1,
"fields": {
"cast.firstName": [
"Keanu",
"Laurence"
],
"cast.lastName": [
"Reeves",
"Fishburne"
],
"cast.address.city": [
"LA",
"NYC"
],
"cast.middleName": [
"John"
]
}
}
]
}
}

I think this is what you're looking for:
POST /movies/_search
{
"_source": {
"include": [
"cast.address.city",
"cast.firstName",
"cast.middleName",
"cast.lastName"
]
},
"query": {
"filtered": {
"query": {
"match_all": {}
},
"filter": {
"nested": {
"path": "cast",
"filter": {
"bool": {
"must": [
{
"term": {
"firstName": "laurence"
}
},
{
"term": {
"lastName": "fishburne"
}
}
]
}
}
}
}
}
}
}
Result:
{
"took": 2,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"failed": 0
},
"hits": {
"total": 1,
"max_score": 1,
"hits": [
{
"_index": "movies",
"_type": "movie",
"_id": "AU1PIJgBA_0Cyshym7-m",
"_score": 1,
"_source": {
"cast": [
{
"lastName": "Reeves",
"address": {
"city": "LA"
},
"firstName": "Keanu"
},
{
"middleName": "John",
"lastName": "Fishburne",
"address": {
"city": "NYC"
},
"firstName": "Laurence"
}
]
}
}
]
}
}
You can also choose to exclude fields instead of including or both, see documentation here: http://www.elastic.co/guide/en/elasticsearch/reference/current/search-request-source-filtering.html

Related

Elasticsearch - Is it possible to collapse first then aggregate data of a nested field?

I am using Elasticsearch and I want to group our results by a specific field, returning the top n documents per group. The documents have a nested field, and I want to aggregate the nested field of all documents in each group.
Example
I have 5 documents, and each has a groupId and also a nested field people. I want to group these documents by groupId, and then for each group I want to get the top 2 people (some documents may contain the same people).
PUT test/_mapping
{
"properties": {
"groupId":{
"type":"keyword"
},
"id":{
"type":"keyword"
},
"name":{
"type":"text"
},
"people":{
"type":"nested",
"properties":{
"email":{
"type":"keyword"
}
}
}
}
}
PUT test/_doc/1
{
"name": "docs1",
"groupId": "1",
"people":[{
"email":"people1#test.com"
}]
}
PUT test/_doc/2
{
"name": "docs2",
"groupId": "1",
"people":[{
"email":"people2.1#test.com"
},
{
"email":"people2.2#test.com"
}]
}
PUT test/_doc/3
{
"name": "docs3",
"groupId": "2",
"people":[{
"email":"people3.1#test.com"
},
{
"email":"people2.2#test.com"
}]
}
PUT test/_doc/4
{
"name": "docs4",
"groupId": "1",
"people":[{
"email":"people4.1#test.com"
},
{
"email":"people4.2#test.com"
}]
}
PUT test/_doc/5
{
"name": "docs5",
"groupId": "3",
"people":[{
"email":"people5.1#test.com"
},
{
"email":"people5.2#test.com"
}]
}
Search query
GET test/_search
{
"collapse": {
"field": "groupId",
"inner_hits": {
"name":"inner",
"size": 2
}
},
"sort": [
{
"groupId": {
"order": "asc"
}
}
],
"size": 2,
"from": 0
}
Result
{
"took": 7,
"timed_out": false,
"_shards": {
"total": 1,
"successful": 1,
"skipped": 0,
"failed": 0
},
"hits": {
"total": {
"value": 5,
"relation": "eq"
},
"max_score": null,
"hits": [
{
"_index": "test",
"_id": "1",
"_score": null,
"_source": {
"name": "docs1",
"groupId": "1",
"people": [
{
"email": "people1#test.com"
}
]
},
"fields": {
"groupId": [
"1"
]
},
"sort": [
"1"
],
"inner_hits": {
"inner": {
"hits": {
"total": {
"value": 3,
"relation": "eq"
},
"max_score": 0,
"hits": [
{
"_index": "test",
"_id": "1",
"_score": 0,
"_source": {
"name": "docs1",
"groupId": "1",
"people": [
{
"email": "people1#test.com"
}
]
}
},
{
"_index": "test",
"_id": "2",
"_score": 0,
"_source": {
"name": "docs2",
"groupId": "1",
"people": [
{
"email": "people2.1#test.com"
},
{
"email": "people2.2#test.com"
}
]
}
}
]
}
}
}
},
{
"_index": "test",
"_id": "3",
"_score": null,
"_source": {
"name": "docs3",
"groupId": "2",
"people": [
{
"email": "people3.1#test.com"
},
{
"email": "people2.2#test.com"
}
]
},
"fields": {
"groupId": [
"2"
]
},
"sort": [
"2"
],
"inner_hits": {
"inner": {
"hits": {
"total": {
"value": 1,
"relation": "eq"
},
"max_score": 0,
"hits": [
{
"_index": "test",
"_id": "3",
"_score": 0,
"_source": {
"name": "docs3",
"groupId": "2",
"people": [
{
"email": "people3.1#test.com"
},
{
"email": "people2.2#test.com"
}
]
}
}
]
}
}
}
}
]
}
}
What I expect is to aggregate a groupPeople field for each group that contains the top n people of that group (it should not be affected by the inner_hits size; e.g. for groupId=1, the group contains 3 documents and 5 people).
The query that you're looking for is this one:
POST test/_search
{
"size": 0,
"aggs": {
"groups": {
"terms": {
"field": "groupId",
"size": 10
},
"aggs": {
"people": {
"nested": {
"path": "people"
},
"aggs": {
"emails": {
"terms": {
"field": "people.email",
"size": 2
}
}
}
}
}
}
}
}
If you need pagination, you can achieve the same using the composite aggregation:
POST test/_search
{
"size": 0,
"aggs": {
"pages": {
"composite": {
"sources": [
{
"groups": {
"terms": {
"field": "groupId"
}
}
}
]
},
"aggs": {
"people": {
"nested": {
"path": "people"
},
"aggs": {
"emails": {
"terms": {
"field": "people.email",
"size": 2
}
}
}
}
}
}
}
}

How to return only the matched object in a nested field, instead of the whole object?

Elasticsearch version ( bin/elasticsearch --version ):
5.6.11
Plugins installed :
JVM version ( java -version ):
openjdk version "1.8.0_222"
OpenJDK Runtime Environment (build 1.8.0_222-8u222-b10-1ubuntu1~16.04.1-b10)
OpenJDK 64-Bit Server VM (build 25.222-b10, mixed mode)
OS version ( uname -a if on a Unix-like system):
Linux myserver 4.4.0-131-generic #157-Ubuntu SMP Thu Jul 12 15:51:36 UTC 2018 x86_64 x86_64 x86_64 GNU/Linux
I'm trying to avoid, if possible, having to create another index, which led me to this question:
How do I retrieve only the matched element in a nested object instead of the whole object? in my case being the: my_nested_field.my_object.name that matches my criteria?
my mapping:
{
"my_super_special_index": {
"aliases": {
"my_super_special_index_alias": {}
},
"mappings": {
"my_super_special_index": {
"properties": {
"my_nested_field": {
"type": "nested",
"properties": {
"my_object": {
"properties": {
"id": {
"type": "integer"
},
"last_known_party": {
"type": "boolean"
},
"name": {
"type": "text",
"store": true,
"analyzer": "translation_index_analyzer",
"search_analyzer": "translation_search_analyzer"
},
"name_raw": {
"type": "keyword"
},
}
}
}
}
}
}
}
}
}
my query:
GET my_super_special_index/_search
{
"_source": "my_nested_field",
"query": {
"bool": {
"should": [
{
"nested": {
"path": "my_nested_field",
"query": {
"bool": {
"should": [
{
"match_phrase": {
"my_nested_field.my_object.name": {
"query": "my name"
}
}
},
{
"match": {
"my_nested_field.my_object.name": {
"query": "my name",
"boost": 100
}
}
}
]
}
}
}
}
],
"minimum_should_match": 1
}
},
"size": 50
}
What I'm getting as a result:
{
"took": 8,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"skipped": 0,
"failed": 0
},
"hits": {
"total": 1337,
"hits": [
{
"_index": "my_super_special_index",
"_type": "my_super_special_index",
"_score": 2433.0676,
"_source": {
"id": "4712182",
"my_nested_fields": [
{
...
"my_object": [
{
"id": "22222",
"name": "name i want",
"name_raw": "name i want",
"last_known_party": true
},
{
"id": "13333",
"name": "hiyoo definitely doesnt match",
"name_raw": "hiyoo definitely doesnt match",
"last_known_party": true
}
],
"my_other_object": [
{
"id": "26672",
"name": "dont really like this",
"name_raw": "dont really like this",
"last_known_party": true
}
]
}
],
}
},
{
"_index": "my_super_special_index",
"_type": "my_super_special_index",
"_score": 2422.111,
"_source": {
"id": "357878",
"my_nested_fields": [
{
...
"my_object": [
{
"id": "661111",
"name": "ratatoille",
"name_raw": "ratatoille",
"last_known_party": true
},
{
"id": "2334",
"name": "name i want or close match",
"name_raw": "name i want or close match",
"last_known_party": true
}
],
"my_other_object": [
{
"id": "63111",
"name": "ttttt ok 1337",
"name_raw": "ttttt ok 1337",
"last_known_party": true
}
]
}
],
}
}
}
}
What I wish to get from ES:
{
"took": 8,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"skipped": 0,
"failed": 0
},
"hits": {
"total": 1337,
"hits": [
{
"_index": "my_super_special_index",
"_type": "my_super_special_index",
"_score": 2433.0676,
"_source": {
"id": "4712182",
"my_nested_fields": [
{
...
"my_object": [
{
"id": "22222",
"name": "name i want",
"name_raw": "name i want",
"last_known_party": true
}
]
}
],
}
},
{
"_index": "my_super_special_index",
"_type": "my_super_special_index",
"_score": 2422.111,
"_source": {
"id": "357878",
"my_nested_fields": [
{
...
"my_object": [
{
"id": "2334",
"name": "name i want or close match",
"name_raw": "name i want or close match",
"last_known_party": true
}
]
}
],
}
}
}
}
Thanks in advance!
Nested inner_hits to the rescue:
GET my_super_special_index/_search
{
"_source": "my_nested_field",
"query": {
"bool": {
"should": [
{
"nested": {
"path": "my_nested_field",
"inner_hits": {}, <--- add this
"query": {
"bool": {
"should": [
{
"match_phrase": {
"my_nested_field.my_object.name": {
"query": "my name"
}
}
},
{
"match": {
"my_nested_field.my_object.name": {
"query": "my name",
"boost": 100
}
}
}
]
}
}
}
}
],
"minimum_should_match": 1
}
},
"size": 50
}
Note: Since my_object is not nested as well, you'll still get the full array, but you'll only get the my_nested_fields object and not the my_other_object one.

elastic bool query must match not getting considered

I am basically trying to write a query that returns the documents where
school is "holy international" AND grade is "second".
But the issue with the current query is that it is not considering the must match part of the query, i.e. even though I don't specify the school, it is giving me this document although it is not a match.
The query is giving me all the documents where the grade is second.
I want only the documents where school is "holy international" AND grade is "second".
Also, I have not specified anything in the match query for "schools.school", but it is still giving me results.
mapping
{
"settings": {
"analysis": {
"analyzer": {
"my_keyword_lowercase1": {
"tokenizer": "keyword",
"filter": ["lowercase", "my_pattern_replace1", "trim"]
},
"my_keyword_lowercase2": {
"tokenizer": "standard",
"filter": ["lowercase", "trim"]
}
},
"filter": {
"my_pattern_replace1": {
"type": "pattern_replace",
"pattern": ".",
"replacement": ""
}
}
}
},
"mappings": {
"test_data": {
"properties": {
"schools": {
"type": "nested",
"properties": {
"school": {
"type": "string",
"analyzer": "my_keyword_lowercase1"
},
"grade": {
"type": "string",
"analyzer": "my_keyword_lowercase2"
}
}
}
}
}
}
}
data
{
"_index": "data_index",
"_type": "test_data",
"_id": "57a33ebc1d41",
"_version": 1,
"found": true,
"_source": {
"summary": null,
"schools": [{
"school": "little flower",
"grade": "first",
"date": "2007-06-01",
},
{
"school": "holy international",
"grade": "second",
"date": "2007-06-01",
},
],
"first_name": "Adam",
"location": "Kansas City",
"last_name": "Roger",
"country": "US",
"name": "Adam Roger",
}
}
query
{
"_source": ["first_name"],
"query": {
"nested": {
"path": "schools",
"inner_hits": {
"_source": {
"includes": [
"schools.school",
"schools.grade"
]
}
},
"query": {
"bool": {
"must": {
"match": {
"schools.school": {
"query": "" <-----X didnt specify anything
}
}
},
"filter": {
"match": {
"schools.grade": {
"query": "second",
"operator": "and",
"minimum_should_match": "100%"
}
}
}
}
}
}
}
}
result
{
"took": 26,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"failed": 0
},
"hits": {
"total": 1,
"max_score": 0.2876821,
"hits": [
{
"_index": "data_test",
"_type": "test_data",
"_id": "57a33ebc1d41",
"_score": 0.2876821,
"_source": {
"first_name": "Adam"
},
"inner_hits": {
"schools": {
"hits": {
"total": 1,
"max_score": 0.2876821,
"hits": [
{
"_nested": {
"field": "schools",
"offset": 0
},
"_score": 0.2876821,
"_source": {
"schools": {
"school": "holy international",
"grade": "second"
}
}
}
]
}
}
}
}
]
}
}
So, basically your problem is the analysis step. When I loaded everything and checked, it became very clear:
This filter completely wipes the entire string from the schools.school field:
"filter": {
"my_pattern_replace1": {
"type": "pattern_replace",
"pattern": ".",
"replacement": ""
}
}
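I think that's happening because . is a regular-expression metacharacter that matches any character (to match a literal dot, the pattern would have to be escaped as "\\."), so every character is replaced with an empty string. When I checked it with the _analyze API: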
POST /_analyze
{
"field": "schools.school",
"text": "holy international"
}
{
"tokens": [
{
"token": "",
"start_offset": 0,
"end_offset": 18,
"type": "word",
"position": 0
}
]
}
That's why you always get a match: every string you pass at index time and at search time becomes "". Some additional info from the Elastic documentation - https://www.elastic.co/guide/en/elasticsearch/reference/5.1/analysis-pattern_replace-tokenfilter.html
After I removed the pattern_replace filter, this query returns everything as expected:
{
"_source": ["first_name"],
"query": {
"nested": {
"path": "schools",
"inner_hits": {
"_source": {
"includes": [
"schools.school",
"schools.grade"
]
}
},
"query": {
"bool": {
"must": {
"match": {
"schools.school": {
"query": "holy international"
}
}
},
"filter": {
"match": {
"schools.grade": {
"query": "second"
}
}
}
}
}
}
}
}

Configuring elasticsearch to search and filter with has many / belongs to relationship

I have a Product model where each product has many skus.
I need to be able to search and filter via elasticsearch across both models, but not quite sure how to go about it. I'm currently uploading to elasticsearch in this format:
[{
id: 1
title: 'Product 1'
image: 'image1.jpg'
skus: [{
id: 1
material: 'cotton'
quantity: 4
},{
id: 2
material: 'polyester'
quantity: 22
}]
},{
...
}]
I can search the title just fine, but I am unsure as to how I could do something like
Search for title 'foobar' and filter by material 'cotton' and quantity > 5
Is this possible with elasticsearch?
Edit
I am open to uploading in a different format or using multiple indices.
I think the parent/child relationship is what you're looking for.
As a quick example, I can set up an index with a parent type and child type like this:
PUT /test_index
{
"mappings": {
"product": {
"properties": {
"id": {
"type": "long"
},
"image": {
"type": "string"
},
"title": {
"type": "string"
}
}
},
"sku": {
"_parent": {
"type": "product"
},
"properties": {
"id": {
"type": "long"
},
"material": {
"type": "string"
},
"quantity": {
"type": "long"
}
}
}
}
}
Then add a parent document and two child documents:
POST /test_index/_bulk
{"index":{"_type":"product","_id":1}}
{"id": 1,"title": "Product1","image": "image1.jpg"}
{"index":{"_type":"sku", "_id":1,"_parent":1}}
{"id": 1,"material": "cotton","quantity": 4}
{"index":{"_type":"sku","_id":2,"_parent":1}}
{"id": 2,"material": "polyester","quantity": 22}
Now if I search for a "product" with "title": "Product1" that has a child "sku" with "material": "cotton" and "quantity" greater than 5, I won't find one:
POST /test_index/product/_search
{
"query": {
"filtered": {
"query": {
"match": {
"title": "Product1"
}
},
"filter": {
"has_child": {
"type": "sku",
"filter": {
"bool": {
"must": [
{
"term": {
"material": "cotton"
}
},
{
"range": {
"quantity": {
"gt": 5
}
}
}
]
}
}
}
}
}
}
}
...
{
"took": 2,
"timed_out": false,
"_shards": {
"total": 1,
"successful": 1,
"failed": 0
},
"hits": {
"total": 0,
"max_score": null,
"hits": []
}
}
But if I search for a "product" with "title": "Product1" that has a child "sku" with "material": "polyester" and "quantity" greater than 5, I will find one:
POST /test_index/product/_search
{
"query": {
"filtered": {
"query": {
"match": {
"title": "Product1"
}
},
"filter": {
"has_child": {
"type": "sku",
"filter": {
"bool": {
"must": [
{
"term": {
"material": "polyester"
}
},
{
"range": {
"quantity": {
"gt": 5
}
}
}
]
}
}
}
}
}
}
}
...
{
"took": 2,
"timed_out": false,
"_shards": {
"total": 1,
"successful": 1,
"failed": 0
},
"hits": {
"total": 1,
"max_score": 1.4054651,
"hits": [
{
"_index": "test_index",
"_type": "product",
"_id": "1",
"_score": 1.4054651,
"_source": {
"id": 1,
"title": "Product1",
"image": "image1.jpg"
}
}
]
}
}
Here is some code I used for testing:
http://sense.qbox.io/gist/d1989a28372ac9daae335d585601c11818b2fa11

term and range filters together in an elasticsearch query

In my Elasticsearch index, I have two documents indexed as below:
POST dyn-props/item
{
"name": "bar foo",
"properties": [
{
"type": "foo",
"value": 1.45
},
{
"type": "bar",
"value": 256.34
},
{
"type": "foobar",
"value": 43.43
}
]
}
POST dyn-props/item
{
"name": "foo bar",
"properties": [
{
"type": "foo",
"value": 33.34
},
{
"type": "bar",
"value": 22.23
}
]
}
On this item type, I would like to query for items that have a foo property whose value is greater than 10. I can filter the results down to items that have a property whose type is foo with the query below:
POST dyn-props/item/_search
{
"query": {
"filtered": {
"query": {
"match_all": {}
},
"filter": {
"term": {
"properties.type": "foo"
}
}
}
}
}
but I am not sure how I can apply the range filter for value. Any idea?
Edit:
As expected, issuing the query below gives me the wrong results:
POST dyn-props/item/_search
{
"query": {
"filtered": {
"query": {
"match_all": {}
},
"filter": {
"bool": {
"must": [
{
"term": {
"properties.type": "foo"
}
},
{
"range": {
"properties.value": {
"gte" : 10
}
}
}
]
}
}
}
}
}
The result:
{
"took": 2,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"failed": 0
},
"hits": {
"total": 2,
"max_score": 1,
"hits": [
{
"_index": "dyn-props",
"_type": "item",
"_id": "PetPVxwARLOcZqlv28xjpw",
"_score": 1,
"_source": {
"name": "bar foo",
"properties": [
{
"type": "foo",
"value": 1.45
},
{
"type": "bar",
"value": 256.34
},
{
"type": "foobar",
"value": 43.43
}
]
}
},
{
"_index": "dyn-props",
"_type": "item",
"_id": "KqOTXcC9RG6FzPsDDDs8Hw",
"_score": 1,
"_source": {
"name": "foo bar",
"properties": [
{
"type": "foo",
"value": 33.34
},
{
"type": "bar",
"value": 22.23
}
]
}
}
]
}
}
Found the answer. This post helped a lot: ElasticSearch – nested mappings and filters
Changed the mapping of the type:
PUT dyn-props
{
"mappings": {
"item": {
"properties": {
"name": {
"type": "string"
},
"properties": {
"type": "nested"
}
}
}
}
}
By making the properties as nested type, I was able to maintain the association between type and value fields.
Finally, I was able to issue the nested query for this:
POST dyn-props/item/_search
{
"query": {
"filtered": {
"query": {
"match_all": {}
},
"filter": {
"nested": {
"path": "properties",
"filter": {
"bool": {
"must": [
{
"term": {
"type": "foo"
}
},
{
"range": {
"value": {
"gte": 10
}
}
}
]
}
}
}
}
}
}
}
This got me the correct result:
{
"took": 2,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"failed": 0
},
"hits": {
"total": 1,
"max_score": 1,
"hits": [
{
"_index": "dyn-props",
"_type": "item",
"_id": "CzTL4sseR2GVYtvf-0slVQ",
"_score": 1,
"_source": {
"name": "foo bar",
"properties": [
{
"type": "foo",
"value": 33.34
},
{
"type": "bar",
"value": 22.23
}
]
}
}
]
}
}
You have got to change the mapping of the index and change the type of the properties to nested.
This case has been explained in the docs:
http://www.elasticsearch.org/blog/managing-relations-inside-elasticsearch/

Resources