elasticsearch parent/child query logic - elasticsearch

elastic version: 5.0.1
define mapping:
PUT test
{
"mappings": {
"my_parent": {
"properties": {
"key": {
"type": "keyword"
}
}
},
"my_child": {
"_parent": {
"type": "my_parent"
},
"properties": {
"key": {
"type": "keyword"
}
}
}
}
}
add demo data:
POST _bulk
{"update": {"_index": "test","_type": "my_parent","_id": "1"}}
{"doc": {"key": 1},"doc_as_upsert": true}
{"update": {"_index": "test","_type": "my_child","_parent": 1,"_id": "11"}}
{"doc": {"key": 11},"doc_as_upsert": true}
{"update": {"_index": "test","_type": "my_child","_parent": 1,"_id": "12"}}
{"doc": {"key": 12},"doc_as_upsert": true}
query:
POST test/my_parent/_search
{
"query": {
"bool": {
"filter": {
"bool": {
"should": [
{
"bool": {
"must": [
{
"term": {
"key": 3
}
},
{
"has_child": {
"type": "my_child",
"inner_hits": {
"name": "a"
},
"query": {
"term": {
"key": 11
}
}
}
}
]
}
},
{
"has_child": {
"type": "my_child",
"inner_hits": {
"name": "b"
},
"query": {
"term": {
"key": 12
}
}
}
}
]
}
}
}
}
}
result:
{
"took": 2,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"failed": 0
},
"hits": {
"total": 1,
"max_score": 0,
"hits": [
{
"_index": "test",
"_type": "my_parent",
"_id": "1",
"_score": 0,
"_source": {
"key": 1
},
"inner_hits": {
"a": {
"hits": {
"total": 1,
"max_score": 0.9808292,
"hits": [
{
"_type": "my_child",
"_id": "11",
"_score": 0.9808292,
"_routing": "1",
"_parent": "1",
"_source": {
"key": 11
}
}
]
}
},
"b": {
"hits": {
"total": 1,
"max_score": 0.9808292,
"hits": [
{
"_type": "my_child",
"_id": "12",
"_score": 0.9808292,
"_routing": "1",
"_parent": "1",
"_source": {
"key": 12
}
}
]
}
}
}
}
]
}
}
question here:
Do the 'must', 'should' and 'must_not' clauses have the same meaning in a plain search and in a parent/child search?
Why is the result of inner_hits with name 'a' returned?

The 'must', 'should' and 'must_not' clauses have different meanings. Let me explain them with examples from a plain search.
Each clause can be understood through its equivalent SQL query.
must: The clause (query) must appear in matching documents and will contribute to the score.
SQL: select * from user where country_code = 'US' AND state_code = 'NY'
Query DSL:
POST _search
{
"query": {
"bool": {
"must": [
{"term": {"country_code": "US"}},
{"term": {"state_code": "NY"}}
]
}
}
}
should: At least one of these clauses must match, like logical OR.
SQL: select * from user where country_code = 'US' OR state_code = 'NY'
Query DSL:
POST _search
{
"query": {
"bool": {
"should": [
{"term": {"country_code": "US"}},
{"term": {"state_code": "NY"}}
]
}
}
}
must_not: Condition must not match the documents.
SQL: select * from user where country_code != 'US' AND state_code != 'NY'
Query DSL:
POST _search
{
"query": {
"bool": {
"must_not": [
{"term": {"country_code": "US"}},
{"term": {"state_code": "NY"}}
]
}
}
}
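Why is the result of inner_hits with name 'a' returned?
Because you put two has_child conditions inside the should clause. As explained above, a parent document matches when either (the branch containing inner_hits 'a') OR (the branch containing inner_hits 'b') matches. Parent 1 is returned through branch 'b'. The inner hits are then collected for each has_child query whose child query matches a child of that parent, regardless of whether the enclosing branch itself matched, so child 11 is still listed under 'a' even though the sibling term query on key 3 did not match.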

Related

Elasticsearch returns NullPointerException during inner_hits query

I have an index that stores nested documents. I want to see these nested documents, so I used 'inner_hits' in the request, but Elasticsearch returns a NullPointerException. Has anyone run into this problem?
Request to elasticsearch using Postman:
GET http://localhost/my-index/_search
{
"query": {
"nested": {
"path": "address_object",
"query": {
"bool": {
"must": {
"term": {"address_object.city": "Paris"}
}
}
},
"inner_hits" : {}
}
}
}
Response with status code 200:
{
"took": 161,
"timed_out": false,
"_shards": {
"total": 2,
"successful": 1,
"skipped": 0,
"failed": 1,
"failures": [
{
"shard": 0,
"index": "my-index",
"node": "DWdD83KaTmUiodENQkGDww",
"reason": {
"type": "null_pointer_exception",
"reason": null
}
}
]
},
"hits": {
"total": 6500039,
"max_score": 2.1761138,
"hits": []
}
}
Elasticsearch version: 6.2.4
Lucene version: 7.2.1
Update:
Mapping:
{
"my-index": {
"mappings": {
"mytype": {
"dynamic": "false",
"_source": {
"enabled": false
},
"properties": {
"adverts_count": {
"type": "integer",
"store": true
},
...
"address_object": {
"type": "nested",
"properties": {
"adverts_count": {
"type": "integer",
"store": true
},
"city": {
"type": "keyword",
"store": true
}
}
},
...
Sample document:
{
"_index": "my-index",
"_type": "mytype",
"_id": "XDWrGncBdwNBWGEagAM2",
"_score": 2.1587489,
"fields": {
"is_target_page_shown": [
0
],
"updated_at": [
1612264276
],
"is_shown": [
0
],
"nb_queries": [
1
],
"search_query": [
"phone"
],
"target_category": [
15
],
"adverts_count": [
1
]
}
}
Extra information:
If I remove the "inner_hits": {} from the search request, Elasticsearch returns the nested documents (_index, _type, _id, _score), but without any other fields (e.g. city).
Also, as suggested in the comments, I tried setting ignore_unmapped to true, but it didn't help. The same NullPointerException occurs.
I tried reproducing your issue, but as you have not provided proper sample documents (the one you provided doesn't have the address_object properties), I used your mapping and the sample documents below.
PUT index-name/_doc/1
{
"address_object" :{
"adverts_count" : 1,
"city": "paris"
}
}
PUT index-name/_doc/2
{
"address_object" :{
"adverts_count" : 1,
"city": "blr"
}
}
And when I use the same search provided by you.
POST 71907588/_search
{
"query": {
"nested": {
"path": "address_object",
"query": {
"bool": {
"must": {
"term": {
"address_object.city": "paris"
}
}
}
},
"inner_hits": {}
}
}
}
I get a proper response, matching paris as city as shown in the search response.
"hits": [
{
"_index": "71907588",
"_id": "1",
"_score": 0.6931471,
"_source": {
"address_object": {
"adverts_count": 1,
"city": "paris"
}
},
"inner_hits": {
"address_object": {
"hits": {
"total": {
"value": 1,
"relation": "eq"
},
"max_score": 0.6931471,
"hits": [
{
"_index": "71907588",
"_id": "1",
"_nested": {
"field": "address_object",
"offset": 0
},
"_score": 0.6931471,
"_source": {
"city": "paris",
"adverts_count": 1
}
}
]
}
}
}
}
]

Extracting and aggregating text values from a field with Elasticsearch

I'm new to Elasticsearch, and my first task is as follows:
Here is the beginning of my query:
"_source": "pageVisitsValues",
"query": {
"filtered": {
"filter": {
"and": {
"filters": [
{
"term": {
"blockSize": 30
}
},
{
"range": {
"pageViews": {
"from": 1,
"to": null,
"include_lower": true,
"include_upper": true
}
}
},
{
"bool": {
"should": {
"bool": {
"must": {
"regexp": {
"pageVisitsValues": {
"value": ".*utm_medium=.*"
}
}
}
}
}
}
}
]
}
}
}
},
"aggregations": {
"Utm_term1": {
"terms": {
"field": "pageVisitsValues",
"size": 50,
"order": {
"_count": "desc"
}
}
},
now here is part of the result:
"hits": { -
"total": 204223,
"max_score": 1,
"hits": [ -
{ -
"_index": "my_index",
"_type": "user",
"_id": "AX45WhAzfmq",
"_score": 1,
"_source": { -
"pageVisitsValues": [ -
"www.test.com/search?&q=bullion deals&utm_source=iterable&utm_medium=email&utm_campaign=weeklydeals"
]
}
},
{ -
"_index": "8767827_all_25216812968619266m",
"_type": "user",
"_id": "AX45vDJEamKugpqPWxqJ",
"_score": 1,
"_source": { -
"pageVisitsValues": [ -
"www.mytestsite.com/search?&q=indian",
"www.mytestsite.com/search?&q=indianhead cents&rows=80&view=grid&version=v2&start=160",
"www.mytestsite.com/product/24158/1867-indianvg",
"www.mytestsite.com/search?q=&x=11&y=7&silver&page=1&utm_source=bing&utm_medium=cpc&utm_campaign=cpa&utm_term=testx&utm_content=test",
"www.mytestsite.com/product/233871/flying-eagle-indian-head-cent-1856-1909",
"www.mytestsite.com/search?&q=silver eagles bu"
]
}
},
{ -
"_index": "my_index",
"_type": "user",
"_id": "userID",
"_score": 1,
"_source": { -
"pageVisitsValues": [ -
"www.mytestsite.com/product/1/bu-random-year?utm_source=criteo&utm_medium=email&utm_campaign=prospecting&dclid=cpyz4_j54fqcfcdbpaqd0dkkiw"
]
}
}
what I need to do is to find a way to aggregate all "utm_medium" values from the "pageVisitsValues" field, so my expected result should be something like this:
"Utm_term1": { -
"doc_count_error_upper_bound": 426,
"sum_other_doc_count": 1557591,
"buckets": [ -
{ -
"key": "email",
"doc_count": 31283
},
{ -
"key": "cpc",
"doc_count": 23615
Any idea how can I do that?
I'd suggest indexing the utm_medium values into a new dedicated list field and aggregating on that, since it would be less expensive. If you don't want to do that, you can define a scripted per-document field with runtime_mappings and run a terms aggregation on it; see the scripting section of the official Elasticsearch documentation.
To extract the needed keyword in a Painless script, you can use the .split function; check the linked ticket.
If you need any further help please reply to this comment.

Query Vs Filter in Elastic Search

I am trying to index a document which has three fields first_name, last_name, occupation of type "keyword" and has values XYZ, ABC, DEF respectively.
I have written query using filter for an exact match with AND condition as follows,
"query": {
"bool": {
"filter": [
{"term": {"first_name": "XYZ"}},
{"term": {"last_name": "ABC"}}
]
}
}
This has to return one document, but returns nothing.
I have another query for the same operation,
"query": {
"bool": {
"must": [
{"match": {"first_name": "XYZ"}},
{"match": {"last_name": "ABC"}}
]
}
}
This returns one document.
According to Elasticsearch documentation, I understand that the difference between query and filter is that filter does not score the result. I am not sure why the first query does not return any result. Is my understanding correct?
As the documentation states, there is no difference between query and filter except scoring. Of course, this applies only when both the query and the filter use the same query type. Here you are using two different types: term and match. term is designed for exact comparison, while match analyzes its input and is used for full-text search.
Take a look at the example below.
Your mapping:
PUT /index_53053054
{
"mappings": {
"_doc": {
"properties": {
"first_name": {
"type": "text"
},
"last_name": {
"type": "text"
},
"occupation": {
"type": "keyword"
}
}
}
}
}
Your document:
PUT index_53053054/_doc/1
{
"first_name": "XYZ",
"last_name": "ABC",
"occupation": "DEF"
}
filter query:
GET index_53053054/_search
{
"query": {
"bool": {
"filter": [
{
"match": {
"first_name": "XYZ"
}
},
{
"match": {
"last_name": "ABC"
}
},
{
"term": {
"occupation": "DEF"
}
}
]
}
}
}
and result:
{
"took": 7,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"skipped": 0,
"failed": 0
},
"hits": {
"total": 1,
"max_score": 0,
"hits": [
{
"_index": "index_53053054",
"_type": "_doc",
"_id": "1",
"_score": 0,
"_source": {
"first_name": "XYZ",
"last_name": "ABC",
"occupation": "DEF"
}
}
]
}
}
Similar must query:
GET index_53053054/_search
{
"query": {
"bool": {
"must": [
{
"match": {
"first_name": "XYZ"
}
},
{
"match": {
"last_name": "ABC"
}
},
{
"term": {
"occupation": "DEF"
}
}
]
}
}
}
and response:
{
"took": 5,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"skipped": 0,
"failed": 0
},
"hits": {
"total": 1,
"max_score": 0.8630463,
"hits": [
{
"_index": "index_53053054",
"_type": "_doc",
"_id": "1",
"_score": 0.8630463,
"_source": {
"first_name": "XYZ",
"last_name": "ABC",
"occupation": "DEF"
}
}
]
}
}
As you can see, the hits are almost the same. The only difference is that the score is not calculated in the filter query, while it is calculated in the must query.
Read more: https://www.elastic.co/guide/en/elasticsearch/reference/6.4/query-filter-context.html

How do I perform an "OR" filter on an aggregate?

I am trying to grab the first 10 documents grouped by domain. These 10 documents need to have a value for "crawl_date" that haven't been crawled for a while or haven't been crawled at all (eg a blank value). I have:
curl -XPOST 'http://localhost:9200/tester/test/_search' -d '
{
"size": 10,
"aggs": {
"group_by_domain": {
"filter": {
"or":[
"term": {"crawl_date": ""},
"term": {"crawl_date": ""} // how do I put a range here? e.g. <= '2014-12-31'
]
},
"terms": {
"field": "domain"
}
}
}
}'
I am new to ES and using version 2.2. Since the documentation isn't fully updated I am struggling.
EDIT:
To clarify, I need 10 urls that haven't been crawled or haven't been crawled for a while. Each of those 10 urls has to come from a unique domain so that when I crawl them I don't overload someone's server.
Another Edit:
So, I need something like this (1 link for each of 10 unique domains):
1. www.domain1.com/page
2. www.domain2.com/url
etc...
Instead, I am getting just the domain and the number of pages:
"buckets": [
{
"key": "http://www.dailymail.co.uk",
"doc_count": 212
},
{
"key": "https://sedo.com",
"doc_count": 196
},
{
"key": "http://www.foxnews.com",
"doc_count": 118
},
{
"key": "http://data.worldbank.org",
"doc_count": 117
},
{
"key": "http://detail.1688.com",
"doc_count": 117
},
{
"key": "https://twitter.com",
"doc_count": 112
},
{
"key": "http://search.rakuten.co.jp",
"doc_count": 104
},
{
"key": "https://in.1688.com",
"doc_count": 92
},
{
"key": "http://www.abc.net.au",
"doc_count": 87
},
{
"key": "http://sport.lemonde.fr",
"doc_count": 85
}
]
The "hits" returns multiple pages for just 1 domain:
"hits": [
{
"_index": "tester",
"_type": "test",
"_id": "http://www.barnesandnoble.com/w/at-the-edge-of-the-orchard-tracy-chevalier/1121908441?ean=9780525953005",
"_score": 1,
"_source": {
"domain": "http://www.barnesandnoble.com",
"crawl_date": "0001-01-01T00:00:00Z"
}
},
{
"_index": "tester",
"_type": "test",
"_id": "http://www.barnesandnoble.com/b/bargain-books/_/N-8qb",
"_score": 1,
"_source": {
"domain": "http://www.barnesandnoble.com",
"crawl_date": "0001-01-01T00:00:00Z"
}
},
etc....
Barnes and Noble will quickly ban my UA if I try to crawl that many pages from their domain at the same time.
I need something like this:
1. "http://www.dailymail.co.uk/page/text.html",
2. "https://sedo.com/another/page"
3. "http://www.barnesandnoble.com/b/bargain-books/_/N-8qb"
4. "http://www.starbucks.com/homepage/"
etc.
Using Aggregations
If you want to use aggregations, I'd suggest using the terms aggregations to remove the duplicates from your result set and as sub aggregation, I'd use the top_hits aggregation, which gives you the best hit from the aggregated documents of each domain (per default the score for each document within a domain should be the same.)
Consequently the query will look like that:
POST sites/page/_search
{
"size": 0,
"aggs": {
"filtered_domains": {
"filter": {
"bool": {
"should": [
{
"bool": {
"must_not": {
"exists": {
"field": "crawl_date"
}
}
}
},
{
"range": {
"crawl_date": {
"lte": "2016-01-01"
}
}
}
]
}
},
"aggs": {
"domains": {
"terms": {
"field": "domain",
"size": 10
},
"aggs": {
"pages": {
"top_hits": {
"size": 1
}
}
}
}
}
}
}
}
Giving you results like that
"aggregations": {
"filtered_domains": {
"doc_count": 3,
"domains": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "barnesandnoble.com",
"doc_count": 2,
"pages": {
"hits": {
"total": 2,
"max_score": 1,
"hits": [
{
"_index": "test",
"_type": "page",
"_id": "barnesandnoble.com/test2.html",
"_score": 1,
"_source": {
"crawl_date": "1982-05-16",
"domain": "barnesandnoble.com"
}
}
]
}
}
},
{
"key": "starbucks.com",
"doc_count": 1,
"pages": {
"hits": {
"total": 1,
"max_score": 1,
"hits": [
{
"_index": "test",
"_type": "page",
"_id": "starbucks.com/index.html",
"_score": 1,
"_source": {
"crawl_date": "1982-05-16",
"domain": "starbucks.com"
}
}
]
}
}
}
]
}
}
Using Parent/Child Aggregations
If you can change your index structure, I'd suggest to create an index with either parent/child relationship or nested documents.
If you do so, you can select 10 distinct domains and retrieve one (or more) specific pages of each domain.
Let me show you an example with parent/child (if you use sense, you should be able to just copy paste):
First generate the mappings for the documents:
PUT /sites
{
"mappings": {
"domain": {},
"page": {
"_parent": {
"type": "domain"
},
"properties": {
"crawl_date": {
"type": "date"
}
}
}
}
}
Insert some documents
PUT sites/domain/barnesandnoble.com
{}
PUT sites/domain/starbucks.com
{}
PUT sites/domain/dailymail.co.uk
{}
POST /sites/page/_bulk
{ "index": { "_id": "barnesandnoble.com/test.html", "parent": "barnesandnoble.com" }}
{ "crawl_date": "1982-05-16" }
{ "index": { "_id": "barnesandnoble.com/test2.html", "parent": "barnesandnoble.com" }}
{ "crawl_date": "1982-05-16" }
{ "index": { "_id": "starbucks.com/index.html", "parent": "starbucks.com" }}
{ "crawl_date": "1982-05-16" }
{ "index": { "_id": "dailymail.co.uk/index.html", "parent": "dailymail.co.uk" }}
{}
Search for the urls to crawl
POST /sites/domain/_search
{
"query": {
"has_child": {
"type": "page",
"query": {
"bool": {
"filter": {
"bool": {
"should": [
{
"bool": {
"must_not": {
"exists": {
"field": "crawl_date"
}
}
}
},
{
"range": {
"crawl_date": {
"lte": "2016-01-01"
}
}
}]
}
}
}
},
"inner_hits": {
"size": 1
}
}
}
}
We do a has_child query on the parent type and therefore receive only distinct urls of the parent type. To get the specific pages, we have to add an inner_hits query, which gives us the child documents leading to the hits in the parent type.
If you set inner_hits size to 1, you get only one page per domain.
You can even add a sorting in the inner_hits query... For example, you can sort by the crawl_date. ;)
The above search gives you the following result:
"hits": [
{
"_index": "sites",
"_type": "domain",
"_id": "starbucks.com",
"_score": 1,
"_source": {},
"inner_hits": {
"page": {
"hits": {
"total": 1,
"max_score": 1.9664046,
"hits": [
{
"_index": "sites",
"_type": "page",
"_id": "starbucks.com/index.html",
"_score": 1.9664046,
"_routing": "starbucks.com",
"_parent": "starbucks.com",
"_source": {
"crawl_date": "1982-05-16"
}
}
]
}
}
}
},
{
"_index": "sites",
"_type": "domain",
"_id": "dailymail.co.uk",
"_score": 1,
"_source": {},
"inner_hits": {
"page": {
"hits": {
"total": 1,
"max_score": 1.9664046,
"hits": [
{
"_index": "sites",
"_type": "page",
"_id": "dailymail.co.uk/index.html",
"_score": 1.9664046,
"_routing": "dailymail.co.uk",
"_parent": "dailymail.co.uk",
"_source": {}
}
]
}
}
}
},
{
"_index": "sites",
"_type": "domain",
"_id": "barnesandnoble.com",
"_score": 1,
"_source": {},
"inner_hits": {
"page": {
"hits": {
"total": 2,
"max_score": 1.4142135,
"hits": [
{
"_index": "sites",
"_type": "page",
"_id": "barnesandnoble.com/test.html",
"_score": 1.4142135,
"_routing": "barnesandnoble.com",
"_parent": "barnesandnoble.com",
"_source": {
"crawl_date": "1982-05-16"
}
}
]
}
}
}
}
]
Finally, let me note one thing. Parent/child relationship comes with small costs at query time. If this isn't a problem for your use case, I'd go for this solution.
I suggest you use the exists filter instead of trying to match an empty term (the missing filter is deprecated in 2.2). Then, the range filter will help you filter out the documents you don't need.
Finally, since you have used the absolute URL as id, make sure to aggregate on the _uid field and not the domain field, that way you'll get unique counts per exact page.
curl -XPOST 'http://localhost:9200/tester/test/_search' -d '{
"size": 10,
"aggs": {
"group_by_domain": {
"filter": {
"bool": {
"should": [
{
"bool": {
"must_not": {
"exists": {
"field": "crawl_date"
}
}
}
},
{
"range": {
"crawl_date": {
"lte": "2014-12-31T00:00:00.000"
}
}
}
]
}
},
"aggs": {
"domains": {
"terms": {
"field": "_uid"
}
}
}
}
}
}'
You have to use a filter aggregation and then a sub-aggregation:
{
"size": 10,
"aggs": {
"filter_date": {
"filter": {
"bool": {
"should": [
{
"bool": {
"must_not": [
{
"exists": {
"field": "crawl_date"
}
}
]
}
},
{
"range": {
"crawl_date": {
"from": "now-100d"
}
}
}
]
}
},
"aggs": {
"group_by_domain": {
"terms": {
"field": "domain"
}
}
}
}
}
}

elasticsearch retrieving nested objects - not individual fields

When I use the "fields" option of a query I get a separate array for each field. Is it possible to get back the "complete" nested objects rather than just the field?
In the following example if I try to do "fields": ["cast"] it tells me that cast is not a leaf node. And if I do "fields": ["cast.firstName", "cast.middleName", "cast.lastName"] it returns 3 arrays.
Is there another way of retrieving just a partial amount of the document? Or is there a way to "reassemble" the separate fields into a complete "cast" object?
Example Index and Data:
POST /movies
{
"mappings": {
"movie": {
"properties": {
"cast": {
"type": "nested"
}
}
}
}
}
POST /movies/movie
{
"title": "The Matrix",
"cast": [
{
"firstName": "Keanu",
"lastName": "Reeves",
"address": {
"street": "somewhere",
"city": "LA"
}
},
{
"firstName": "Laurence",
"middleName": "John",
"lastName": "Fishburne",
"address": {
"street": "somewhere else",
"city": "NYC"
}
}
]
}
Example Query:
GET /movies/_search
{
"query": {
"filtered": {
"query": {
"match_all": {}
},
"filter": {
"nested": {
"path": "cast",
"filter": {
"bool": {
"must": [
{ "term": { "firstName": "laurence"} },
{ "term": { "lastName": "fishburne"} }
]
}
}
}
}
}
},
"fields": [
"cast.address.city",
"cast.firstName",
"cast.middleName",
"cast.lastName"
]
}
Result of example query:
{
"took": 1,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"failed": 0
},
"hits": {
"total": 1,
"max_score": 1,
"hits": [
{
"_index": "movies",
"_type": "movie",
"_id": "AU1JeyBseLgwMCOuOLsZ",
"_score": 1,
"fields": {
"cast.firstName": [
"Keanu",
"Laurence"
],
"cast.lastName": [
"Reeves",
"Fishburne"
],
"cast.address.city": [
"LA",
"NYC"
],
"cast.middleName": [
"John"
]
}
}
]
}
}
I think this is what you're looking for:
POST /movies/_search
{
"_source": {
"include": [
"cast.address.city",
"cast.firstName",
"cast.middleName",
"cast.lastName"
]
},
"query": {
"filtered": {
"query": {
"match_all": {}
},
"filter": {
"nested": {
"path": "cast",
"filter": {
"bool": {
"must": [
{
"term": {
"firstName": "laurence"
}
},
{
"term": {
"lastName": "fishburne"
}
}
]
}
}
}
}
}
}
}
Result:
{
"took": 2,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"failed": 0
},
"hits": {
"total": 1,
"max_score": 1,
"hits": [
{
"_index": "movies",
"_type": "movie",
"_id": "AU1PIJgBA_0Cyshym7-m",
"_score": 1,
"_source": {
"cast": [
{
"lastName": "Reeves",
"address": {
"city": "LA"
},
"firstName": "Keanu"
},
{
"middleName": "John",
"lastName": "Fishburne",
"address": {
"city": "NYC"
},
"firstName": "Laurence"
}
]
}
}
]
}
}
You can also choose to exclude fields instead of including or both, see documentation here: http://www.elastic.co/guide/en/elasticsearch/reference/current/search-request-source-filtering.html

Resources