I'm using laravel + elasticsearch.
I have an array like this:
[
{
"title": "product_title",
"stocks": [
{
"country": "EN",
"stock": 0
},
{
"country": "IN",
"stock": 1
}
]
},
{
"title": "product_title_2",
"stocks": [
{
"country": "EN",
"stock": 1
},
{
"country": "IN",
"stock": 0
}
]
}
]
Now I want to find all objects that have a stock entry with country equal to EN and stock greater than 1.
updated
my query:
{
"index": "products",
"body": {
"size": 15,
"from": 1,
"sort": [
{
"stock": {
"order": "desc"
}
}
],
"query": {
"bool": {
"must": [
{
"query_string": {
"query": "**",
"type": "best_fields",
"fields": [
"erp_id",
"title_en^2",
"translations.title^2",
"erp.title_en",
"erp.title",
"erp.options.title",
"erp.options.title_en"
],
"analyze_wildcard": true,
"allow_leading_wildcard": true
}
}
],
"filter": [
{
"term": {
"is_active": 1
}
},
{
"term": {
"shops.shop_id": 1
}
}
]
}
},
"aggs": {
"max_price": {
"filter": {
"term": {
"erp.price_lists.currency.abbr": "tmn"
}
},
"aggs": {
"result": {
"max": {
"field": "erp.price_lists.pivot.price_tt"
}
}
}
},
"min_price": {
"filter": {
"term": {
"erp.price_lists.currency.abbr": "tmn"
}
},
"aggs": {
"result": {
"min": {
"field": "erp.price_lists.pivot.price_tt"
}
}
}
}
}
}
}
You can use nested query along with inner_hits to get the object satisfying the requirements
Adding a working example
Index Mapping:
{
"mappings": {
"properties": {
"stocks": {
"type": "nested"
}
}
}
}
Index Data:
{
"title": "product_title_2",
"stocks": [
{
"country": "EN",
"stock": 1
},
{
"country": "IN",
"stock": 0
}
]
}
{
"title": "product_title",
"stocks": [
{
"country": "EN",
"stock": 0
},
{
"country": "IN",
"stock": 1
}
]
}
{
"title": "product_title_3",
"stocks": [
{
"country": "EN",
"stock": 2
},
{
"country": "IN",
"stock": 0
}
]
}
Search Query:
{
"query": {
"nested": {
"path": "stocks",
"query": {
"bool": {
"filter": [
{
"match": {
"stocks.country": "EN"
}
},
{
"range": {
"stocks.stock": {
"gt": 1
}
}
}
]
}
},
"inner_hits":{}
}
}
}
Search Result:
"hits": [
{
"_index": "67294405",
"_type": "_doc",
"_id": "3",
"_score": 0.0,
"_source": {
"title": "product_title_3",
"stocks": [
{
"country": "EN",
"stock": 2
},
{
"country": "IN",
"stock": 0
}
]
},
"inner_hits": {
"stocks": {
"hits": {
"total": {
"value": 1,
"relation": "eq"
},
"max_score": 0.0,
"hits": [
{
"_index": "67294405",
"_type": "_doc",
"_id": "3",
"_nested": {
"field": "stocks",
"offset": 0
},
"_score": 0.0,
"_source": {
"country": "EN",
"stock": 2
}
}
]
}
}
}
}
]
Related
I am stuck on one of my tasks.
Overview:
There are some records on elastic search. Which includes information about the candidates and their employment.
There is a field that stores information about the statuses in which the candidate got submitted.
{
"submittedJobs": [
{
"status": "PendingPM", "jobId": "ABC", ...
},
{
"status": "PendingClient", "jobId": "XYZ", ...
},
{
"status": "PendingPM", "jobId": "WXY", ...
},
...
]
}
I want to write an es query to fetch all the records in which submitted jobs array "only" have "pendingPM" statuses and no other statuses.
"query": {
"bool": {
"filter": [
{
"nested": {
"path": "submittedJobs",
"query": {
"bool": {
"must": [
{
"term": {
"submittedJobs.status.keyword": "PendingPM"
}
}
]
}
}
}
}
]
}
}
I tried this query, but it returns records that include "PendingPM" along with other statuses - it seems to behave like contains() logic.
here is the mapping
"submittedJobs": {
"type": "nested",
"properties": {
"statusId": {
"type": "long"
},
"status": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256,
"normalizer": "lowercase_normalizer"
}
}
},
"jobId": {
"type": "keyword"
}
}
}
For example. let's suppose there are two documents
document #1:
{
"submittedJobs": [
{
"status": "PendingPM", "jobId": "ABC", ...
},
{
"status": "PendingClient", "jobId": "XYZ", ...
},
{
"status": "PendingPM", "jobId": "WXY", ...
},
...
]
},
document #2:
{
"submittedJobs": [
{
"status": "PendingPM", "jobId": "ABC", ...
},
{
"status": "PendingPM", "jobId": "WXY", ...
},
...
]
}
Only document #2 should be returned, as the entire array contains only "PendingPM" and no other statuses.
Document #1 will be filtered-out since it includes mixed statuses.
Any help will be appreciated.
Try this:
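This returns only documents in which every item of the array has one of the statuses listed in the inner must_not (the outer must_not excludes any document that has an item with a different status). To keep only documents whose items are all PendingPM, as the question asks, leave just the PendingPM match clause in the inner must_not.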
{
"query": {
"bool": {
"must_not": [
{
"nested": {
"path": "submittedJobs",
"query": {
"bool": {
"must_not": [
{
"match": {
"submittedJobs.status": {
"query": "PendingPM"
}
}
},
{
"match": {
"submittedJobs.status": {
"query": "PendingClient"
}
}
}
]
}
}
}
}
]
}
}
}
You can use inner_hits along with nested query to get only the matched results from the document
Adding a working example
Index Mapping:
{
"mappings": {
"properties": {
"submittedJobs": {
"type": "nested"
}
}
}
}
Search Query:
{
"query": {
"bool": {
"filter": [
{
"nested": {
"path": "submittedJobs",
"query": {
"bool": {
"must": [
{
"term": {
"submittedJobs.status.keyword": "PendingPM"
}
}
]
}
},
"inner_hits": {}
}
}
]
}
}
}
Search Result would be:
"hits": [
{
"_index": "73062439",
"_id": "1",
"_score": 0.0,
"_source": {
"submittedJobs": [
{
"status": "PendingPM",
"jobId": "ABC"
},
{
"status": "PendingClient",
"jobId": "XYZ"
},
{
"status": "PendingPM",
"jobId": "WXY"
}
]
},
"inner_hits": { // note this
"submittedJobs": {
"hits": {
"total": {
"value": 2,
"relation": "eq"
},
"max_score": 0.4700036,
"hits": [
{
"_index": "73062439",
"_id": "1",
"_nested": {
"field": "submittedJobs",
"offset": 0
},
"_score": 0.4700036,
"_source": {
"jobId": "ABC",
"status": "PendingPM"
}
},
{
"_index": "73062439",
"_id": "1",
"_nested": {
"field": "submittedJobs",
"offset": 2
},
"_score": 0.4700036,
"_source": {
"jobId": "WXY",
"status": "PendingPM"
}
}
]
}
}
}
}
]
This is my mapping:
"script": {
"type": "nested",
"properties": {
"name": {
"type": "keyword"
},
"age": {
"type": "integer"
}
}
}
and the sample documents are below:
PUT /btest/_create/1
{
"script": [
{
"name": "john",
"age": 14
}
]
}
PUT /btest/_create/2
{
"script": [
{
"name": "tt",
"age": 14
},
{
"name": "jj",
"age": 17
},
{
"name": "tim",
"age": 34
}
]
}
PUT /btest/_create/3
{
"script": [
{
"name": "john",
"age": 42
},
{
"name": "jj",
"age": 12
}
]
}
and I use a max aggregation to get the max age:
GET /btest/_search
{
"query": {
"nested": {
"path": "script",
"query": {
"match": {
"script.name": "john"
}
}
}
},
"aggs": {
"age": {
"nested": {
"path": "script"
},
"aggs": {
"script_age": {
"filter": {
"match": {
"script.name": "john"
}
},
"aggs": {
"length": {
"max": {
"field": "script.age"
}
}
}
}
}
}
}
}
but it returns all documents matching "script.name": "john".
I want to get only the document in which john has the max age.
Should I use an aggregation to get this document?
Or is there a way to use a query similar to max, without an aggregation, for a nested field?
According to your requirement, you need to fetch only those documents that match with name john. This can be achieved in the query section using a nested query with match query.
Now, to get the document having max-age (with name john) you can perform top hits aggregation with sort on script.age field.
{
"size": 0,
"query": {
"nested": {
"path": "script",
"query": {
"match": {
"script.name": "john"
}
}
}
},
"aggs": {
"nested-agg": {
"nested": {
"path": "script"
},
"aggs": {
"by_age": {
"top_hits": {
"sort": [
{
"script.age": {
"order": "desc"
}
}
],
"size": 1
}
}
}
}
}
}
The search response will be
"aggregations": {
"nested-agg": {
"doc_count": 3,
"by_age": {
"hits": {
"total": {
"value": 3,
"relation": "eq"
},
"max_score": null,
"hits": [
{
"_index": "71081556",
"_type": "_doc",
"_id": "3",
"_nested": {
"field": "script",
"offset": 0
},
"_score": null,
"_source": {
"name": "john",
"age": 42
},
"sort": [
42
]
}
]
}
}
}
}
Option 2
You can use sort with the nested query, to get the document having max age
{
"size": 1,
"sort": [
{
"script.age": {
"order": "desc",
"nested": {
"path": "script",
"filter": {
"term": {
"script.name": "john"
}
}
}
}
}
]
}
But in this case, the response contains the entire document instead of only the matching nested object:
"hits": [
{
"_index": "71081556",
"_type": "_doc",
"_id": "3",
"_score": null,
"_source": {
"script": [
{
"name": "john",
"age": 42
},
{
"name": "jj",
"age": 12
}
]
},
"sort": [
42
]
}
]
I am basically trying to write a query that returns the document where
school is "holy international" AND grade is "second".
The issue with the current query is that it is not considering the must match query part, i.e. even though I don't specify the school, it is giving me this document, whereas it is not a match.
The query is giving me all the documents where the grade is second.
I want only the documents where school is "holy international" AND grade is "second".
Also, I have not specified anything in the match query for "schools.school", but it is still giving me results.
mapping
{
"settings": {
"analysis": {
"analyzer": {
"my_keyword_lowercase1": {
"tokenizer": "keyword",
"filter": ["lowercase", "my_pattern_replace1", "trim"]
},
"my_keyword_lowercase2": {
"tokenizer": "standard",
"filter": ["lowercase", "trim"]
}
},
"filter": {
"my_pattern_replace1": {
"type": "pattern_replace",
"pattern": ".",
"replacement": ""
}
}
}
},
"mappings": {
"test_data": {
"properties": {
"schools": {
"type": "nested",
"properties": {
"school": {
"type": "string",
"analyzer": "my_keyword_lowercase1"
},
"grade": {
"type": "string",
"analyzer": "my_keyword_lowercase2"
}
}
}
}
}
}
}
data
{
"_index": "data_index",
"_type": "test_data",
"_id": "57a33ebc1d41",
"_version": 1,
"found": true,
"_source": {
"summary": null,
"schools": [{
"school": "little flower",
"grade": "first",
"date": "2007-06-01",
},
{
"school": "holy international",
"grade": "second",
"date": "2007-06-01",
},
],
"first_name": "Adam",
"location": "Kansas City",
"last_name": "Roger",
"country": "US",
"name": "Adam Roger",
}
}
query
{
"_source": ["first_name"],
"query": {
"nested": {
"path": "schools",
"inner_hits": {
"_source": {
"includes": [
"schools.school",
"schools.grade"
]
}
},
"query": {
"bool": {
"must": {
"match": {
"schools.school": {
"query": "" <-----X didnt specify anything
}
}
},
"filter": {
"match": {
"schools.grade": {
"query": "second",
"operator": "and",
"minimum_should_match": "100%"
}
}
}
}
}
}
}
}
result
{
"took": 26,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"failed": 0
},
"hits": {
"total": 1,
"max_score": 0.2876821,
"hits": [
{
"_index": "data_test",
"_type": "test_data",
"_id": "57a33ebc1d41",
"_score": 0.2876821,
"_source": {
"first_name": "Adam"
},
"inner_hits": {
"schools": {
"hits": {
"total": 1,
"max_score": 0.2876821,
"hits": [
{
"_nested": {
"field": "schools",
"offset": 0
},
"_score": 0.2876821,
"_source": {
"schools": {
"school": "holy international",
"grade": "second"
}
}
}
]
}
}
}
}
]
}
}
So, basically your problem is in the analysis step. When I loaded everything and checked, it became very clear:
This filter completely wipes the entire string from the schools.school field:
"filter": {
"my_pattern_replace1": {
"type": "pattern_replace",
"pattern": ".",
"replacement": ""
}
}
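I think that's happening because . is a regexp metacharacter that matches any character (a literal dot would have to be escaped as "\\."), so every character gets replaced with an empty string. I checked it with the _analyze API: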
POST /_analyze
{
"field": "schools.school",
"text": "holy international"
}
{
"tokens": [
{
"token": "",
"start_offset": 0,
"end_offset": 18,
"type": "word",
"position": 0
}
]
}
That's why you always get a match, every string you passed during indexing time and during search time becomes "". Some additional info from Elastic wiki - https://www.elastic.co/guide/en/elasticsearch/reference/5.1/analysis-pattern_replace-tokenfilter.html
After I removed the pattern replace filter, this query returns everything as expected:
{
"_source": ["first_name"],
"query": {
"nested": {
"path": "schools",
"inner_hits": {
"_source": {
"includes": [
"schools.school",
"schools.grade"
]
}
},
"query": {
"bool": {
"must": {
"match": {
"schools.school": {
"query": "holy international"
}
}
},
"filter": {
"match": {
"schools.grade": {
"query": "second"
}
}
}
}
}
}
}
}
I have a nested object mapping, the sample data:
{
"_index": "simpleindex",
"_type": "games",
"_id": "AU_eC-Uzt6KxlUliF68N",
"_score": 1,
"_source": {
"continents": [
{
"name": "Asia",
"countries": [
{
"name": "India",
"states": [
{
"name": "TN",
"game": "soccor",
"wins": 1
}
]
},
{
"name": "India",
"states": [
{
"name": "KA",
"game": "soccor",
"wins": 1
}
]
}
]
}
]
}
},
{
"_index": "simpleindex",
"_type": "games",
"_id": "AU_eCf5dt6KxlUliF637",
"_score": 1,
"_source": {
"continents": [
{
"name": "Asia",
"countries": [
{
"name": "India",
"states": [
{
"name": "TN",
"game": "soccor",
"wins": 1
}
]
}
]
}
]
}
},
{
"_index": "simpleindex",
"_type": "games",
"_id": "AU_eDIdXt6KxlUliF69i",
"_score": 1,
"_source": {
"continents": [
{
"name": "Asia",
"countries": [
{
"name": "India",
"states": [
{
"name": "TN",
"game": "soccor",
"wins": 1
}
]
},
{
"name": "India",
"states": [
{
"name": "KA",
"game": "soccor",
"wins": 1
}
]
},
{
"name": "Pak",
"states": [
{
"name": "NA",
"game": "soccor",
"wins": 1
}
]
}
]
}
]
}
}
Here is my Filtered Aggregation that returns documents that matches the filter criteria (i.e. continent should be 'Asia' AND country should be 'India'):
{
"aggs": {
"DocumentSet": {
"filter": {
"and": {
"filters": [
{
"nested": {
"path": "continents",
"query": {
"match": {
"continents.name": "asia"
}
}
}
},
{
"nested": {
"path": "continents.countries",
"query": {
"match": {
"continents.countries.name": "india"
}
}
}
}
]
}
},
"aggs": {
"continents": {
"nested": {
"path": "continents"
},
"aggs": {
"countries": {
"nested": {
"path": "continents.countries"
},
"aggs": {
"states": {
"nested": {
"path": "continents.countries.states"
},
"aggs": {
"count": {
"value_count": {
"field": "continents.countries.states.wins"
}
}
}
}
}
}
}
}
}
}}}
And here is the result (copy pasted only the aggregation here):
"aggregations": {
"DocumentSet": {
"doc_count": 3,
"continents": {
"doc_count": 3,
"countries": {
"doc_count": 6,
"states": {
"doc_count": 6,
"count": {
"value": 6
}
}
}
}
}
}
My intention is to get "wins" only from continents.name=asia AND countries.name=india. The filter works as expected but I need to narrow down the aggregation scope only to countries.name=india; essentially another level of scope on the docs returned by Filter aggregation so that leaf aggregation count is 5 instead of 6.
Try this aggregation:
{
"aggs": {
"continents": {
"nested": {
"path": "continents"
},
"aggs": {
"asia_continent": {
"filter": {
"query": {
"match": {
"continents.name": "asia"
}
}
},
"aggs": {
"countries": {
"nested": {
"path": "continents.countries"
},
"aggs": {
"india_country": {
"filter": {
"query": {
"match": {
"continents.countries.name": "india"
}
}
},
"aggs": {
"states": {
"nested": {
"path": "continents.countries.states"
},
"aggs": {
"count": {
"value_count": {
"field": "continents.countries.states.wins"
}
}
}
}
}
}
}
}
}
}
}
}
}
}
Say I had this document:
{
"_index": "food",
"_type": "recipes",
"_id": "AU2LjsMLOuShTUj_LBrT",
"_score": 1,
"_source": {
"name": "granola bars",
"ingredients": [
{
"name": "butter",
"quantity": 4
},
{
"name": "granola",
"quantity": 6
}
]
}
}
Using the following filter matches this document fine:
POST /food/recipes/_search
{
"query": {
"filtered": {
"query": {
"match_all": { }
},
"filter": {
"nested": {
"path": "ingredients",
"filter": {
"bool": {
"must": [
{
"terms": {
"ingredients.name": [
"butter",
"granola"
]
}
}
]
}
}
}
}
}
}
}
However it will also match documents that have additional ingredients.
How can I query so that it will only match documents that only have the ingredients butter and granola?
You need a "double negative", so to speak. You want to match parent documents that have nested docs that match your query, and no nested documents that don't match your query.
To test I set up the following index:
PUT /test_index
{
"settings": {
"number_of_shards": 1
},
"mappings": {
"doc": {
"properties": {
"ingredients": {
"type": "nested",
"properties": {
"name": {
"type": "string"
},
"quantity": {
"type": "long"
}
}
},
"name": {
"type": "string"
}
}
}
}
}
And added these two documents:
PUT /test_index/doc/1
{
"name": "granola bars",
"ingredients": [
{
"name": "butter",
"quantity": 4
},
{
"name": "granola",
"quantity": 6
}
]
}
PUT /test_index/doc/2
{
"name": "granola cookies",
"ingredients": [
{
"name": "butter",
"quantity": 5
},
{
"name": "granola",
"quantity": 7
},
{
"name": "milk",
"quantity": 2
},
{
"name": "sugar",
"quantity": 7
}
]
}
Your query returns both the documents. For the purposes of this question, to make it easier to understand, I first simplified your query a little:
POST /test_index/doc/_search
{
"query": {
"filtered": {
"query": {
"match_all": {}
},
"filter": {
"nested": {
"path": "ingredients",
"filter": {
"terms": {
"ingredients.name": [
"butter",
"granola"
]
}
}
}
}
}
}
}
Then I added an outer "bool" with two "nested" filters. One is the filter you originally had inside a "must", and the second is the opposite of the filter you had (so it will match nested documents that do NOT contain those terms), inside a "must_not":
POST /test_index/doc/_search
{
"query": {
"filtered": {
"query": {
"match_all": {}
},
"filter": {
"bool": {
"must": [
{
"nested": {
"path": "ingredients",
"filter": {
"terms": {
"ingredients.name": [
"butter",
"granola"
]
}
}
}
}
],
"must_not": [
{
"nested": {
"path": "ingredients",
"filter": {
"not": {
"filter": {
"terms": {
"ingredients.name": [
"butter",
"granola"
]
}
}
}
}
}
}
]
}
}
}
}
}
This returns only the one doc:
{
"took": 1,
"timed_out": false,
"_shards": {
"total": 1,
"successful": 1,
"failed": 0
},
"hits": {
"total": 1,
"max_score": 1,
"hits": [
{
"_index": "test_index",
"_type": "doc",
"_id": "1",
"_score": 1,
"_source": {
"name": "granola bars",
"ingredients": [
{
"name": "butter",
"quantity": 4
},
{
"name": "granola",
"quantity": 6
}
]
}
}
]
}
}
Here is all the code I used for testing it:
http://sense.qbox.io/gist/e5fd0c35070fb329d40ad342b3198695e6f52d3a