elasticsearch terms on bool field not working - elasticsearch

I have this query that always returns null:
{
"query": {
"bool": {
"should": {
"nested": {
"query": {
"bool": {
"must": [
{
"term": {
"old": false
}
}
]
}
},
"path": "jobOffers"
}
}
}
}
}
Here's what a match_all query returns:
{
"hits": [{
"_index": "dev",
"_type": "recruitment",
"_id": "202837r",
"_score": 1,
"_routing": "202837",
"_parent": "202837",
"_source": {
"score": 1,
"jobOffers": [{
"jobId": "jksncdjkqsnhcjkqs",
"jobCompany": "company 1",
"jobTitle": "Comptable",
"old": false
}],
"totalCount": 1
}
},
{
"_index": "dev",
"_type": "recruitment",
"_id": "202838r",
"_score": 1,
"_routing": "202838",
"_parent": "202838",
"_source": {
"score": 1,
"jobOffers": [{
"jobId": "wxjkckjwxhcmlazdkklqjkcn",
"jobCompany": "company 2",
"jobTitle": "Commercial",
"old": false
},
{
"jobId": "lxjkckazdwxctrzadjkoo",
"jobCompany": "company 2",
"jobTitle": "Chargé de développement commercial",
"old": false
}
],
"totalCount": 2
},
...
}
I made sure I'm querying the right index and the right type. Is this behavior normal? How can I make it return the expected result?

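In your query, you need to write jobOffers.old instead of just old, because fields inside a nested query must be referenced by their full path, including the nested path prefix: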
{
"query": {
"bool": {
"should": {
"nested": {
"query": {
"bool": {
"must": [
{
"term": {
"jobOffers.old": false <--- modify this
}
}
]
}
},
"path": "jobOffers"
}
}
}
}
}

Related

ElasticSearch: Fetch records from nested Array that "only" include given element/s and filter-out the rest with mixed values

I am stuck on one of my tasks.
Overview:
There are some records in Elasticsearch, which include information about the candidates and their employment.
There is a field that stores information about the statuses in which the candidate got submitted.
{
"submittedJobs": [
{
"status": "PendingPM", "jobId": "ABC", ...
},
{
"status": "PendingClient", "jobId": "XYZ", ...
},
{
"status": "PendingPM", "jobId": "WXY", ...
},
...
]
}
I want to write an ES query to fetch all the records in which the submittedJobs array has "only" "PendingPM" statuses and no other statuses.
"query": {
"bool": {
"filter": [
{
"nested": {
"path": "submittedJobs",
"query": {
"bool": {
"must": [
{
"term": {
"submittedJobs.status.keyword": "PendingPM"
}
}
]
}
}
}
}
]
}
}
I tried this query, but it returns the records which include "PendingPM" along with other statuses - it seems to use contains() logic.
Here is the mapping:
"submittedJobs": {
"type": "nested",
"properties": {
"statusId": {
"type": "long"
},
"status": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256,
"normalizer": "lowercase_normalizer"
}
}
},
"jobId": {
"type": "keyword"
}
}
}
For example. let's suppose there are two documents
document #1:
{
"submittedJobs": [
{
"status": "PendingPM", "jobId": "ABC", ...
},
{
"status": "PendingClient", "jobId": "XYZ", ...
},
{
"status": "PendingPM", "jobId": "WXY", ...
},
...
]
},
document #2:
{
"submittedJobs": [
{
"status": "PendingPM", "jobId": "ABC", ...
},
{
"status": "PendingPM", "jobId": "WXY", ...
},
...
]
}
Only document #2 should be returned, as the entire array contains only "PendingPM" and no other statuses.
Document #1 will be filtered-out since it includes mixed statuses.
Any help will be appreciated.
Try this:
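This is intended to return only documents in which every item of the array has the status PendingPM. Note that for this to hold, the inner must_not should list only the "PendingPM" match; with "PendingClient" also listed there, documents mixing PendingPM and PendingClient statuses are returned as well.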
{
"query": {
"bool": {
"must_not": [
{
"nested": {
"path": "submittedJobs",
"query": {
"bool": {
"must_not": [
{
"match": {
"submittedJobs.status": {
"query": "PendingPM"
}
}
},
{
"match": {
"submittedJobs.status": {
"query": "PendingClient"
}
}
}
]
}
}
}
}
]
}
}
}
You can use inner_hits along with a nested query to get only the matching nested objects from each document.
Adding a working example
Index Mapping:
{
"mappings": {
"properties": {
"submittedJobs": {
"type": "nested"
}
}
}
}
Search Query:
{
"query": {
"bool": {
"filter": [
{
"nested": {
"path": "submittedJobs",
"query": {
"bool": {
"must": [
{
"term": {
"submittedJobs.status.keyword": "PendingPM"
}
}
]
}
},
"inner_hits": {}
}
}
]
}
}
}
Search Result would be:
"hits": [
{
"_index": "73062439",
"_id": "1",
"_score": 0.0,
"_source": {
"submittedJobs": [
{
"status": "PendingPM",
"jobId": "ABC"
},
{
"status": "PendingClient",
"jobId": "XYZ"
},
{
"status": "PendingPM",
"jobId": "WXY"
}
]
},
"inner_hits": { // note this
"submittedJobs": {
"hits": {
"total": {
"value": 2,
"relation": "eq"
},
"max_score": 0.4700036,
"hits": [
{
"_index": "73062439",
"_id": "1",
"_nested": {
"field": "submittedJobs",
"offset": 0
},
"_score": 0.4700036,
"_source": {
"jobId": "ABC",
"status": "PendingPM"
}
},
{
"_index": "73062439",
"_id": "1",
"_nested": {
"field": "submittedJobs",
"offset": 2
},
"_score": 0.4700036,
"_source": {
"jobId": "WXY",
"status": "PendingPM"
}
}
]
}
}
}
}
]

Filter elastic data on array count

How can we fetch candidates which have at least one phone number from the below index data along with other conditions like must and should?
Using elastic version 6.*
{
"_index": "test",
"_type": "docs",
"_id": "1271",
"_score": 1.518617,
"_source": {
"record": {
"createdDate": "2020-10-16T10:49:51.53",
"phoneNumbers": [
{
"type": "Cell",
"id": 0,
"countryCode": "+1",
"phoneNumber": "7845200448",
"extension": "",
"typeId": 700
}
]
},
"entityType": "Candidate",
"dbId": "1271",
"id": "1271"
}
}
You can use a terms query, which returns documents that contain one
or more exact terms in a provided field.
Search Query:
{
"query": {
"bool": {
"must": [
{
"terms": {
"record.phoneNumbers.phoneNumber.keyword": [
"7845200448"
]
}
}
]
}
}
}
Search Result:
"hits": [
{
"_index": "stof_64388591",
"_type": "_doc",
"_id": "1",
"_score": 1.0,
"_source": {
"record": {
"createdDate": "2020-10-16T10:49:51.53",
"phoneNumbers": [
{
"type": "Cell",
"id": 0,
"countryCode": "+1",
"phoneNumber": "7845200448",
"extension": "",
"typeId": 700
}
]
},
"entityType": "Candidate",
"dbId": "1271",
"id": "1271"
}
}
]
Update 1: For version 7.*
You need to use a script query, to filter documents based on the provided script.
{
"query": {
"bool": {
"filter": {
"script": {
"script": {
"source": "doc['record.phoneNumbers.phoneNumber.keyword'].length > 0",
"lang": "painless"
}
}
}
}
}
}
For version 6.*
{
"query": {
"bool": {
"filter": {
"script": {
"script": {
"source": "doc['record.phoneNumbers.phoneNumber.keyword'].values.length > 0",
"lang": "painless"
}
}
}
}
}
}
You can use an exists query for this purpose, as shown below; it is a lightweight query in comparison with scripts:
{
"query": {
"exists": {
"field": "record.phoneNumbers.phoneNumber"
}
}
}

elasticsearch complex query on nested object

I have a list of books; each book has nested tags:
"hits": [
{
"_index": "",
"_type": "",
"_id": "",
"_score": ,
"_source": {
"name": "book1",
"tags": [
{
"t": "tagA",
"w": 100
},
{
"t": "tagB",
"w": 0
},
],
"active": true,
}
},
{
"_index": "",
"_type": "",
"_id": "",
"_score": ,
"_source": {
"name": "book2",
"tags": [
{
"t": "tagA",
"w": 100
},
{
"t": "tagB",
"w": 0
},
],
"active": true,
}
},
{
"_index": "",
"_type": "",
"_id": "",
"_score": ,
"_source": {
"name": "book3",
"tags": [
{
"t": "tagC",
"w": 100
},
{
"t": "tagB",
"w": 0
},
],
"active": false,
}
}]
First, I tried to get all 'active' books with a specific tag. This can be done with this query:
GET /index/type/_search
{
"query": {
"bool": {
"must_not": {"term" : { "active" : false}},
"must":
[
{
"nested": {
"path": "tags",
"query": {
"bool": {
"must": [
{
"match": {
"tags.t": "tagB"
}
}
]
}
}
}
}
]
}
}
}
For the above, book1 and book2 are returned.
But what I am trying to get now is more complicated.
I am trying to get 'active' books with a specific tag (tagB), but if 'tagC' is in a book, then that book should also be returned even if it is not active.
So for this question, book1, book2 and book3 should be returned.
How can I write this query in Elasticsearch?
Try this, a should clause for both conditions
{
"query": {
"bool": {
"should": [
{
"nested": {
"path": "tags",
"query": {
"bool": {
"must": [
{
"match": {
"tags.t": "tagC"
}
}
]
}
}
}
},
{
"bool": {
"must": [
{
"term": {
"active": true
}
},
{
"nested": {
"path": "tags",
"query": {
"bool": {
"must": [
{
"match": {
"tags.t": "tagB"
}
}
]
}
}
}
}
]
}
}
]
}
}
}

How to narrow down the current aggregation context to a specific scope within set of documents returned from Filter Aggregation?

I have a nested object mapping, the sample data:
{
"_index": "simpleindex",
"_type": "games",
"_id": "AU_eC-Uzt6KxlUliF68N",
"_score": 1,
"_source": {
"continents": [
{
"name": "Asia",
"countries": [
{
"name": "India",
"states": [
{
"name": "TN",
"game": "soccor",
"wins": 1
}
]
},
{
"name": "India",
"states": [
{
"name": "KA",
"game": "soccor",
"wins": 1
}
]
}
]
}
]
}
},
{
"_index": "simpleindex",
"_type": "games",
"_id": "AU_eCf5dt6KxlUliF637",
"_score": 1,
"_source": {
"continents": [
{
"name": "Asia",
"countries": [
{
"name": "India",
"states": [
{
"name": "TN",
"game": "soccor",
"wins": 1
}
]
}
]
}
]
}
},
{
"_index": "simpleindex",
"_type": "games",
"_id": "AU_eDIdXt6KxlUliF69i",
"_score": 1,
"_source": {
"continents": [
{
"name": "Asia",
"countries": [
{
"name": "India",
"states": [
{
"name": "TN",
"game": "soccor",
"wins": 1
}
]
},
{
"name": "India",
"states": [
{
"name": "KA",
"game": "soccor",
"wins": 1
}
]
},
{
"name": "Pak",
"states": [
{
"name": "NA",
"game": "soccor",
"wins": 1
}
]
}
]
}
]
}
}
Here is my Filtered Aggregation that returns documents that match the filter criteria (i.e. continent should be 'Asia' AND country should be 'India'):
{
"aggs": {
"DocumentSet": {
"filter": {
"and": {
"filters": [
{
"nested": {
"path": "continents",
"query": {
"match": {
"continents.name": "asia"
}
}
}
},
{
"nested": {
"path": "continents.countries",
"query": {
"match": {
"continents.countries.name": "india"
}
}
}
}
]
}
},
"aggs": {
"continents": {
"nested": {
"path": "continents"
},
"aggs": {
"countries": {
"nested": {
"path": "continents.countries"
},
"aggs": {
"states": {
"nested": {
"path": "continents.countries.states"
},
"aggs": {
"count": {
"value_count": {
"field": "continents.countries.states.wins"
}
}
}
}
}
}
}
}
}
}}}
And here is the result (copy pasted only the aggregation here):
"aggregations": {
"DocumentSet": {
"doc_count": 3,
"continents": {
"doc_count": 3,
"countries": {
"doc_count": 6,
"states": {
"doc_count": 6,
"count": {
"value": 6
}
}
}
}
}
}
My intention is to get "wins" only from continents.name=asia AND countries.name=india. The filter works as expected, but I need to narrow down the aggregation scope to only countries.name=india (essentially another level of scope on the docs returned by the Filter aggregation) so that the leaf aggregation count is 5 instead of 6.
Try this aggregation:
{
"aggs": {
"continents": {
"nested": {
"path": "continents"
},
"aggs": {
"asia_continent": {
"filter": {
"query": {
"match": {
"continents.name": "asia"
}
}
},
"aggs": {
"countries": {
"nested": {
"path": "continents.countries"
},
"aggs": {
"india_country": {
"filter": {
"query": {
"match": {
"continents.countries.name": "india"
}
}
},
"aggs": {
"states": {
"nested": {
"path": "continents.countries.states"
},
"aggs": {
"count": {
"value_count": {
"field": "continents.countries.states.wins"
}
}
}
}
}
}
}
}
}
}
}
}
}
}

Filter an array of dictionaries that all must contain all of specified values

Say I had this document:
{
"_index": "food",
"_type": "recipes",
"_id": "AU2LjsMLOuShTUj_LBrT",
"_score": 1,
"_source": {
"name": "granola bars",
"ingredients": [
{
"name": "butter",
"quantity": 4
},
{
"name": "granola",
"quantity": 6
}
]
}
}
Using the following filter matches this document fine:
POST /food/recipes/_search
{
"query": {
"filtered": {
"query": {
"match_all": { }
},
"filter": {
"nested": {
"path": "ingredients",
"filter": {
"bool": {
"must": [
{
"terms": {
"ingredients.name": [
"butter",
"granola"
]
}
}
]
}
}
}
}
}
}
}
However it will also match documents that have additional ingredients.
How can I query so that it will only match documents that only have the ingredients butter and granola?
You need a "double negative", so to speak. You want to match parent documents that have nested docs that match your query, and no nested documents that don't match your query.
To test I set up the following index:
PUT /test_index
{
"settings": {
"number_of_shards": 1
},
"mappings": {
"doc": {
"properties": {
"ingredients": {
"type": "nested",
"properties": {
"name": {
"type": "string"
},
"quantity": {
"type": "long"
}
}
},
"name": {
"type": "string"
}
}
}
}
}
And added these two documents:
PUT /test_index/doc/1
{
"name": "granola bars",
"ingredients": [
{
"name": "butter",
"quantity": 4
},
{
"name": "granola",
"quantity": 6
}
]
}
PUT /test_index/doc/2
{
"name": "granola cookies",
"ingredients": [
{
"name": "butter",
"quantity": 5
},
{
"name": "granola",
"quantity": 7
},
{
"name": "milk",
"quantity": 2
},
{
"name": "sugar",
"quantity": 7
}
]
}
Your query returns both documents. For the purposes of this question, to make it easier to understand, I first simplified your query a little:
POST /test_index/doc/_search
{
"query": {
"filtered": {
"query": {
"match_all": {}
},
"filter": {
"nested": {
"path": "ingredients",
"filter": {
"terms": {
"ingredients.name": [
"butter",
"granola"
]
}
}
}
}
}
}
}
Then I added an outer "bool" with two "nested" filters. One is the filter you originally had inside a "must", and the second is the opposite of the filter you had (so it will match nested documents that do NOT contain those terms), inside a "must_not":
POST /test_index/doc/_search
{
"query": {
"filtered": {
"query": {
"match_all": {}
},
"filter": {
"bool": {
"must": [
{
"nested": {
"path": "ingredients",
"filter": {
"terms": {
"ingredients.name": [
"butter",
"granola"
]
}
}
}
}
],
"must_not": [
{
"nested": {
"path": "ingredients",
"filter": {
"not": {
"filter": {
"terms": {
"ingredients.name": [
"butter",
"granola"
]
}
}
}
}
}
}
]
}
}
}
}
}
This returns only the one doc:
{
"took": 1,
"timed_out": false,
"_shards": {
"total": 1,
"successful": 1,
"failed": 0
},
"hits": {
"total": 1,
"max_score": 1,
"hits": [
{
"_index": "test_index",
"_type": "doc",
"_id": "1",
"_score": 1,
"_source": {
"name": "granola bars",
"ingredients": [
{
"name": "butter",
"quantity": 4
},
{
"name": "granola",
"quantity": 6
}
]
}
}
]
}
}
Here is all the code I used for testing it:
http://sense.qbox.io/gist/e5fd0c35070fb329d40ad342b3198695e6f52d3a

Resources