Filter elastic data on array count - elasticsearch

How can we fetch candidates which have at least one phone number from the below index data along with other conditions like must and should?
Using elastic version 6.*
{
"_index": "test",
"_type": "docs",
"_id": "1271",
"_score": 1.518617,
"_source": {
"record": {
"createdDate": "2020-10-16T10:49:51.53",
"phoneNumbers": [
{
"type": "Cell",
"id": 0,
"countryCode": "+1",
"phoneNumber": "7845200448",
"extension": "",
"typeId": 700
}
]
},
"entityType": "Candidate",
"dbId": "1271",
"id": "1271"
}
}

You can use terms query that returns documents that contain one
or more exact terms in a provided field.
Search Query:
{
"query": {
"bool": {
"must": [
{
"terms": {
"record.phoneNumbers.phoneNumber.keyword": [
"7845200448"
]
}
}
]
}
}
}
Search Result:
"hits": [
{
"_index": "stof_64388591",
"_type": "_doc",
"_id": "1",
"_score": 1.0,
"_source": {
"record": {
"createdDate": "2020-10-16T10:49:51.53",
"phoneNumbers": [
{
"type": "Cell",
"id": 0,
"countryCode": "+1",
"phoneNumber": "7845200448",
"extension": "",
"typeId": 700
}
]
},
"entityType": "Candidate",
"dbId": "1271",
"id": "1271"
}
}
]
Update 1: For version 7.*
You need to use a script query, to filter documents based on the provided script.
{
"query": {
"bool": {
"filter": {
"script": {
"script": {
"source": "doc['record.phoneNumbers.phoneNumber.keyword'].length > 0",
"lang": "painless"
}
}
}
}
}
}
For version 6.*
{
"query": {
"bool": {
"filter": {
"script": {
"script": {
"source": "doc['record.phoneNumbers.phoneNumber.keyword'].values.length > 0",
"lang": "painless"
}
}
}
}
}
}

You can use an exists query for this purpose, as shown below; it is lightweight compared with a script query:
{
"query": {
"exists": {
"field": "record.phoneNumbers.phoneNumber"
}
}
}

Related

ElasticSearch: Fetch records from nested Array that "only" include given element/s and filter-out the rest with mixed values

I am stuck on one of my tasks.
Overview:
There are some records on elastic search. Which includes information about the candidates and their employment.
There is a field that stores information about the statuses in which the candidate got submitted.
{
"submittedJobs": [
{
"status": "PendingPM", "jobId": "ABC", ...
},
{
"status": "PendingClient", "jobId": "XYZ", ...
},
{
"status": "PendingPM", "jobId": "WXY", ...
},
...
]
}
I want to write an es query to fetch all the records in which submitted jobs array "only" have "pendingPM" statuses and no other statuses.
"query": {
"bool": {
"filter": [
{
"nested": {
"path": "submittedJobs",
"query": {
"bool": {
"must": [
{
"term": {
"submittedJobs.status.keyword": "PendingPM"
}
}
]
}
}
}
}
]
}
}
I tried this query, but it returns the records which include "pendingPM" along with other statuses - it seems to behave like contains() logic.
here is the mapping
"submittedJobs": {
"type": "nested",
"properties": {
"statusId": {
"type": "long"
},
"status": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256,
"normalizer": "lowercase_normalizer"
}
}
},
"jobId": {
"type": "keyword"
}
}
}
For example, let's suppose there are two documents:
document #1:
{
"submittedJobs": [
{
"status": "PendingPM", "jobId": "ABC", ...
},
{
"status": "PendingClient", "jobId": "XYZ", ...
},
{
"status": "PendingPM", "jobId": "WXY", ...
},
...
]
},
document #2:
{
"submittedJobs": [
{
"status": "PendingPM", "jobId": "ABC", ...
},
{
"status": "PendingPM", "jobId": "WXY", ...
},
...
]
}
Only document #2 should be returned, as the entire array contains only "PendingPM" and no other statuses.
Document #1 will be filtered-out since it includes mixed statuses.
Any help will be appreciated.
Try this:
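The outer must_not excludes every document that has at least one array item whose status is not among those listed in the inner must_not, so only documents in which every item has a listed status are returned. To return only documents in which every item has the status PendingPM, keep only the PendingPM match clause in the inner must_not.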
{
"query": {
"bool": {
"must_not": [
{
"nested": {
"path": "submittedJobs",
"query": {
"bool": {
"must_not": [
{
"match": {
"submittedJobs.status": {
"query": "PendingPM"
}
}
},
{
"match": {
"submittedJobs.status": {
"query": "PendingClient"
}
}
}
]
}
}
}
}
]
}
}
}
You can use inner_hits along with the nested query to get only the matched nested objects from each document.
Adding a working example
Index Mapping:
{
"mappings": {
"properties": {
"submittedJobs": {
"type": "nested"
}
}
}
}
Search Query:
{
"query": {
"bool": {
"filter": [
{
"nested": {
"path": "submittedJobs",
"query": {
"bool": {
"must": [
{
"term": {
"submittedJobs.status.keyword": "PendingPM"
}
}
]
}
},
"inner_hits": {}
}
}
]
}
}
}
Search Result would be:
"hits": [
{
"_index": "73062439",
"_id": "1",
"_score": 0.0,
"_source": {
"submittedJobs": [
{
"status": "PendingPM",
"jobId": "ABC"
},
{
"status": "PendingClient",
"jobId": "XYZ"
},
{
"status": "PendingPM",
"jobId": "WXY"
}
]
},
"inner_hits": { // note this
"submittedJobs": {
"hits": {
"total": {
"value": 2,
"relation": "eq"
},
"max_score": 0.4700036,
"hits": [
{
"_index": "73062439",
"_id": "1",
"_nested": {
"field": "submittedJobs",
"offset": 0
},
"_score": 0.4700036,
"_source": {
"jobId": "ABC",
"status": "PendingPM"
}
},
{
"_index": "73062439",
"_id": "1",
"_nested": {
"field": "submittedJobs",
"offset": 2
},
"_score": 0.4700036,
"_source": {
"jobId": "WXY",
"status": "PendingPM"
}
}
]
}
}
}
}
]

In Elasticsearch, how can I get the document with the max value for a nested field?

This is my mapping:
"script": {
"type": "nested",
"properties": {
"name": {
"type": "keyword"
},
"age": {
"type": "integer"
}
}
}
and the sample documents are below:
PUT /btest/_create/1
{
"script": [
{
"name": "john",
"age": 14
}
]
}
PUT /btest/_create/2
{
"script": [
{
"name": "tt",
"age": 14
},
{
"name": "jj",
"age": 17
},
{
"name": "tim",
"age": 34
}
]
}
PUT /btest/_create/3
{
"script": [
{
"name": "john",
"age": 42
},
{
"name": "jj",
"age": 12
}
]
}
and I use a max aggregation to get the max age:
GET /btest/_search
{
"query": {
"nested": {
"path": "script",
"query": {
"match": {
"script.name": "john"
}
}
}
},
"aggs": {
"age": {
"nested": {
"path": "script"
},
"aggs": {
"script_age": {
"filter": {
"match": {
"script.name": "john"
}
},
"aggs": {
"length": {
"max": {
"field": "script.age"
}
}
}
}
}
}
}
}
But it returns all documents that match "script.name": "john".
I want to get only the document in which john has the max age.
Should I use an aggregation to get this document?
Or is there a way to use a query similar to max, without an aggregation, for a nested field?
According to your requirement, you need to fetch only those documents that match with name john. This can be achieved in the query section using a nested query with match query.
Now, to get the document having max-age (with name john) you can perform top hits aggregation with sort on script.age field.
{
"size": 0,
"query": {
"nested": {
"path": "script",
"query": {
"match": {
"script.name": "john"
}
}
}
},
"aggs": {
"nested-agg": {
"nested": {
"path": "script"
},
"aggs": {
"by_age": {
"top_hits": {
"sort": [
{
"script.age": {
"order": "desc"
}
}
],
"size": 1
}
}
}
}
}
}
The search response will be
"aggregations": {
"nested-agg": {
"doc_count": 3,
"by_age": {
"hits": {
"total": {
"value": 3,
"relation": "eq"
},
"max_score": null,
"hits": [
{
"_index": "71081556",
"_type": "_doc",
"_id": "3",
"_nested": {
"field": "script",
"offset": 0
},
"_score": null,
"_source": {
"name": "john",
"age": 42
},
"sort": [
42
]
}
]
}
}
}
}
Option 2
You can use sort with the nested query, to get the document having max age
{
"size": 1,
"sort": [
{
"script.age": {
"order": "desc",
"nested": {
"path": "script",
"filter": {
"term": {
"script.name": "john"
}
}
}
}
}
]
}
But in this case, the response contains the entire document, instead of only the matching nested object:
"hits": [
{
"_index": "71081556",
"_type": "_doc",
"_id": "3",
"_score": null,
"_source": {
"script": [
{
"name": "john",
"age": 42
},
{
"name": "jj",
"age": 12
}
]
},
"sort": [
42
]
}
]

Select documents by array of objects when at least one object doesn't contain necessary field Elasticsearch

I have documents in the elasticsearch and can't understand how to apply search script that should return documents if any attachment doesn't contain uuid or uuid is null. Version of elastic 5.2.
Mapping of documents
"mappings": {
"documentType": {
"properties": {
"attachment": {
"properties": {
"uuid": {
"type": "text"
},
"path": {
"type": "text"
},
"size": {
"type": "long"
}
}
}}}
In the elasticsearch it looks like
{
"_index": "documents",
"_type": "documentType",
"_id": "1",
"_score": 1.0,
"_source": {
"attachment": [
{
"uuid": "21321321",
"path": "../uploads/somepath",
"size":1231
},
{
"path": "../uploads/somepath",
"size":1231
},
]},
{
"_index": "documents",
"_type": "documentType",
"_id": "2",
"_score": 1.0,
"_source": {
"attachment": [
{
"uuid": "223645641321321",
"path": "../uploads/somepath",
"size":1231
},
{
"uuid": "22341424321321",
"path": "../uploads/somepath",
"size":1231
},
]},
{
"_index": "documents",
"_type": "documentType",
"_id": "3",
"_score": 1.0,
"_source": {
"attachment": [
{
"uuid": "22789789341321321",
"path": "../uploads/somepath",
"size":1231
},
{
"path": "../uploads/somepath",
"size":1231
},
]}
As a result I want to get the documents with _id 1 and 3, but instead I get a script error.
I tried to apply the following script:
{
"query": {
"bool": {
"must": [
{
"exists": {
"field": "attachment"
}
},
{
"script": {
"script": {
"inline": "for (item in doc['attachment'].value) { if (item['uuid'] == null) { return true}}",
"lang": "painless"
}
}
}
]
}
}
}
The error is as follows:
"root_cause": [
{
"type": "script_exception",
"reason": "runtime error",
"script_stack": [
"org.elasticsearch.search.lookup.LeafDocLookup.get(LeafDocLookup.java:77)",
"org.elasticsearch.search.lookup.LeafDocLookup.get(LeafDocLookup.java:36)",
"for (item in doc['attachment'].value) { ",
" ^---- HERE"
],
"script": "for (item in doc['attachment'].value) { if (item['uuid'] == null) { return true}}",
"lang": "painless"
}
],
Is it possible to select documents in case even one attachment object doesn't contain uuid ?
Iterating arrays of objects is not as trivial as one would expect. I've written extensively about it here and here.
Since your attachments are not defined as nested, ES will internally represent them as flattened lists of values (also called "doc values"). For instance attachment.uuid in doc#2 will become ["223645641321321", "22341424321321"], and attachment.size will turn into [1231, 1231].
This means that you can simply compare the .length of these flattened representations! I assume attachment.size will always be present and can be thus taken as the comparison baseline.
One more thing. To take advantage of these optimized doc values for textual fields, it'll require one small mapping change:
PUT documents/documentType/_mappings
{
"properties": {
"attachment": {
"properties": {
"uuid": {
"type": "text",
"fielddata": true <---
},
"path": {
"type": "text"
},
"size": {
"type": "long"
}
}
}
}
}
When that's done and you've reindexed your docs — which can be done with this little Update by query trick:
POST documents/_update_by_query
You can then use the following script query:
POST documents/_search
{
"query": {
"bool": {
"must": [
{
"exists": {
"field": "attachment"
}
},
{
"script": {
"script": {
"inline": "def size_field_length = doc['attachment.size'].length; def uuid_field_length = doc['attachment.uuid'].length; return uuid_field_length < size_field_length",
"lang": "painless"
}
}
}
]
}
}
}
Just to supplement this answer: if the mapping for the uuid field was created automatically, Elasticsearch defines it this way:
"uuid": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
}
then script could look like:
POST documents/_search
{
"query": {
"bool": {
"must": [
{
"exists": {
"field": "attachment"
}
},
{
"script": {
"script": {
"inline": "doc['attachment.size'].length > doc['attachment.uuid.keyword'].length",
"lang": "painless"
}
}
}
]
}
}
}

Elasticsearch range query with multiple condition

I have to fetch records from Elastic Search on the basis of date it is updated and created. I have these two fields updatedDate and createdDate and the condition should be:
Fetch records that have updatedDate within the range of the past 3 years.
If updatedDate is null, fetch records that have createdDate within the range of the past 3 years.
I have written the query in java for fetching the records on the basis of record createdDate:
.must(QueryBuilders.rangeQuery("createdDate").from(startDate,true).to(endDate,true));
startDate and endDate holds the date range.
I am new to Elastic Search, don't know how to implement the above condition.
Since you have not provided any index data, I am adding a working example with sample index data, mapping, search query and search result that satisfies all the conditions required for your use case.
Index Mapping:
{
"mappings": {
"properties": {
"createdDate": {
"format": "yyyy-MM-dd'T'HH:mm:ss'Z'",
"type": "date"
},
"updatedDate": {
"format": "yyyy-MM-dd'T'HH:mm:ss'Z'",
"type": "date"
}
}
}
}
Index Data:
{
"createdDate": "2020-08-15T00:00:00Z"
}
{
"createdDate": "2019-08-15T00:00:00Z"
}
{
"createdDate": "2010-08-15T00:00:00Z"
}
{
"updatedDate": "2021-08-15T00:00:00Z",
"createdDate": "2002-08-15T00:00:00Z"
}
{
"updatedDate": "2018-08-15T00:00:00Z",
"createdDate": "2020-09-15T00:00:00Z"
}
{
"updatedDate": "2000-08-15T00:00:00Z",
"createdDate": "2020-09-15T00:00:00Z"
}
Search Query:
{
"query": {
"bool": {
"should": [
{
"bool": {
"must": [
{
"bool": {
"filter": {
"range": {
"createdDate": {
"gte": "now-3y",
"lte": "now"
}
}
},
"must_not": {
"exists": {
"field": "updatedDate"
}
}
}
}
]
}
},
{
"bool": {
"filter": {
"range": {
"updatedDate": {
"gte": "now-3y",
"lte": "now"
}
}
}
}
}
],
"minimum_should_match": 1
}
}
}
Search Result:
"hits": [
{
"_index": "64965551",
"_type": "_doc",
"_id": "1",
"_score": 0.0,
"_source": {
"createdDate": "2020-08-15T00:00:00Z"
}
},
{
"_index": "64965551",
"_type": "_doc",
"_id": "2",
"_score": 0.0,
"_source": {
"createdDate": "2019-08-15T00:00:00Z"
}
},
{
"_index": "64965551",
"_type": "_doc",
"_id": "5",
"_score": 0.0,
"_source": {
"updatedDate": "2018-08-15T00:00:00Z",
"createdDate": "2020-09-15T00:00:00Z"
}
}
]

Elasticsearch msearch query with no hits

I'm new to Elastic 5.1, (new to elastic in general) and I have a list which I send using msearch to elastic.
However, the following request does not return any hits, even though the documents in my index look like this:
{
"_index": "all_items",
"_type": "product",
"_id": "1000002007900",
"_version": 2,
"found": true,
"_source": {
"doc": {
"title": "title here",
"brand": null,
"updatedOn": "2016-12-22T14:00:26.016290",
"price": 49,
"viewed7": 0,
"idInShop": "11",
"active": true,
"model": null,
"_id": 1000002007900,
"purchased7": 0
},
"doc_as_upsert": true
}
}
and here is the body sent to msearch
[
{
"index": "all_items",
"type": "product"
},
{
"sort": [
{
"_score": "desc"
}
],
"query": {
"function_score": {
"query": {
"bool": {
"filter": [
{
"term": {
"active": true
}
}
],
"should": [],
"must_not": [],
"must": []
}
},
"functions": [
{
"script_score": {
"script": {
"lang": "painless",
"inline": "_score * params.constant * (doc['discountPrice'] > 0 ? doc['price'] / doc['discountPrice'] : 0)",
"params": {
"constant": 1.2
}
}
}
}
],
"score_mode": "multiply"
}
},
"from": 0,
"size": 3
}
]
If I only send {"query":{"match_all":{}}} I get hits.
You can use match query to get the result you want.
[
{
"index": "all_items",
"type": "product"
},
{
"sort": [
{
"_score": "desc"
}
],
"query": {
"function_score": {
"query": {
"match": {
"active": true
}
},
"functions": [
{
"script_score": {
"script": {
"lang": "painless",
"inline": "_score * params.constant * (doc['discountPrice'] > 0 ? doc['price'] / doc['discountPrice'] : 0)",
"params": {
"constant": 1.2
}
}
}
}
],
"score_mode": "multiply"
}
},
"from": 0,
"size": 3
}
]
You can read more about match query and term based query (which you used) at this link.

Resources