Finding all objects with a certain field in ElasticSearch - elasticsearch

My mapping looks like so:
"condition": {
"properties": {
"name": {
"type": "keyword"
},
"value": {
"type": "keyword"
}
}
},
and some data I have looks like:
"condition": [
{
"name": "condition",
"value": "new",
},
{
"name": "condition",
"value": "gently-used",
}
]
How can I write a query that finds all objects within the array that have a new condition?
I have the following but I am getting 0 results back:
{
"query": {
"bool": {
"must": [
{
"match_phrase": {
"attribute_condition": "new"
}
}
]
}
}
}

First, you need to map your condition field as a nested type.
"condition": {
"type": "nested",
"properties": {
"name": { "type": "keyword" },
"value": { "type": "keyword" }
}
},
Now you're able to query each element of the condition array independently of the others. Next, you need to use the nested query and request the inner hits, which are returned in the inner_hits object of the query response:
{
"query": {
"bool": {
"must": {
"nested": {
"path": "condition",
"query": {
"match": {
"condition.value": "new"
}
},
"inner_hits": {}
}
}
}
}
}
An example response will look like below:
{
"took" : 2,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 1,
"relation" : "eq"
},
"max_score" : 0.6931471,
"hits" : [
{
"_index" : "nested",
"_type" : "_doc",
"_id" : "Xx_LN3gBp5RUqdfAef3B",
"_score" : 0.6931471,
"_source" : {
"condition" : [
{
"name" : "condition",
"value" : "new"
},
{
"name" : "condition",
"value" : "gently-used"
}
]
},
"inner_hits" : { <--- here begins the list of inner hits
"condition" : {
"hits" : {
"total" : {
"value" : 1,
"relation" : "eq"
},
"max_score" : 0.6931471,
"hits" : [
{
"_index" : "nested",
"_type" : "_doc",
"_id" : "Xx_LN3gBp5RUqdfAef3B",
"_nested" : {
"field" : "condition",
"offset" : 0
},
"_score" : 0.6931471,
"_source" : {
"name" : "condition",
"value" : "new"
}
}
]
}
}
}
}
]
}
}

Related

Getting incorrect inner hits from parent child relationship when combined with boolean query

Getting incorrect inner hits from parent child relationship when combined with boolean query
Hi Everyone
I am getting incorrect inner hits results when combining parent-child query with boolean query. To reproduce the issue, I create this Index
PUT /my-index-000001
{
"mappings": {
"_routing": {
"required": true
},
"properties": {
"parentProperty": {
"type": "text"
},
"childProperty": {
"type": "text"
},
"id": {
"type": "integer"
},
"myJoinField": {
"type": "join",
"relations": {
"parent": "mychild"
}
}
}
}
}
Then I add these three documents (the document with id "1" is the parent of the other two documents):
POST /my-index-000001/_doc/1?routing=1
{
"id": 1,
"parentProperty": "a parent document",
"myJoinField": "parent"
}
POST /my-index-000001/_doc/2?routing=1
{
"id": 2,
"childProperty": "queensland civil administration",
"myJoinField": {
"name":"mychild",
"parent":"1"
}
}
POST /my-index-000001/_doc/3?routing=1
{
"id": 3,
"childProperty": "beautiful weather",
"myJoinField": {
"name":"mychild",
"parent":"1"
}
}
Now the index is set up with 3 documents. I am looking for all child documents that meet this boolean query: [childProperty contains either "queensland civil" or both "beautiful" and "nothing"].
I expect that elastic returns only the child document with Id "2" since the child document with Id "3" does not have the term "nothing" in it.
The translated version of this query is as follows:
GET /my-index-000001/_search
{
"query": {
"bool": {
"minimum_should_match": 1,
"should": [
{
"has_child": {
"inner_hits": {
"name": "opr1"
},
"query": {
"query_string": {
"analyzer": "stop",
"query": "childProperty:(\"queensland civil\")"
}
},
"type": "mychild"
}
},
{
"bool": {
"must": [
{
"has_child": {
"inner_hits": {
"name": "opr2"
},
"query": {
"query_string": {
"query": "childProperty:(beautiful)"
}
},
"type": "mychild"
}
},
{
"has_child": {
"inner_hits": {
"name": "opr3"
},
"query": {
"query_string": {
"query": "childProperty:(nothing)"
}
},
"type": "mychild"
}
}
]
}
}
]
}
}
}
and the result that is returned from Elasticsearch is as follows:
{
"took" : 24,
"timed_out" : false,
"_shards" : {
"total" : 5,
"successful" : 5,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 1,
"relation" : "eq"
},
"max_score" : 1.0,
"hits" : [
{
"_index" : "my-index-000001",
"_type" : "_doc",
"_id" : "1",
"_score" : 1.0,
"_routing" : "1",
"_source" : {
"id" : 1,
"parentProperty" : "a parent document",
"myJoinField" : "parent"
},
"inner_hits" : {
"opr1" : {
"hits" : {
"total" : {
"value" : 1,
"relation" : "eq"
},
"max_score" : 1.2814486,
"hits" : [
{
"_index" : "my-index-000001",
"_type" : "_doc",
"_id" : "2",
"_score" : 1.2814486,
"_routing" : "1",
"_source" : {
"id" : 2,
"childProperty" : "queensland civil administration",
"myJoinField" : {
"name" : "mychild",
"parent" : "1"
}
}
}
]
}
},
"opr2" : {
"hits" : {
"total" : {
"value" : 1,
"relation" : "eq"
},
"max_score" : 0.7549127,
"hits" : [
{
"_index" : "my-index-000001",
"_type" : "_doc",
"_id" : "3",
"_score" : 0.7549127,
"_routing" : "1",
"_source" : {
"id" : 3,
"childProperty" : "beautiful weather",
"myJoinField" : {
"name" : "mychild",
"parent" : "1"
}
}
}
]
}
},
"opr3" : {
"hits" : {
"total" : {
"value" : 0,
"relation" : "eq"
},
"max_score" : null,
"hits" : [ ]
}
}
}
}
]
}
}
As you can see in the result, Elasticsearch returns both child documents, which clearly goes against what I have written in the "must" section of the query.
But if I rewrite the query as follows, it returns ONLY the expected document (the document with id "2"):
GET /my-index-000001/_search
{
"query": {
"bool": {
"must": [
{
"has_child": {
"inner_hits": {
"name": "opr1"
},
"query": {
"bool": {
"minimum_should_match": 1,
"should": [
{
"query_string": {
"query": "childProperty:(\"queensland civil\")"
}
},
{
"bool": {
"must": [
{
"query_string": {
"query": "childProperty:(beautiful)"
}
},
{
"query_string": {
"query": "childProperty:(weather1)"
}
}
]
}
}
]
}
},
"type": "mychild"
}
}
]
}
}
}
here is the correct result:
{
"took" : 3,
"timed_out" : false,
"_shards" : {
"total" : 5,
"successful" : 5,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 1,
"relation" : "eq"
},
"max_score" : 1.0,
"hits" : [
{
"_index" : "my-index-000001",
"_type" : "_doc",
"_id" : "1",
"_score" : 1.0,
"_routing" : "1",
"_source" : {
"id" : 1,
"parentProperty" : "a parent document",
"myJoinField" : "parent"
},
"inner_hits" : {
"opr1" : {
"hits" : {
"total" : {
"value" : 1,
"relation" : "eq"
},
"max_score" : 1.2814486,
"hits" : [
{
"_index" : "my-index-000001",
"_type" : "_doc",
"_id" : "2",
"_score" : 1.2814486,
"_routing" : "1",
"_source" : {
"id" : 2,
"childProperty" : "queensland civil administration",
"myJoinField" : {
"name" : "mychild",
"parent" : "1"
}
}
}
]
}
}
}
}
]
}
}
I would appreciate it if someone could tell me what I did wrong in the first query, or whether this is the default behavior in Elasticsearch when it comes to parent/child relationships.

Elasticsearch: sorted nested array

Is it possible to configure the mapping of an index, or the discover view of this index, in such a way that an array inside the documents is / will be sorted?
Background: I have an ES index with documents containing an array:
This array is updated from time to time with new entries (objects containing a timestamp), and I would like this array to be sorted according to the timestamp inside the objects.
If your field is defined as a nested type, then you can use inner_hits to sort the array of objects. It will return the sorted object array inside inner_hits for each document.
You can define field as nested like below:
{
"mappings": {
"properties": {
"name": {
"type": "keyword"
},
"openTimes": {
"type": "nested",
"properties": {
"date": {
"type": "date"
},
"name": {
"type": "keyword"
}
}
}
}
}
}
Let's consider the following sample data:
{"index": { } }
{ "name": "second on 6th (3rd on the 5th)", "openTimes": [ { "date": "2018-12-05T12:00:00" ,"name":"abc"}, { "date": "2018-12-06T11:00:00","name":"xyz" }] }
{"index": { } }
{ "name": "third on 6th (1st on the 5th)", "openTimes": [ {"date": "2018-12-05T10:00:00","name":"abc"}, { "date": "2018-12-06T12:00:00","name":"xyz" }] }
{"index": { } }
{ "name": "first on the 6th (2nd on the 5th)", "openTimes": [ {"date": "2018-12-05T11:00:00","name":"abc" }, { "date": "2018-12-06T10:00:00","name":"xyz" }] }
Below is the query:
{
"query": {
"nested": {
"path": "openTimes",
"query": {
"match_all": {}
},
"inner_hits": {
"sort": {
"openTimes.date": "desc"
}
}
}
}
}
Sample Response:
{
"_index" : "nested-listings",
"_type" : "_doc",
"_id" : "u0fw338BMCbs63yKkqi0",
"_score" : 1.0,
"_source" : {
"name" : "second on 6th (3rd on the 5th)",
"openTimes" : [
{
"date" : "2018-12-05T12:00:00",
"name" : "abc"
},
{
"date" : "2018-12-06T11:00:00",
"name" : "xyz"
}
]
},
"inner_hits" : {
"openTimes" : {
"hits" : {
"total" : {
"value" : 2,
"relation" : "eq"
},
"max_score" : null,
"hits" : [
{
"_index" : "nested-listings",
"_type" : "_doc",
"_id" : "u0fw338BMCbs63yKkqi0",
"_nested" : {
"field" : "openTimes",
"offset" : 1
},
"_score" : null,
"_source" : {
"date" : "2018-12-06T11:00:00",
"name" : "xyz"
},
"sort" : [
1544094000000
]
},
{
"_index" : "nested-listings",
"_type" : "_doc",
"_id" : "u0fw338BMCbs63yKkqi0",
"_nested" : {
"field" : "openTimes",
"offset" : 0
},
"_score" : null,
"_source" : {
"date" : "2018-12-05T12:00:00",
"name" : "abc"
},
"sort" : [
1544011200000
]
}
]
}
}
}
}

Elasticsearch query to get the list of documents with some minimum occurrence of a property

I have an index with documents like this:
[
{
"customer_id" : "123",
"country": "USA",
"department": "IT",
"creation_date" : "2021-06-23"
...
},
{
"customer_id" : "123",
"country": "USA",
"department": "IT",
"creation_date" : "2021-06-24"
...
},
{
"customer_id" : "345",
"country": "USA",
"department": "IT",
"creation_date" : "2021-06-25"
...
}
]
I want to get the list of all documents from a specific country, e.g. USA, within a given time range, with at least 2 occurrences of the same customer_id.
With the above data, it should return
[
{
"customer_id" : "123",
"country": "USA",
"department": "IT",
"creation_date" : "2021-06-24"
...
}
]
Now, I tried the below ES query
POST /index_name/_search
{
"query": {
"bool": {
"must": [
{
"range": {
"creation_date": {
"gte": "2021-06-23",
"lte": "2021-08-23"
}
}
},
{
"match": {
"country": "USA"
}
}
]
}
},
"aggs": {
"customer_agg": {
"terms": {
"field": "customer_id",
"min_doc_count": 2
}
}
}
}
The above query returns following result
"hits" : {
"total" : {
"value" : 10000,
"relation" : "gte"
},
"max_score" : 1.5587491,
"hits" : [...]
]
},
"aggregations" : {
"person_agg" : {
"doc_count_error_upper_bound" : 1,
"sum_other_doc_count" : 1,
"buckets" : [
{
"key" : "customer_id",
"doc_count" : 2
}
]
}
}
I don't need the list of buckets in response, but only the list of documents satisfying the condition. How can I achieve it?
At first glance, I noticed that in the search query you are searching by a field called creation_timestamp, but in the mapping of the document you say that the date field you want to range-check is called creation_date.
I decided to test this locally on Elasticsearch 7.10 and here are the settings I used
PUT /test-index-v1
PUT /test-index-v1/_mapping
{
"properties": {
"customer_id": {
"type": "keyword"
},
"country": {
"type": "keyword"
},
"department": {
"type": "keyword"
},
"creation-date": {
"type": "date"
}
}
}
As you can see, I'm using the keyword type on the fields so that we can use sorting, aggregations, etc.
After I created the index I imported the documents you gave as an example
POST /test-index-v1/_doc
{
"customer_id" : "345",
"country": "USA",
"department": "IT",
"creation_date" : "2021-06-25"
}
POST /test-index-v1/_doc
{
"customer_id" : "123",
"country": "USA",
"department": "IT",
"creation_date" : "2021-06-25"
}
POST /test-index-v1/_doc
{
"customer_id" : "123",
"country": "USA",
"department": "IT",
"creation_date" : "2021-06-24"
}
Then I executed this search query including a must match on the customer_id as well:
POST /test-index-v1/_search
{
"query": {
"bool": {
"must": [
{
"range": {
"creation_date": {
"gte": "2021-06-23",
"lte": "2021-08-23"
}
}
},
{
"match": {
"country": "USA"
}
},
{
"match": {
"customer_id": "123"
}
}
]
}
},
"aggs": {
"customer_agg": {
"terms": {
"field": "customer_id",
"min_doc_count": 2
}
}
}
}
This query will return you the search hits as well. Using only an aggregation the searchHits won't be returned.
Here is the response I received:
"took" : 2,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 2,
"relation" : "eq"
},
"max_score" : 1.6035349,
"hits" : [
{
"_index" : "test-index-v1",
"_type" : "_doc",
"_id" : "vbVD9HsBRVWFAvvZTW-l",
"_score" : 1.6035349,
"_source" : {
"customer_id" : "123",
"country" : "USA",
"department" : "IT",
"creation_date" : "2021-06-25"
}
},
{
"_index" : "test-index-v1",
"_type" : "_doc",
"_id" : "vrVD9HsBRVWFAvvZU29q",
"_score" : 1.6035349,
"_source" : {
"customer_id" : "123",
"country" : "USA",
"department" : "IT",
"creation_date" : "2021-06-24"
}
}
]
},
"aggregations" : {
"customer_agg" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : "123",
"doc_count" : 2
}
]
}
}
}
Hope this helps with your issue. Feel free to leave a comment if you have other questions regarding Elastic! :)
EDIT:
Regarding the grouping by customer_id in a certain date range I used this query:
POST /test-index-v1/_search
{
"aggs": {
"group_by_customer_id": {
"terms": {
"field": "customer_id"
},
"aggs": {
"dates_between": {
"filter": {
"range": {
"creation_date": {
"gte": "2020-06-23",
"lte": "2021-06-24"
}
}
}
}
}
}
}
}
And the response is:
"aggregations" : {
"group_by_customer_id" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : "123",
"doc_count" : 2,
"dates_between" : {
"doc_count" : 1
}
},
{
"key" : "345",
"doc_count" : 1,
"dates_between" : {
"doc_count" : 0
}
}
]
}
}

How to return matched nested objects with elastic-search

Given the documents below, how would I search and return only the matched nested object? I would like the query to return the journal information with only the second nested article, since that's the one being matched in the query.
{
"mappings": {
"properties": {
"isn" : { "type":"text" },
"title" : { "type":"text" },
"article": {
"type": "nested"
}
}
}
}
PUT journal/_doc/1
{
"isn" : "11223344",
"article" : [
{
"id" : 1,
"title" : "first article title",
"author" : "John"
},
{
"id" : 2,
"title" : "second article title",
"author" : "Carl"
}
]
}
GET journal/_search
{
"query": {
"nested": {
"path": "article",
"query": {
"bool": {
"must": [
{ "match": { "article.title": "second" }}
]
}
}
}
}
}
All you need is to ask for inner_hits in your query, as shown below:
GET journal/_search
{
"_source": false,
"query": {
"nested": {
"path": "article",
"query": {
"bool": {
"must": [
{ "match": { "article.title": "second" }}
]
}
}
, "inner_hits": {}
}
}
}
Note that the response has a specific structure:
{
"took" : 1,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 1,
"relation" : "eq"
},
"max_score" : 0.6931472,
"hits" : [
{
"_index" : "journal",
"_type" : "_doc",
"_id" : "1",
"_score" : 0.6931472,
"inner_hits" : {
"article" : {
"hits" : {
"total" : {
"value" : 1,
"relation" : "eq"
},
"max_score" : 0.6931472,
"hits" : [
{
"_index" : "journal",
"_type" : "_doc",
"_id" : "1",
"_nested" : {
"field" : "article",
"offset" : 1
},
"_score" : 0.6931472,
"_source" : {
"id" : 2,
"title" : "second article title",
"author" : "Carl"
}
}
]
}
}
}
}
]
}
}

Sorting documents by a nested field

I'm trying to sort the result returned by ElasticSearch by the nested field sections.name as follows:
Mapping:
PUT /staff
{
"mappings": {
"list": {
"properties": {
"id": {"type": "text" },
"name": {
"type":"text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"sections" : {
"type":"nested",
"properties": {
"id": {"type":"text", "fielddata" : true},
"name": {
"fielddata" : true,
"type": "text",
"fields": {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
}
}
}
}
}
}
}
documents:
POST /staff/list
{
"id": 10,
"name": "abc def",
"sections":
[
{
"id":"1",
"name" : "zamphire"
},{
"id":"2",
"name" : "warden"
}
]
}
POST /staff/list
{
"id": 9,
"name": "abc def",
"sections":
[
{
"id":"1",
"name" : "shaggi"
},{
"id":"2",
"name" : "robert"
}
]
}
POST /staff/list
{
"id": 8,
"name": "abc def",
"sections":
[
{
"id":"3",
"name" : "zamphire"
},{
"id":"2",
"name" : "abi"
}
]
}
I'm performing the following query:
GET /staff/_search
{
"from": 0,
"query": {
"nested": {
"path": "sections",
"query": {
"match": {
"sections.id": {
"query": "1"
}
}
}
}
},
"size": 25,
"sort": [
{
"sections.name": {
"nested": {
"filter": {
"nested": {
"path": "sections",
"query": {
"term" : { "sections.id" : "1" }
}
}
}
},
"order": "asc"
}
}
],
"_source": {
"includes": [
"id",
"name",
"sections"
]
}
}
I get these results:
{
"took" : 2,
"timed_out" : false,
"_shards" : {
"total" : 5,
"successful" : 5,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : 2,
"max_score" : null,
"hits" : [
{
"_index" : "staff",
"_type" : "list",
"_id" : "rJtyyGwBNB-cdBRb5XGR",
"_score" : null,
"_source" : {
"name" : "abc def",
"id" : 10,
"sections" : [
{
"name" : "zamphire",
"id" : "1"
},
{
"name" : "warden",
"id" : "2"
}
]
},
"sort" : [
null
]
},
{
"_index" : "staff",
"_type" : "list",
"_id" : "rZtyyGwBNB-cdBRb6nHU",
"_score" : null,
"_source" : {
"name" : "abc def",
"id" : 9,
"sections" : [
{
"name" : "shaggi",
"id" : "1"
},
{
"name" : "robert",
"id" : "2"
}
]
},
"sort" : [
null
]
}
]
}
}
I'm expecting the section shaggi to come before zamphire and thus the order of the two documents should be reversed.
I noticed this in the results:
"sort" : [
null
]
Is that related? What am I missing here?
According to the docs, changing the sort part to this should do the job:
"sort": [
{
"sections.name": {
"order": "asc",
"nested": {
"path": "sections",
"filter": {
"term" : { "sections.id" : "1" }
}
}
}
}
]
Returns
{
"took" : 7,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 2,
"relation" : "eq"
},
"max_score" : null,
"hits" : [
{
"_index" : "staff",
"_type" : "_doc",
"_id" : "8hSJyWwBHfpsFyAs9f_8",
"_score" : null,
"_source" : {
"name" : "abc def",
"id" : 9,
"sections" : [
{
"name" : "shaggi",
"id" : "1"
},
{
"name" : "robert",
"id" : "2"
}
]
},
"sort" : [
"shaggi"
]
},
{
"_index" : "staff",
"_type" : "_doc",
"_id" : "8RSJyWwBHfpsFyAs5v98",
"_score" : null,
"_source" : {
"name" : "abc def",
"id" : 10,
"sections" : [
{
"name" : "zamphire",
"id" : "1"
},
{
"name" : "warden",
"id" : "2"
}
]
},
"sort" : [
"zamphire"
]
}
]
}
}
Tested with elasticsearch 7.2.0.
Hope that helps.

Resources