Elasticsearch unable to retrieve child documents - elasticsearch

I recently migrated Elasticsearch from version 2.4 to 6.2.1 and my previous GET query is no longer working. Below is the query I am using to retrieve the child document based on its _id and _parent values. Do I have to change the implementation to retrieve the documents from ES?
{
"query": {
"bool": {
"must": [
{
"term": {
"_id": {
"value": "9:v0",
"boost": 1
}
}
},
{
"term": {
"_parent": {
"value": "v0",
"boost": 1
}
}
},
{
"terms": {
"assoc.domainId": [
"XX"
],
"boost": 1
}
},
{
"terms": {
"assoc.nodeId": [
"YY"
],
"boost": 1
}
}
],
"adjust_pure_negative": false,
"boost": 1
}
}
}
parent document in ES:
{
"_index" : "test",
"_type" : "assocjoin",
"_id" : "v0",
"_score" : 1.0,
"_source" : {
"my_join_field" : {
"name" : "version"
},
"versionnumber" : "v0",
"versiondate" : "2018/03/29 13:25:02"
}
}
Child document in ES:
{
"_index" : "test",
"_type" : "versionjoin",
"_id" : "9:v0",
"_score" : 0.18232156,
"_routing" : "v0",
"_source" : {
"id" : 0,
"assocDTO" : {
"id" : 9,
"domainId" : "XX",
"nodeId" : "YY"
},
"biomarkers" : [
{
....
}
],
"contexts" : [
{
....
}
]
},
"my_join_field" : {
"name" : "assocversion",
"parent" : "v0"
}
}
}
]
}

Related

Checking if a field exists for any and/or all nested objects

I took a look at ElasticSearch: search inside the array of objects and while it helps, I'm actually trying to determine if at least one has a field and if all nested objects have the field.
Pretending we have an index of all refrigerators with a superfluous document like:
{
"_id": "whatever",
"location": "North Building 1",
"floor": 2,
"tag": "refrigerator-1",
"contents" : [
{
"item": "milk-carton",
"expires": 1-1-2023
},
{
"item": "pyrex-container",
}
]
}
How do I create an Elasticsearch query to:
Find any refrigerator that has at least 1 item that CAN expire ( "exists" : { "field" : "expires" } )
Find refrigerators that have no items that expire
Find refrigerators where all items have an expires field
If you want to do this in a single query, use named queries (the _name parameter)
Query
{
"query": {
"bool": {
"should": [
{
"nested": {
"_name": "At least one expires",
"path": "contents",
"query": {
"exists": {
"field": "contents.expires"
}
}
}
},
{
"bool": {
"_name": "None expires",
"must_not": [
{
"nested": {
"path": "contents",
"query": {
"exists": {
"field": "contents.expires"
}
}
}
}
]
}
},
{
"bool": {
"_name": "All expires",
"must": [
{
"nested": {
"path": "contents",
"query": {
"exists": {
"field": "contents.expires"
}
}
}
}
],
"must_not": [
{
"nested": {
"path": "contents",
"query": {
"bool": {
"must_not": [
{
"exists": {
"field": "contents.expires"
}
}
]
}
}
}
}
]
}
}
]
}
}
}
Result
"hits" : [
{
"_index" : "index70",
"_type" : "_doc",
"_id" : "Qt2PVoQB_m3FhzcGBasD",
"_score" : 2.0,
"_source" : {
"location" : "North Building 1",
"floor" : 3,
"tag" : "refrigerator-3",
"contents" : [
{
"item" : "milk-carton",
"expires" : "2023-01-01"
},
{
"item" : "pyrex-container",
"expires" : "2023-01-01"
}
]
},
"matched_queries" : [
"At least one expires",
"All expires"
]
},
{
"_index" : "index70",
"_type" : "_doc",
"_id" : "QN2BVoQB_m3FhzcG9qsG",
"_score" : 1.0,
"_source" : {
"location" : "North Building 1",
"floor" : 2,
"tag" : "refrigerator-1",
"contents" : [
{
"item" : "milk-carton",
"expires" : "2023-01-01"
},
{
"item" : "pyrex-container"
}
]
},
"matched_queries" : [
"At least one expires"
]
},
{
"_index" : "index70",
"_type" : "_doc",
"_id" : "Qd2HVoQB_m3FhzcGUauO",
"_score" : 0.0,
"_source" : {
"location" : "North Building 1",
"floor" : 3,
"tag" : "refrigerator-2",
"contents" : [
{
"item" : "milk-carton"
},
{
"item" : "pyrex-container"
}
]
},
"matched_queries" : [
"None expires"
]
}
]
The query is self-explanatory. If you want to use separate queries for the three conditions, split the query above: each should clause becomes a separate query.

Return field even if specific field value isn't available

I have this bool query:
{
"bool": {
"must_not": [
{
"exists": {
"field": "*multiparttype.doNotDisplay",
"boost": 1
}
}
],
"should": [
{
"exists": {
"field": "multiparttype",
"boost": 1
}
},
{
"exists": {
"field": "*multiparttype.oldValue",
"boost": 1
}
},
{
"exists": {
"field": "*multiparttype.newValue",
"boost": 1
}
}
]
}
}
It returns data if the document in ES has the following structure. If a document like the one below exists, this query will work and return that document:
multiparttype{
oldValue: "YY",
newValue:"XXX",
type:10
}
But if document just have this:
multiparttype{
type:10
}
OR
multiparttype{
}
The above query won't return this document.
How can I make that possible?
Based on your problem, you need to use a match_all which will match against all documents, which would return all documents with a score of "1.0".
The following data was in the index:
multiparttype = { "oldValue" : "versionX","newValue" : "versionY"}
empty_field : "test"
empty_field : "test",multiparttype : {}
multiparttype = {"type" : "typetest"}
The following query was corrected taking into account the boost which can be changed based on the requirements.
"query": {
"bool": {
"should": [
{
"match_all": {}
},
{
"exists": {
"field": "multiparttype.oldValue",
"boost": 1
}
},
{
"exists": {
"field": "multiparttype.newValue",
"boost": 1
}
}
],
"must_not": [
{
"exists": {
"field": "*multiparttype.doNotDisplay"
}
}
]
}
}
The following response will be generated:
"hits" : {
"total" : {
"value" : 4,
"relation" : "eq"
},
"max_score" : 3.0,
"hits" : [
{
"_index" : "stackoverflow-field",
"_type" : "_doc",
"_id" : "7Qg7TnQB3IIDvL59KA7i",
"_score" : 3.0,
"_source" : {
"multiparttype" : {
"oldValue" : "versionX",
"newValue" : "versionY"
}
}
},
{
"_index" : "stackoverflow-field",
"_type" : "_doc",
"_id" : "1wmWTnQB3IIDvL59lAAL",
"_score" : 1.0,
"_source" : {
"multiparttype" : {
"type" : "typetest"
}
}
},
{
"_index" : "stackoverflow-field",
"_type" : "_doc",
"_id" : "tQmbTnQB3IIDvL59Zgy7",
"_score" : 1.0,
"_source" : {
"empty_field" : "test"
}
},
{
"_index" : "stackoverflow-field",
"_type" : "_doc",
"_id" : "tQmcTnQB3IIDvL59fA8Z",
"_score" : 1.0,
"_source" : {
"empty_field" : "test",
"multiparttype" : { }
}
}
]
}
Documentation : https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-match-all-query.html

Filter nested objects in ElasticSearch 6.8.1

I couldn't find an answer on how to do a simple thing in Elasticsearch 6.8: I need to filter nested objects.
Index
{
"settings": {
"index": {
"number_of_shards": "5",
"number_of_replicas": "1"
}
},
"mappings": {
"human": {
"properties": {
"cats": {
"type": "nested",
"properties": {
"name": {
"type": "text"
},
"breed": {
"type": "text"
},
"colors": {
"type": "integer"
}
}
},
"name": {
"type": "text"
}
}
}
}
}
Data
{
"name": "iridakos",
"cats": [
{
"colors": 1,
"name": "Irida",
"breed": "European Shorthair"
},
{
"colors": 2,
"name": "Phoebe",
"breed": "european"
},
{
"colors": 3,
"name": "Nino",
"breed": "Aegean"
}
]
}
Select the human with name="iridakos" and only the cats whose breed contains 'European' (ignoring case).
Only two cats should be returned.
Million thanks for helping.
For nested datatypes, you would need to make use of nested queries.
Elasticsearch always returns the entire document in the response. Note that the nested datatype means that every item in the list is treated as a separate document in itself.
Hence, in addition to returning the entire document, if you also want to know the exact hits, you need to make use of the inner_hits feature.
Below query should help you.
POST <your_index_name>/_search
{
"query": {
"bool": {
"must": [
{
"match": {
"name": "iridakos"
}
},
{
"nested": {
"path": "cats",
"query": {
"match": {
"cats.breed": "european"
}
},
"inner_hits": {}
}
}
]
}
}
}
Response:
{
"took" : 3,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 1,
"relation" : "eq"
},
"max_score" : 0.74455214,
"hits" : [
{
"_index" : "my_cat_index",
"_type" : "_doc",
"_id" : "1", <--- The document that hit
"_score" : 0.74455214,
"_source" : {
"name" : "iridakos",
"cats" : [
{
"colors" : 1,
"name" : "Irida",
"breed" : "European Shorthair"
},
{
"colors" : 2,
"name" : "Phoebe",
"breed" : "european"
},
{
"colors" : 3,
"name" : "Nino",
"breed" : "Aegean"
}
]
},
"inner_hits" : { <---- Note this
"cats" : {
"hits" : {
"total" : {
"value" : 2, <---- Count of nested doc hits
"relation" : "eq"
},
"max_score" : 0.52354836,
"hits" : [
{
"_index" : "my_cat_index",
"_type" : "_doc",
"_id" : "1",
"_nested" : {
"field" : "cats",
"offset" : 1
},
"_score" : 0.52354836,
"_source" : { <---- First Nested Document
"breed" : "european"
}
},
{
"_index" : "my_cat_index",
"_type" : "_doc",
"_id" : "1",
"_nested" : {
"field" : "cats",
"offset" : 0
},
"_score" : 0.39019167,
"_source" : { <---- Second Document
"breed" : "European Shorthair"
}
}
]
}
}
}
}
]
}
}
Note in your response how the inner_hits section would appear where you would find the exact hits.
Hope this helps!
You could use something like this:
{
"query": {
"bool": {
"must": [
{ "match": { "name": "iridakos" }},
{ "match": { "cats.breed": "European" }}
]
}
}
}
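To search on a cat's breed, you can use the dot-notation. Note that a plain match on cats.breed only works when cats is mapped as a regular object field; because the mapping above declares cats as nested, the clause has to be wrapped in a nested query with "path": "cats".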

elasticsearch groupby and filter by regex condition

It's a bit hard for me to define the question as I'm not very experienced with Elasticsearch. I'm focusing the question on my specific problem:
Assuming I have the following records:
{
id: 1
name: bla1_1.aaa
},
{
id: 1
name: bla1_2.bbb
},
{
id: 2
name: bla2_1.aaa
},
{
id: 2
name: bla2_2.aaa
}
What I want is to GET all the ids that have all of their names ending with aaa.
I was thinking about grouping by id and then running a regex query like .*\.aaa, so that all the names under an id must satisfy the regex.
On this particular example I would get id: 2 back.
How do I do it?
Let me know if there's anything I need to add to clarify the question.
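A regexp query can be used.
The pattern .* matches any character any number of times, including zero. Note that the unescaped . before aaa in the pattern used below also matches any single character; escape it (\\. inside the JSON string) to match a literal dot.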
Terms aggregation will give you unique "ids" and number of docs under them.
Mapping :
PUT regex
{
"mappings": {
"properties": {
"id":{
"type":"integer"
},
"name":{
"type":"text",
"fields": {
"keyword":{
"type":"keyword"
}
}
}
}
}
}
Data:
"hits" : [
{
"_index" : "regex",
"_type" : "_doc",
"_id" : "olQXjW0BywGFQhV7k84P",
"_score" : 1.0,
"_source" : {
"id" : 1,
"name" : "bla1_1.aaa"
}
},
{
"_index" : "regex",
"_type" : "_doc",
"_id" : "o1QXjW0BywGFQhV7us6B",
"_score" : 1.0,
"_source" : {
"id" : 1,
"name" : "bla1_2.bbb"
}
},
{
"_index" : "regex",
"_type" : "_doc",
"_id" : "pFQXjW0BywGFQhV77c6J",
"_score" : 1.0,
"_source" : {
"id" : 2,
"name" : "bla2_1.aaa"
}
},
{
"_index" : "regex",
"_type" : "_doc",
"_id" : "pVQYjW0BywGFQhV7Dc6F",
"_score" : 1.0,
"_source" : {
"id" : 2,
"name" : "bla2_2.aaa"
}
}
]
Query:
GET regex/_search
{
"size":0,
"query": {
"regexp": {
"name.keyword": {
"value": ".*.aaa" ---> name ending with .aaa
}
}
},
"aggs": {
"unique_ids": {
"terms": {
"field": "id",
"size": 10
}
}
}
}
Result:
"hits" : {
"total" : {
"value" : 3,
"relation" : "eq"
},
"max_score" : null,
"hits" : [ ]
},
"aggregations" : {
"unique_ids" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : 2, ---> 2 doc under id 2
"doc_count" : 2
},
{
"key" : 1, ----> 1 doc under id 1
"doc_count" : 1
}
]
}
}
Edit:
Use a bucket selector to keep only the buckets where the total count of docs for an id equals the count of docs matching the regex:
GET regex/_search
{
"size": 0,
"aggs": {
"unique_ids": {
"terms": {
"field": "id",
"size": 10
},
"aggs": {
"totalCount": { ---> to get total count of id(all docs)
"value_count": {
"field": "id"
}
},
"filter_agg": {
"filter": {
"bool": {
"must": [
{
"regexp": {
"name.keyword": ".*.aaa"
}
}
]
}
},
"aggs": {
"finalCount": { -->total count of docs matching regex
"value_count": {
"field": "id"
}
}
}
},
"mybucket_selector": { ---> include buckets where totalcount==finalcount
"bucket_selector": {
"buckets_path": {
"FinalCount": "filter_agg>finalCount",
"TotalCount": "totalCount"
},
"script": "params.FinalCount==params.TotalCount"
}
}
}
}
}
}

elastic: query the sum of a filtered subset of nested documents

Consider a document (post) like this in elasticsearch index:
{
title: "I love ice cream!"
comments: [
{
body: "me too!",
reaction: 'positive',
likes: 20
},
{
body: "huh!",
reaction: 'sarcastic',
likes: 5
}
]
}
The comments is a field of nested type.
How can elastic answer this:
Give me all posts, where the total sum of likes on "sarcastic" comments is greater than 100.
I'm open to any other way of modelling data which helps answer such queries.
This can be solved using bucket selector aggregation.
Mapping:
{
"index1" : {
"mappings" : {
"properties" : {
"comments" : {
"type" : "nested",
"properties" : {
"body" : {
"type" : "text"
},
"likes" : {
"type" : "integer"
},
"reaction" : {
"type" : "text"
}
}
},
"title" : {
"type" : "keyword"
}
}
}
}
}
Data:
"hits" : [
{
"_index" : "index1",
"_type" : "_doc",
"_id" : "p0y9DGsBfPdKzuAGdQrm",
"_score" : 1.0,
"_source" : {
"title" : "I love ice cream!",
"comments" : [
{
"body" : "me too!",
"reaction" : "positive",
"likes" : 20
},
{
"body" : "huh!",
"reaction" : "sarcastic",
"likes" : 5
}
]
}
},
{
"_index" : "index1",
"_type" : "_doc",
"_id" : "qEy9DGsBfPdKzuAGnwox",
"_score" : 1.0,
"_source" : {
"title" : "I hate ice cream!",
"comments" : [
{
"body" : "me too!",
"reaction" : "positive",
"likes" : 10
},
{
"body" : "huh!",
"reaction" : "sarcastic",
"likes" : 5
}
]
}
}
]
}
Query:
GET index1/_search
{
"size": 0,
"aggs": {
"title": {
"terms": {
"field": "title"
},
"aggs": {
"comments": {
"nested": {
"path": "comments"
},
"aggs": {
"reaction": {
"filter": {
"term": {
"comments.reaction": "positive"
}
},
"aggs": {
"total_likes": {
"sum": {
"field": "comments.likes"
}
}
}
}
}
},
"total_likes_filter": {
"bucket_selector": {
"buckets_path": {
"likes": "comments>reaction>total_likes"
},
"script": "params.likes > 15"
}
}
}
}
}
}
Result:
"aggregations" : {
"title" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : "I love ice cream!",
"doc_count" : 1,
"comments" : {
"doc_count" : 2,
"reaction" : {
"doc_count" : 1,
"total_likes" : {
"value" : 20.0
}
}
}
}
]
}
}
}
The result contains only the "I love ice cream!" bucket, because its total likes for the positive reaction (20) is greater than the threshold of 15 used in the script.
"I hate ice cream!" has a total of 10 likes for the positive reaction, so it is not included.

Resources