Is there a function similar to subquery in elasticsearch? - elasticsearch

I want to do something like a subquery in Elasticsearch.
Let's look at the example below.
create index
PUT test_index
{
"mappings" : {
"properties" : {
"human" : {
"type" : "nested",
"properties" : {
"age" : {
"type" : "integer"
},
"name" : {
"type" : "text"
}
}
}
}
}
}
insert into index sample data
POST test_index/_doc/1
{
"human": [
{
"name": "adrian",
"age" : 24
},
{
"name": "simon",
"age" : 26
},
{
"name": "michale",
"age" : 24
},
{
"name": "beom",
"age" : 25
},
{
"name": "simon",
"age" : 24
}
]
}
In this situation, I want the document to be returned if it satisfies the condition that human.name == "adrian" and human.name == "simon" (i.e. it contains both names),
as follows
{
"took" : 0,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 1,
"relation" : "eq"
},
"max_score" : 0.87546873,
"hits" : [
{
"_index" : "test_index",
"_id" : "1",
"_score" : 0.87546873,
"_source" : {
"human" : [
{
"name" : "adrian",
"age" : 24
},
{
"name" : "simon",
"age" : 26
},
{
"name" : "michale",
"age" : 24
},
{
"name" : "beom",
"age" : 25
},
{
"name" : "simon",
"age" : 24
}
]
}
}
]
}
}
but, when i try like this
GET test_index/_search
{
"query": {
"nested": {
"path": "human",
"query": {
"bool": {
"must": [
{
"match": {
"human.name": "simon"
}
},
{
"match": {
"human.name": "adrian"
}
}
]
}
}
}
}
}
then, result is below
{
"took" : 0,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 0,
"relation" : "eq"
},
"max_score" : null,
"hits" : [ ]
}
}
Is there any way to solve this situation??

You need to do it as follows with two nested queries as each nested document is a document of its own. So you're looking for a top-level document that has two nested documents that must match each human.name:
GET test_index/_search
{
"query": {
"bool": {
"must": [
{
"nested": {
"path": "human",
"query": {
"match": {
"human.name": "simon"
}
}
}
},
{
"nested": {
"path": "human",
"query": {
"match": {
"human.name": "adrian"
}
}
}
}
]
}
}
}

Related

Highlight multi_match doesnt take last term

When I search for multiple keywords, the last term is not highlighted in the result.
This is the index and mapping:
PUT objects
{
"mappings": {
"properties": {
"title": {
"type": "search_as_you_type"
}
}
}
}
And this is my search:
// query
GET objects/_search
{
"query": {
"multi_match": {
"query": "Goldenen Vlies",
"type": "bool_prefix",
"fields": [
"title",
"title._2gram",
"title._3gram",
"title._index_prefix"
]
}
},
"highlight": {
"fields": {
"title": {}
}
},
"_source": false
}
The output I get is the following:
{
"took" : 1,
"timed_out" : false,
"_shards" : {...},
"hits" : {
"total" : {
"value" : 23,
"relation" : "eq"
},
"max_score" : 7.628418,
"hits" : [
{
"_index" : "objects",
"_id" : "AWj1tIEBIysZ6sOt9vqw",
"_score" : 7.628418,
"highlight" : {
"title" : [
"Schwurkreuz des Ordens vom <em>Goldenen</em> Vlies" <-------
]
}
}
]
}
}
However, this would be the expected/desired output:
{
"took" : 1,
"timed_out" : false,
"_shards" : {...},
"hits" : {
"total" : {
"value" : 23,
"relation" : "eq"
},
"max_score" : 7.628418,
"hits" : [
{
"_index" : "objects",
"_id" : "AWj1tIEBIysZ6sOt9vqw",
"_score" : 7.628418,
"highlight" : {
"title" : [
"Schwurkreuz des Ordens vom <em>Goldenen</em> <em>Vlies</em>" <-------
]
}
}
]
}
}
It does work as expected when I add an extra empty space in the query like so: "query": "Goldenen Vlies ", but I want to know if there is a better solution?
Try this way with "best_fields":
{
"query": {
"multi_match": {
"query": "Goldenen Vlies",
"type": "best_fields",
"fields": [
"title",
"title._2gram",
"title._3gram",
"title._index_prefix"
]
}
},
"highlight": {
"fields": {
"title": {}
}
},
"_source": false
}

How to return hit term in ES ?

I am trying to return only the terms that were successfully hit instead of the document itself, but I don’t know how to achieve the desired effect.
"es_episode" : {
"aliases" : { },
"mappings" : {
"properties" : {
"endTime" : {
"type" : "long"
},
"episodeId" : {
"type" : "long"
},
"startTime" : {
"type" : "long"
},
"studentIds" : {
"type" : "long"
}
}
}
This is an example:
{
"episodeId":124,
"startTime":10,
"endTime":20,
"studentIds":[200,300]
}
My query:
GET /es_episode/_search
{
"_source": ["studentIds"],
"query": {
"terms": {
"studentIds": [300,400]
}
}
}
The result is
"hits" : {
"total" : {
"value" : 1,
"relation" : "eq"
},
"max_score" : 1.0,
"hits" : [
{
"_index" : "es_episode",
"_type" : "episode",
"_id" : "2",
"_score" : 1.0,
"_source" : {
"studentIds" : [
200,
300
]
}
}
]
}
But in fact I only want to know which terms hit. For example, the result I want should be studentIds=[300] instead of all studentIds=[200,300] of the returned document. It seems that some additional operations are required, but I don’t know how.
I try to achieve my goal with the following query
GET /es_episode/_search
{
"_source": ["studentIds"],
"query": {
"terms": {
"studentIds": [300,400]
}
},
"aggs": {
"student_id": {
"terms": {
"field": "studentIds",
"size": 10
},
"aggs": {
"id": {
"terms": {
"field": "episodeId"
}
},
"id_select":{
"bucket_selector": {
"buckets_path": {
"key" : "_key"
},
"script": "params.key==300 || params.key==400"
}
}
}
}
}
}
the result for this is
"aggregations" : {
"student_id" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : 300,
"doc_count" : 1,
"id" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : 124,
"doc_count" : 1
}
]
}
}
]
}
}
It seems that I successfully filtered out the terms I don’t want, but this doesn’t look pretty, and I need to set my parameters repeatedly in the script

Elasticsearch get average

I'm trying to average aggregate data on elasticsearch. This is the structure of my data:
document 1
{
"groupId":"TEST_01",
"lag":10,
"detectionDate":"2021-02-26T21:42:30.010Z",
"tipo":"uno",
"topics":[
{
"name":"topic_01",
"valore":2
},
{
"name":"topic_02",
"valore":4
}
]
}
document 2
{
"groupId":"TEST_01",
"lag":10,
"detectionDate":"2021-02-26T21:42:30.010Z",
"tipo":"uno",
"topics":[
{
"name":"topic_01",
"valore":4
},
{
"name":"topic_02",
"valore":8
}
]
}
I have to create an aggregation by groupId and by topic name, and on this aggregation calculate the average of the valore field. But with the Java code shown below, the resulting average is wrong.
With the above data of documents one and two the expected result should be:
groupId
topicName
average
TEST_01
topic_01
3
TEST_01
topic_02
6
TermsAggregationBuilder aggregation = AggregationBuilders
.terms("groupId")
.field("groupId.keyword")
.subAggregation(AggregationBuilders
.terms("topicName")
.field("topics.name.keyword").subAggregation(AggregationBuilders
.avg("avg").field("topics.valore")));
First of all, make sure your topics field is of type "nested", because if it is "object" the topic names and valores will be flattened. This means you will end up with a set of valores and a set of topic names without any relation between them.
Mappings
{
"test_ynsanity" : {
"mappings" : {
"properties" : {
"detectionDate" : {
"type" : "date"
},
"groupId" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"lag" : {
"type" : "long"
},
"tipo" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"topics" : {
"type" : "nested",
"properties" : {
"name" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"valore" : {
"type" : "long"
}
}
}
}
}
}
}
Ingesting data
POST test_ynsanity/_doc
{
"groupId":"TEST_01",
"lag":10,
"detectionDate":"2021-02-26T21:42:30.010Z",
"tipo":"uno",
"topics":[
{
"name":"topic_01",
"valore":2
},
{
"name":"topic_02",
"valore":4
}
]
}
POST test_ynsanity/_doc
{
"groupId":"TEST_01",
"lag":10,
"detectionDate":"2021-02-26T21:42:30.010Z",
"tipo":"uno",
"topics":[
{
"name":"topic_01",
"valore":4
},
{
"name":"topic_02",
"valore":8
}
]
}
Query
POST test_ynsanity/_search
{
"size": 0,
"aggs": {
"groups": {
"terms": {
"field": "groupId.keyword",
"size": 10
},
"aggs": {
"topics": {
"nested": {
"path": "topics"
},
"aggs": {
"topic_names": {
"terms": {
"field": "topics.name.keyword"
},
"aggs": {
"topic_avg": {
"avg": {
"field": "topics.valore"
}
}
}
}
}
}
}
}
}
}
Response
{
"took" : 1,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 2,
"relation" : "eq"
},
"max_score" : null,
"hits" : [ ]
},
"aggregations" : {
"groups" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : "TEST_01",
"doc_count" : 2,
"topics" : {
"doc_count" : 4,
"topic_names" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : "topic_01",
"doc_count" : 2,
"topic_avg" : {
"value" : 3.0
}
},
{
"key" : "topic_02",
"doc_count" : 2,
"topic_avg" : {
"value" : 6.0
}
}
]
}
}
}
]
}
}
}
I have no access to the Java DSL right now, but the query should look something like this:
// Java equivalent of the JSON aggregation shown above:
//   terms on groupId.keyword
//     -> nested on path "topics"
//       -> terms on topics.name.keyword
//         -> avg of topics.valore
// The terms aggregation on the topic name has to be attached to the nested
// aggregation with subAggregation(...); the nested builder has no terms(...)
// method to chain into.
TermsAggregationBuilder aggregation = AggregationBuilders
    .terms("groupId")
    .field("groupId.keyword")
    .subAggregation(AggregationBuilders
        .nested("agg", "topics")
        .subAggregation(AggregationBuilders
            .terms("topic_names")
            .field("topics.name.keyword")
            .subAggregation(AggregationBuilders
                .avg("avg").field("topics.valore"))));

Filter nested objects in ElasticSearch 6.8.1

I couldn't find any answers on how to do a simple thing in Elasticsearch 6.8: I need to filter nested objects.
Index
{
"settings": {
"index": {
"number_of_shards": "5",
"number_of_replicas": "1"
}
},
"mappings": {
"human": {
"properties": {
"cats": {
"type": "nested",
"properties": {
"name": {
"type": "text"
},
"breed": {
"type": "text"
},
"colors": {
"type": "integer"
}
}
},
"name": {
"type": "text"
}
}
}
}
}
Data
{
"name": "iridakos",
"cats": [
{
"colors": 1,
"name": "Irida",
"breed": "European Shorthair"
},
{
"colors": 2,
"name": "Phoebe",
"breed": "european"
},
{
"colors": 3,
"name": "Nino",
"breed": "Aegean"
}
]
}
select human with name="iridakos" and cats with breed contains 'European' (ignore case).
Only two cats should be returned.
Million thanks for helping.
For nested datatypes, you would need to make use of nested queries.
Elasticsearch would always return the entire document as a response. Note that nested datatype means that every item in the list would be treated as an entire document in itself.
Hence, in addition to returning the entire document, if you also want to know the exact hits, you would need to make use of the inner_hits feature.
Below query should help you.
POST <your_index_name>/_search
{
"query": {
"bool": {
"must": [
{
"match": {
"name": "iridakos"
}
},
{
"nested": {
"path": "cats",
"query": {
"match": {
"cats.breed": "european"
}
},
"inner_hits": {}
}
}
]
}
}
}
Response:
{
"took" : 3,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 1,
"relation" : "eq"
},
"max_score" : 0.74455214,
"hits" : [
{
"_index" : "my_cat_index",
"_type" : "_doc",
"_id" : "1", <--- The document that hit
"_score" : 0.74455214,
"_source" : {
"name" : "iridakos",
"cats" : [
{
"colors" : 1,
"name" : "Irida",
"breed" : "European Shorthair"
},
{
"colors" : 2,
"name" : "Phoebe",
"breed" : "european"
},
{
"colors" : 3,
"name" : "Nino",
"breed" : "Aegean"
}
]
},
"inner_hits" : { <---- Note this
"cats" : {
"hits" : {
"total" : {
"value" : 2, <---- Count of nested doc hits
"relation" : "eq"
},
"max_score" : 0.52354836,
"hits" : [
{
"_index" : "my_cat_index",
"_type" : "_doc",
"_id" : "1",
"_nested" : {
"field" : "cats",
"offset" : 1
},
"_score" : 0.52354836,
"_source" : { <---- First Nested Document
"breed" : "european"
}
},
{
"_index" : "my_cat_index",
"_type" : "_doc",
"_id" : "1",
"_nested" : {
"field" : "cats",
"offset" : 0
},
"_score" : 0.39019167,
"_source" : { <---- Second Document
"breed" : "European Shorthair"
}
}
]
}
}
}
}
]
}
}
Note in your response how the inner_hits section would appear where you would find the exact hits.
Hope this helps!
You could use something like this:
{
"query": {
"bool": {
"must": [
{ "match": { "name": "iridakos" }},
{ "match": { "cats.breed": "European" }}
]
}
}
}
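To search on a cat's breed, you can use the dot-notation. Note that a plain match on cats.breed only works when cats is mapped as a regular object field; with the nested mapping from the question, the match on cats.breed has to be wrapped in a nested query with "path": "cats".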

Nested boolean aggregation in elastic

I have json payloads as such
{
"took" : 3,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 61,
"relation" : "eq"
},
"max_score" : 1.0,
"hits" : [
{
"_index" : "myindex",
"_type" : "_doc",
"_id" : "CAojVWwBO8H0jj7a_j3P",
"_score" : 1.0,
"_source" : {
"appName" : "BigApp",
"appVer" : "1.0",
"reviews" : {
"reviewer" : {
"value" : "Bob"
},
"testsPass" : [
{
"name" : "unit",
"pass" : false
},
{
"name" : "integraton",
"pass" : false
},
{
"name" : "ui",
"pass" : false
}
]
}
}
}
]
}
}
In elastic I want to aggregate the boolean values under testsPass to return true if all of the pass values are true.
I am new to Elastic and struggling to write a query in that shape, can someone please help?
So far I have tried nested aggregators but can't get the syntax right.
Looking at your data, I'm assuming the structure of your mapping is as follows:
Mapping:
PUT myindex
{
"mappings": {
"properties": {
"appName":{
"type": "keyword"
},
"appVer": {
"type": "keyword"
},
"reviews":{
"properties": {
"reviewer":{
"properties":{
"value": {
"type": "keyword"
}
}
},
"testsPass":{
"type": "nested"
}
}
}
}
}
}
Sample Documents:
POST myindex/_doc/1
{
"appName":"BigApp",
"appVer":"1.0",
"reviews":{
"reviewer":{
"value":"Bob"
},
"testsPass":[
{
"name":"unit",
"pass":false
},
{
"name":"integraton",
"pass":false
},
{
"name":"ui",
"pass":false
}
]
}
}
POST myindex/_doc/2
{
"appName":"MidApp",
"appVer":"1.0",
"reviews":{
"reviewer":{
"value":"Bob"
},
"testsPass":[
{
"name":"unit",
"pass":true
},
{
"name":"integraton",
"pass":true
},
{
"name":"ui",
"pass":true
}
]
}
}
POST myindex/_doc/3
{
"appName":"SmallApp",
"appVer":"1.0",
"reviews":{
"reviewer":{
"value":"Bob"
},
"testsPass":[
{
"name":"unit",
"pass":true
},
{
"name":"integraton",
"pass":true
},
{
"name":"ui",
"pass":false
}
]
}
}
Note that in the list of the above documents, only the document having appName: MidApp(2nd document) has the list of all true values.
Aggregation Query:
POST myindex/_search
{
"size":0,
"aggs":{
"pass_reviewers":{
"filter":{
"bool":{
"must":[
{
"nested":{
"path":"reviews.testsPass",
"query":{
"match":{
"reviews.testsPass.pass":"true"
}
}
}
}
],
"must_not":[
{
"nested":{
"path":"reviews.testsPass",
"query":{
"match":{
"reviews.testsPass.pass":"false"
}
}
}
}
]
}
},
"aggs":{
"myhits":{
"top_hits":{
"size":10
}
}
}
}
}
}
Note that the above returns only the concerned document as the result of the Top Hits aggregation. The main aggregation here is in the filter section, which is just a Filter Aggregation.
Response:
{
"took" : 7,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 3,
"relation" : "eq"
},
"max_score" : null,
"hits" : [ ]
},
"aggregations" : {
"pass_reviewers" : {
"doc_count" : 1, <------ Note this. Returns count of docs. This is result of filtered aggregation
"myhits" : { <------ Start of top hits aggregation
"hits" : {
"total" : {
"value" : 1,
"relation" : "eq"
},
"max_score" : 1.0,
"hits" : [
{
"_index" : "myindex",
"_type" : "_doc",
"_id" : "2", <----- Document
"_score" : 1.0,
"_source" : {
"appName" : "MidApp",
"appVer" : "1.0",
"reviews" : {
"reviewer" : {
"value" : "Bob"
},
"testsPass" : [
{
"name" : "unit",
"pass" : true
},
{
"name" : "integraton",
"pass" : true
},
{
"name" : "ui",
"pass" : true
}
]
}
}
}
]
}
}
}
}
}
In case you just want the query to return the documents having all true values, without necessarily using an aggregation, you can simply use the query below:
Query:
POST myindex/_search
{
"query":{
"bool":{
"must":[
{
"nested":{
"path":"reviews.testsPass",
"query":{
"match":{
"reviews.testsPass.pass":"true"
}
}
}
}
],
"must_not":[
{
"nested":{
"path":"reviews.testsPass",
"query":{
"match":{
"reviews.testsPass.pass":"false"
}
}
}
}
]
}
}
}
Basically the core execution logic is the same in both the queries, I've just narrowed down the logic you are looking for.
Response:
{
"took" : 5,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 1,
"relation" : "eq"
},
"max_score" : 0.597837,
"hits" : [
{
"_index" : "myindex",
"_type" : "_doc",
"_id" : "2",
"_score" : 0.597837,
"_source" : {
"appName" : "MidApp",
"appVer" : "1.0",
"reviews" : {
"reviewer" : {
"value" : "Bob"
},
"testsPass" : [
{
"name" : "unit",
"pass" : true
},
{
"name" : "integraton",
"pass" : true
},
{
"name" : "ui",
"pass" : true
}
]
}
}
}
]
}
}
Hope this helps!

Resources