I created an index with the following mapping:
{
"mappings":{
"properties":{
"#timestamp":{
"type":"date",
"doc_values":true
},
"event.category":{
"type":"keyword",
"index":true
},
"action":{
"type":"keyword",
"index":true
},
"success":{
"type":"boolean",
"index":true
},
"raw":{
"type":"text",
"index":false
}
}
}
}
Then I tried to use a bucket_script pipeline aggregation to calculate the success rate per action, with the following search request:
{
"size": 0,
"_source": false,
"query": {
"bool": {
"filter": [{
"term": {
"action": "login"
}
}
]
}
},
"aggs": {
"action_bucket": {
"terms": {
"field": "action",
"show_term_doc_count_error": true
},
"aggs": {
"total": {
"terms": {
"field": "action"
}
},
"action": {
"filter": {
"term": {
"success": true
}
},
"aggs": {
"success": {
"terms": {
"field": "action"
}
}
}
},
"action_success_rate": {
"bucket_script": {
"buckets_path": {
"no_total": "total.doc_count",
"no_success": "action>success.doc_count"
},
"script": "100 * params.no_success / params.no_total"
}
}
}
}
}
}
However, the response does not contain action_success_rate:
{
"took": 3,
"timed_out": false,
"_shards": {
"total": 1,
"successful": 1,
"skipped": 0,
"failed": 0
},
"hits": {
"total": {
"value": 15,
"relation": "eq"
},
"max_score": null,
"hits": []
},
"aggregations": {
"action_bucket": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "login",
"doc_count": 15,
"doc_count_error_upper_bound": 0,
"total": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "login",
"doc_count": 15
}
]
},
"action": {
"doc_count": 9,
"success": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "login",
"doc_count": 9
}
]
}
}
}
]
}
}
}
How could I fix my search request body to obtain success rate?
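The query below replaces the inner terms aggregations with value_count metric aggregations, so each buckets_path entry ("total.value" and "success>success_cnt.value") refers to a single numeric value that bucket_script can use. Note that the query aggregates on "action.keyword"; with the explicit mapping shown here, where "action" is itself a keyword field, you would presumably use "action" instead.
Mapping: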
{
"mappings":{
"properties":{
"#timestamp":{
"type":"date",
"doc_values":true
},
"event.category":{
"type":"keyword",
"index":true
},
"action":{
"type":"keyword",
"index":true
},
"success":{
"type":"boolean",
"index":true
},
"raw":{
"type":"text",
"index":false
}
}
}
}
Sample Data:
{
"took" : 1,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 5,
"relation" : "eq"
},
"max_score" : 1.0,
"hits" : [
{
"_index" : "idx0",
"_type" : "_doc",
"_id" : "E802dnkBHcKLV4rtwh3V",
"_score" : 1.0,
"_source" : {
"action" : "login",
"success" : true
}
},
{
"_index" : "idx0",
"_type" : "_doc",
"_id" : "FM02dnkBHcKLV4rtyx0r",
"_score" : 1.0,
"_source" : {
"action" : "login",
"success" : true
}
},
{
"_index" : "idx0",
"_type" : "_doc",
"_id" : "Fc02dnkBHcKLV4rt1x0t",
"_score" : 1.0,
"_source" : {
"action" : "login",
"success" : false
}
},
{
"_index" : "idx0",
"_type" : "_doc",
"_id" : "Fs04dnkBHcKLV4rtKR3P",
"_score" : 1.0,
"_source" : {
"action" : "logout",
"success" : false
}
},
{
"_index" : "idx0",
"_type" : "_doc",
"_id" : "F804dnkBHcKLV4rtNR3J",
"_score" : 1.0,
"_source" : {
"action" : "logout",
"success" : true
}
}
]
}
}
Query:
{
"size": 0,
"aggs": {
"actions": {
"terms": {
"field": "action.keyword",
"size": 10
},
"aggs": {
"total": {
"value_count": {
"field": "action.keyword"
}
},
"success":{
"filter": {
"term": {
"success":true
}
},
"aggs": {
"success_cnt": {
"value_count": {
"field": "action.keyword"
}
}
}
},
"success_rate":{
"bucket_script": {
"buckets_path": {
"no_total":"total.value",
"no_success":"success>success_cnt.value"
},
"script": "(params.no_success/params.no_total)*100"
}
}
}
}
}
}
Response:
{
"took" : 3,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 5,
"relation" : "eq"
},
"max_score" : null,
"hits" : [ ]
},
"aggregations" : {
"actions" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : "login",
"doc_count" : 3,
"total" : {
"value" : 3
},
"success" : {
"doc_count" : 2,
"success_cnt" : {
"value" : 2
}
},
"success_rate" : {
"value" : 66.66666666666666
}
},
{
"key" : "logout",
"doc_count" : 2,
"total" : {
"value" : 2
},
"success" : {
"doc_count" : 1,
"success_cnt" : {
"value" : 1
}
},
"success_rate" : {
"value" : 50.0
}
}
]
}
}
}
Related
I have an OpenSearch index with the following mapping (simplified):
PUT /house
{
"mappings": {
"properties": {
"house": { "type": "keyword" },
"people": {
"type": "nested",
"properties": {
"forename": { "type": "keyword" },
"surname": { "type": "keyword" }
}
}
}
}
}
I'd like to retrieve an aggregate where the bucket key is "[forename] [surname]".
Toy data:
PUT /house/_doc/1
{
"house": "house1",
"people": [
{ "forename": "Dave", "surname": "Daveson" },
{ "forename": "Jeff", "surname": "Jeffson" }
]
}
PUT /house/_doc/2
{
"house": "house1",
"people": [
{ "forename": "Dave", "surname": "Daveson" },
{ "forename": "Jeffs", "surname": "Jeffsons" }
]
}
The following doesn't return what I'd expect, and I can't figure out what object paths to put in the script to get it to work:
GET house/_search
{
"aggs": {
"people": {
"nested": {
"path": "people"
},
"aggs": {
"people.name": {
"terms": {
"script": "[params._source['forename'], params._source['surname']].join(' ')"
}
}
}
}
},
"size": 0
}
Returns:
{
"took" : 5,
"timed_out" : false,
"_shards" : {
"total" : 5,
"successful" : 5,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 2,
"relation" : "eq"
},
"max_score" : null,
"hits" : [ ]
},
"aggregations" : {
"people" : {
"doc_count" : 4,
"people.name" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : "null null",
"doc_count" : 4
}
]
}
}
}
}
Without script I can aggregate correctly on forename, surname or both, but using both I can't reliably "join" the results since they can be sorted only on the doc_count or key:
GET house/_search
{
"aggs": {
"people": {
"nested": {
"path": "people"
},
"aggs": {
"people.forename": {
"terms": { "field": "people.forename" }
},
"people.surname": {
"terms": { "field": "people.surname" }
}
}
}
},
"size": 0
}
Returns:
{
"took" : 4,
"timed_out" : false,
"_shards" : {
"total" : 5,
"successful" : 5,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 2,
"relation" : "eq"
},
"max_score" : null,
"hits" : [ ]
},
"aggregations" : {
"people" : {
"doc_count" : 4,
"people.surname" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : "Daveson",
"doc_count" : 2
},
{
"key" : "Jeffson",
"doc_count" : 1
},
{
"key" : "Jeffsons",
"doc_count" : 1
}
]
},
"people.forename" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : "Dave",
"doc_count" : 2
},
{
"key" : "Jeff",
"doc_count" : 1
},
{
"key" : "Jeffs",
"doc_count" : 1
}
]
}
}
}
}
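You can get the result you want with the following query, which builds the bucket key from the doc values of the nested fields: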
GET house/_search
{
"aggs": {
"people": {
"nested": {
"path": "people"
},
"aggs": {
"people.name": {
"terms": {
"script": "doc['people.forename'].value + ' ' + doc['people.surname'].value"
}
}
}
}
},
"size": 0
}
Results:
"aggregations" : {
"people" : {
"doc_count" : 4,
"people.name" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : "Dave Daveson",
"doc_count" : 2
},
{
"key" : "Jeff Jeffson",
"doc_count" : 1
},
{
"key" : "Jeffs Jeffsons",
"doc_count" : 1
}
]
}
}
}
Getting incorrect inner hits from parent child relationship when combined with boolean query
Hi Everyone
I am getting incorrect inner hits results when combining parent-child query with boolean query. To reproduce the issue, I create this Index
PUT /my-index-000001
{
"mappings": {
"_routing": {
"required": true
},
"properties": {
"parentProperty": {
"type": "text"
},
"childProperty": {
"type": "text"
},
"id": {
"type": "integer"
},
"myJoinField": {
"type": "join",
"relations": {
"parent": "mychild"
}
}
}
}
}
then I add these three documents (document with Id equals "1" is the parent of the other two documents)
POST /my-index-000001/_doc/1?routing=1
{
"id": 1,
"parentProperty": "a parent document",
"myJoinField": "parent"
}
POST /my-index-000001/_doc/2?routing=1
{
"id": 2,
"childProperty": "queensland civil administration",
"myJoinField": {
"name":"mychild",
"parent":"1"
}
}
POST /my-index-000001/_doc/3?routing=1
{
"id": 3,
"childProperty": "beautiful weather",
"myJoinField": {
"name":"mychild",
"parent":"1"
}
}
Now the index is set up with 3 documents. I am looking for all child documents that meet this boolean query: [childProperty contains either "queensland civil" or both "beautiful" and "nothing"].
I expect that elastic returns only the child document with Id "2" since the child document with Id "3" does not have the term "nothing" in it.
The translated version of this query is as follows:
GET /my-index-000001/_search
{
"query": {
"bool": {
"minimum_should_match": 1,
"should": [
{
"has_child": {
"inner_hits": {
"name": "opr1"
},
"query": {
"query_string": {
"analyzer": "stop",
"query": "childProperty:(\"queensland civil\")"
}
},
"type": "mychild"
}
},
{
"bool": {
"must": [
{
"has_child": {
"inner_hits": {
"name": "opr2"
},
"query": {
"query_string": {
"query": "childProperty:(beautiful)"
}
},
"type": "mychild"
}
},
{
"has_child": {
"inner_hits": {
"name": "opr3"
},
"query": {
"query_string": {
"query": "childProperty:(nothing)"
}
},
"type": "mychild"
}
}
]
}
}
]
}
}
}
The result returned by Elasticsearch is as follows:
{
"took" : 24,
"timed_out" : false,
"_shards" : {
"total" : 5,
"successful" : 5,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 1,
"relation" : "eq"
},
"max_score" : 1.0,
"hits" : [
{
"_index" : "my-index-000001",
"_type" : "_doc",
"_id" : "1",
"_score" : 1.0,
"_routing" : "1",
"_source" : {
"id" : 1,
"parentProperty" : "a parent document",
"myJoinField" : "parent"
},
"inner_hits" : {
"opr1" : {
"hits" : {
"total" : {
"value" : 1,
"relation" : "eq"
},
"max_score" : 1.2814486,
"hits" : [
{
"_index" : "my-index-000001",
"_type" : "_doc",
"_id" : "2",
"_score" : 1.2814486,
"_routing" : "1",
"_source" : {
"id" : 2,
"childProperty" : "queensland civil administration",
"myJoinField" : {
"name" : "mychild",
"parent" : "1"
}
}
}
]
}
},
"opr2" : {
"hits" : {
"total" : {
"value" : 1,
"relation" : "eq"
},
"max_score" : 0.7549127,
"hits" : [
{
"_index" : "my-index-000001",
"_type" : "_doc",
"_id" : "3",
"_score" : 0.7549127,
"_routing" : "1",
"_source" : {
"id" : 3,
"childProperty" : "beautiful weather",
"myJoinField" : {
"name" : "mychild",
"parent" : "1"
}
}
}
]
}
},
"opr3" : {
"hits" : {
"total" : {
"value" : 0,
"relation" : "eq"
},
"max_score" : null,
"hits" : [ ]
}
}
}
}
]
}
}
As you can see in the result, Elasticsearch returns both child documents, which is clearly against what I have written in the "must" section of the query.
But if I rewrite the query as follows, it returns ONLY the expected document (the document with Id "2"):
GET /my-index-000001/_search
{
"query": {
"bool": {
"must": [
{
"has_child": {
"inner_hits": {
"name": "opr1"
},
"query": {
"bool": {
"minimum_should_match": 1,
"should": [
{
"query_string": {
"query": "childProperty:(\"queensland civil\")"
}
},
{
"bool": {
"must": [
{
"query_string": {
"query": "childProperty:(beautiful)"
}
},
{
"query_string": {
"query": "childProperty:(weather1)"
}
}
]
}
}
]
}
},
"type": "mychild"
}
}
]
}
}
}
here is the correct result:
{
"took" : 3,
"timed_out" : false,
"_shards" : {
"total" : 5,
"successful" : 5,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 1,
"relation" : "eq"
},
"max_score" : 1.0,
"hits" : [
{
"_index" : "my-index-000001",
"_type" : "_doc",
"_id" : "1",
"_score" : 1.0,
"_routing" : "1",
"_source" : {
"id" : 1,
"parentProperty" : "a parent document",
"myJoinField" : "parent"
},
"inner_hits" : {
"opr1" : {
"hits" : {
"total" : {
"value" : 1,
"relation" : "eq"
},
"max_score" : 1.2814486,
"hits" : [
{
"_index" : "my-index-000001",
"_type" : "_doc",
"_id" : "2",
"_score" : 1.2814486,
"_routing" : "1",
"_source" : {
"id" : 2,
"childProperty" : "queensland civil administration",
"myJoinField" : {
"name" : "mychild",
"parent" : "1"
}
}
}
]
}
}
}
}
]
}
}
I would appreciate it if someone could tell me what I did wrong in the first query, or whether this is the default behavior in Elasticsearch when it comes to parent/child relationships.
I am trying to run a terms query on Elasticsearch and can't figure out how to limit the results to only unique ones.
Assuming this is the query.
"query": {
"bool": {
"must": [{
"terms": {
"id": [
"1",
"2",
"3",
],
"boost": 1.0
}
}],
"adjust_pure_negative": true,
"boost": 1.0
}
},
"aggs": {
"top-results": {
"terms": {
"field": "id"
},
"aggs": {
"test": {
"top_hits": {
"size": 1
}
}
}
}
}
Ideally I would like to only have 3 results returned each one matching a id of 1, 2, or 3, but this query returns a lot more than that.
In order to mimic your scenario, I have indexed a set of 5 employee records with different salaries in Elasticsearch. I then fetch each matching salary with exactly one record (the top hit) per salary.
GET /employee/_doc/_search
{
"query": {
"bool": {
"should": [
{ "match": { "salary": 90000 }},
{ "match": { "salary": 80000 }}
]
}
},
"size" : 0,
"aggs": {
"salaries": {
"terms": {
"field": "salary",
"order": { "top_score": "desc" }
},
"aggs": {
"top_score": { "max": { "script": "_score" }},
"salary-num": { "top_hits": { "size": 1 }}
}
}
}
}
OUTPUT
{
...
"aggregations" : {
"salaries" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : 80000,
"doc_count" : 2,
"top_score" : {
"value" : 1.0
},
"salary-num" : {
"hits" : {
"total" : {
"value" : 2,
"relation" : "eq"
},
"max_score" : 1.0,
"hits" : [
{
"_index" : "employee",
"_type" : "_doc",
"_id" : "2",
"_score" : 1.0,
"_source" : {
"id" : 10,
"name" : "Lydia",
"dept" : "HR",
"salary" : 80000
}
}
]
}
}
},
{
"key" : 90000,
"doc_count" : 1,
"top_score" : {
"value" : 1.0
},
"salary-num" : {
"hits" : {
"total" : {
"value" : 1,
"relation" : "eq"
},
"max_score" : 1.0,
"hits" : [
{
"_index" : "employee",
"_type" : "_doc",
"_id" : "3",
"_score" : 1.0,
"_source" : {
"id" : 20,
"name" : "Flora",
"dept" : "Accounts",
"salary" : 90000
}
}
]
}
}
}
]
}
}
}
How to go about bucketing on a field and then aggregating all the values of a different field into an array. Here's a sample list.
{
"product": "xyz",
"action": "add",
"user": "bob"
},
{
"product": "xyz",
"action": "update",
"user": "bob"
},
{
"product": "xyz",
"action": "add",
"user": "alice"
},
{
"product": "xyz",
"action": "add",
"user": "eve"
},
{
"product": "xyz",
"action": "delete",
"user": "eve"
}
Expected output:
{
"buckets": [
{
"key": "add",
"doc_count": 3,
"user": ["bob", "alice", "eve"]
},
{
"key": "update",
"doc_count": 1,
"user": ["bob"]
},
{
"key": "delete",
"doc_count": 1,
"user": ["eve"]
}
]
}
How can I push the user values into an array in each bucket? Is there something similar to MongoDB's $push or $addToSet in Elasticsearch aggregations? Appreciate the help.
Here's the work-in-progress aggregation.
{
"size": 0,
"aggs": {
"product_filter": {
"filter": {
"term": {
"product": "xyz"
}
},
"aggs": {
"group_by_action": {
"terms": {
"field": "action",
"size":1000,
"order": {
"_count": "desc"
}
}
}
}
}
}
}
Would this do? I just chained one more terms aggregation, as shown below:
Aggregation Query:
POST <your_index_name>
{
"size": 0,
"aggs": {
"product_filter": {
"filter": {
"term": {
"product": "xyz"
}
},
"aggs": {
"group_by_action": {
"terms": {
"field": "action",
"size":1000,
"order": {
"_count": "desc"
}
},
"aggs": {
"myUsers": {
"terms": {
"field": "user",
"size": 10
}
}
}
}
}
}
}
}
Response:
{
"took" : 1,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 5,
"relation" : "eq"
},
"max_score" : null,
"hits" : [ ]
},
"aggregations" : {
"product_filter" : {
"doc_count" : 5,
"group_by_action" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : "add",
"doc_count" : 3,
"myUsers" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : "alice",
"doc_count" : 1
},
{
"key" : "bob",
"doc_count" : 1
},
{
"key" : "eve",
"doc_count" : 1
}
]
}
},
{
"key" : "delete",
"doc_count" : 1,
"myUsers" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : "eve",
"doc_count" : 1
}
]
}
},
{
"key" : "update",
"doc_count" : 1,
"myUsers" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : "bob",
"doc_count" : 1
}
]
}
}
]
}
}
}
}
I'm not sure if it is possible to have them in a single list as you've mentioned.
Hope this helps!
Say we have following documents in elasticsearch:
[{
"person": {
'name': 'asqar'
},
"bill": [
{
code:2,
value: 210
},
{
code:3,
value: 330
},
{
code:8,
value: 220
},
]
},
{
"person": {
'name': 'asqar'
},
"bill": [
{
code:2,
value: 340
},
{
code:4,
value: 340
},
{
code:1,
value: 200
},
]
},
{
"person": {
'name': 'asqar'
},
"bill": [
{
code:2,
value: 810
},
{
code:4,
value: 630
},
{
code:8,
value: 220
},
]
}]
I want to apply an aggregate function to specific objects in the bill array that satisfy some condition; for example, I want to calculate the average of value for the objects whose code is 2.
The bill field needs to be mapped as a nested type so that you can filter on its individual objects.
You can then use a filter aggregation inside a nested aggregation.
Mapping:
PUT testindex/_mapping
{
"properties": {
"person": {
"properties": {
"name": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
}
}
},
"bill": {
"type": "nested",
"properties": {
"code": {
"type": "integer"
},
"value":{
"type": "double"
}
}
}
}
}
Data:
"hits" : [
{
"_index" : "testindex",
"_type" : "_doc",
"_id" : "422tAWsBd-1D6Ztt1_Tb",
"_score" : 1.0,
"_source" : {
"person" : {
"name" : "asqar"
},
"bill" : [
{
"code" : 2,
"value" : 210
},
{
"code" : 3,
"value" : 330
},
{
"code" : 8,
"value" : 220
}
]
}
},
{
"_index" : "testindex",
"_type" : "_doc",
"_id" : "5G2uAWsBd-1D6ZttpfR9",
"_score" : 1.0,
"_source" : {
"person" : {
"name" : "asqar"
},
"bill" : [
{
"code" : 2,
"value" : 340
},
{
"code" : 4,
"value" : 340
},
{
"code" : 1,
"value" : 200
}
]
}
},
{
"_index" : "testindex",
"_type" : "_doc",
"_id" : "5W2vAWsBd-1D6ZttQfQ_",
"_score" : 1.0,
"_source" : {
"person" : {
"name" : "asqar"
},
"bill" : [
{
"code" : 2,
"value" : 810
},
{
"code" : 4,
"value" : 630
},
{
"code" : 8,
"value" : 220
}
]
}
}
]
Query:
GET testindex/_search
{
"size": 0,
"aggs": {
"terms_agg": {
"terms": {
"field": "person.name.keyword"
},
"aggs": {
"bill": {
"nested": {
"path": "bill"
},
"aggs": {
"bill_code": {
"filter": {
"term": {
"bill.code": 2
}
},
"aggs": {
"average": {
"avg": {
"field": "bill.value"
}
}
}
}
}
}
}
}
}
}
Output:
"hits" : {
"total" : {
"value" : 3,
"relation" : "eq"
},
"max_score" : null,
"hits" : [ ]
},
"aggregations" : {
"terms_agg" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : "asqar",
"doc_count" : 3,
"bill" : {
"doc_count" : 9,
"bill_code" : {
"doc_count" : 3,
"average" : {
"value" : 453.3333333333333
}
}
}
}
]
}
}
You first need to make sure that the bill field is of nested type. Then you can use a nested aggregation to work with the nested documents. Inside it, use a terms aggregation on bill.code with a child avg aggregation on bill.value. This gives you the average value for each code. Since you only want the aggregation for code 2, you can then use a bucket selector aggregation to keep only the bucket with code 2.
So the final aggregation query will look as below:
{
"aggs": {
"VALUE_NESTED": {
"nested": {
"path": "bill"
},
"aggs": {
"VALUE_TERM": {
"terms": {
"field": "bill.code"
},
"aggs": {
"VALUE_AVG": {
"avg": {
"field": "bill.value"
}
},
"CODE": {
"max": {
"field": "bill.code"
}
},
"CODE_FILTER": {
"bucket_selector": {
"buckets_path": {
"code": "CODE"
},
"script": "params.code == 2"
}
}
}
}
}
}
}
}
Sample output for the above query:
"aggregations": {
"VALUE_NESTED": {
"doc_count": 9,
"VALUE_TERM": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": 2,
"doc_count": 3,
"CODE": {
"value": 2
},
"VALUE_AVG": {
"value": 453.3333333333333
}
}
]
}
}
}