elastic query to get events where corresponding pair is missing - elasticsearch

I have transaction records that follow this lifecycle:
Event when transaction is received [RCVD]
Event when transaction gets pending for execution [PNDG] (OPTIONAL step)
Event when it gets executed [SENT]
Following are the 7 sample events in the index:
{trxID: 1, status:RCVD}
{trxID: 2, status:RCVD}
{trxID: 3, status:RCVD}
{trxID: 2, status:PNDG}
{trxID: 3, status:PNDG}
{trxID: 1, status:SENT}
{trxID: 2, status:SENT}
I need to find all the transactions that went to the pending state but have not been executed yet. In other words, there should be a PNDG status for the transaction but no SENT status.
I am trying not to do it at java layer.
I did an aggregation on trxID, and then I did sub aggregation on status.
Then I cannot figure out how to get those records where bucket has only PNDG in sub-aggregation. I am not sure if I am thinking in right direction.
The result I am expecting is trxID 3, because for this transaction we got a PNDG status but have not received SENT yet. On the other hand, trxID 1 should not be reported, as it never went to the PNDG (pending) state, irrespective of whether a SENT status is reported or not.

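You can count the distinct status values under each transaction ID (a cardinality aggregation on status.keyword) and keep only the buckets that have exactly one status.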
GET index24/_search
{
"size": 0,
"aggs": {
"transactionId": {
"terms": {
"field": "trxID",
"size": 10
},
"aggs": {
"status": {
"terms": {
"field": "status.keyword",
"size": 10
}
},
"count": {
"cardinality": {
"field": "status.keyword"
}
},
"my_bucketselector": {
"bucket_selector": {
"buckets_path": {
"statusCount": "count"
},
"script": "params.statusCount==1"
}
}
}
}
}
}
Response:
"aggregations" : {
"transactionId" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : 4,
"doc_count" : 1,
"count" : {
"value" : 1
},
"status" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : "PNDG",
"doc_count" : 1
}
]
}
}
]
}
}
EDIT 1:
I have tried with below :-
Get the max date for each transaction ID, and then get the max date among its PNDG (pending) documents. If both dates are the same, then pending is the latest status.
Data:
[
{
"_index" : "index24",
"_type" : "_doc",
"_id" : "aYCs0m0BD5PlkoxXxO36",
"_score" : 1.0,
"_source" : {
"trxID" : 1,
"status" : "RCVD",
"date" : "2019-10-15T12:00:00"
}
},
{
"_index" : "index24",
"_type" : "_doc",
"_id" : "aoCs0m0BD5PlkoxX7e35",
"_score" : 1.0,
"_source" : {
"trxID" : 1,
"status" : "PNDG",
"date" : "2019-10-15T12:01:00"
}
},
{
"_index" : "index24",
"_type" : "_doc",
"_id" : "a4Ct0m0BD5PlkoxXCO06",
"_score" : 1.0,
"_source" : {
"trxID" : 1,
"status" : "SENT",
"date" : "2019-10-15T12:02:00"
}
},
{
"_index" : "index24",
"_type" : "_doc",
"_id" : "bICt0m0BD5PlkoxXQe0Y",
"_score" : 1.0,
"_source" : {
"trxID" : 2,
"status" : "RCVD",
"date" : "2019-10-15T12:00:00"
}
},
{
"_index" : "index24",
"_type" : "_doc",
"_id" : "bYCt0m0BD5PlkoxXZO2x",
"_score" : 1.0,
"_source" : {
"trxID" : 2,
"status" : "PNDG",
"date" : "2019-10-15T12:01:00"
}
},
{
"_index" : "index24",
"_type" : "_doc",
"_id" : "boCt0m0BD5PlkoxXju1H",
"_score" : 1.0,
"_source" : {
"trxID" : 3,
"status" : "RCVD",
"date" : "2019-10-15T12:00:00"
}
},
{
"_index" : "index24",
"_type" : "_doc",
"_id" : "b4Ct0m0BD5PlkoxXou0-",
"_score" : 1.0,
"_source" : {
"trxID" : 3,
"status" : "SENT",
"date" : "2019-10-15T12:01:00"
}
}
]
Query:
GET index24/_search
{
"size": 0,
"aggs": {
"transactionId": {
"terms": {
"field": "trxID",
"size": 10000
},
"aggs": {
"maxDate": {
"max": {
"field": "date" ---> get max date under transactions
}
},
"pending_status": {
"filter": {
"term": {
"status.keyword": "PNDG" ---> filter for pending
}
},
"aggs": {
"filtered_maxdate": {
"max": {
"field": "date" --> get date under pending
}
}
}
},
"buckets_latest_status_pending": { -->filter if max date==pending date
"bucket_selector": {
"buckets_path": {
"filtereddate": "pending_status>filtered_maxdate",
"maxDate": "maxDate"
},
"script": "params.filtereddate==params.maxDate"
}
}
}
}
}
}
Response:
{
"transactionId" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : 2, --> only transaction id 2 is returned
"doc_count" : 2,
"pending_status" : {
"doc_count" : 1,
"filtered_maxdate" : {
"value" : 1.57114086E12,
"value_as_string" : "2019-10-15T12:01:00.000Z"
}
},
"maxDate" : {
"value" : 1.57114086E12,
"value_as_string" : "2019-10-15T12:01:00.000Z"
}
}
]
}
}

I did an aggregation on trxID, and then I did sub aggregation on status.
That's a great start !!!
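Now, you can leverage the bucket_selector pipeline aggregation to surface only the transactions that have just 1 or 2 distinct status values (the count here is a cardinality aggregation on status.keyword, not a document count), i.e. the script condition params.eventCount < 3 catches the buckets that have RCVD and/or PNDG documents but no SENT documents. Note that this relies on every transaction passing through PNDG before SENT; a transaction with only RCVD and SENT also has two distinct statuses and would match as well: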
POST events/_search
{
"size": 0,
"aggs": {
"trx": {
"terms": {
"field": "trxID",
"size": 1000
},
"aggs": {
"count": {
"cardinality": {
"field": "status.keyword"
}
},
"not_sent": {
"bucket_selector": {
"buckets_path": {
"eventCount": "count"
},
"script": "params.eventCount < 3"
}
}
}
}
}
}
In your case, this would yield this, i.e. only event with trxID = 3:
"aggregations" : {
"trx" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : 3,
"doc_count" : 2,
"count" : {
"value" : 2
}
}
]
}
}

Related

is there a way of showing documents after a sum aggregation?

I've been trying lately to retrieve information about sales on Kibana DSL.
I've been told to show vendors information PLUS their monthly sales.
(I'll use the "Kibana_sample_data_ecommerce" for this example)
I already did this aggregation in order to group all clients by their 'customer_id':
#Aggregations (group by)
GET kibana_sample_data_ecommerce/_search
{
"size": 0,
"aggs": {
"by user_id": {
"terms": {
"field": "customer_id"
},
"aggs": {
"add_field_to_bucket": {
"top_hits": {"size": 1, "_source": {"includes": ["customer_full_name"]}}
}
}
}
}
}
in which i've included customer_full_name in the result:
"aggregations" : {
"by user_id" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 2970,
"buckets" : [
{
"key" : "27",
"doc_count" : 348,
"add_field_to_bucket" : {
"hits" : {
"total" : 348,
"max_score" : 1.0,
"hits" : [
{
"_index" : "kibana_sample_data_ecommerce",
"_type" : "_doc",
"_id" : "fhwUR3sBpfDKGuVlpu8r",
"_score" : 1.0,
"_source" : {
"customer_full_name" : "Elyssa Underwood"
}
}
]
}
}
}
So, in this result i know that 'Elyssa Underwood' with 'customerid' '27' has 348 hits (or documents related).
I also need to know the total spent by 'Elyssa' on those products, using the field 'products.taxful_price'.
The thing is that i cannot perform a subaggregation on top_hits (as far as i know); Also I've tried to do a sum_aggregation, but it ends on the same result (i got my sum, but i cannot access top_hits sub aggregation at that point).
At the end of the day i want to have a result like this:
"hits" : [
{
"_index" : "kibana_sample_data_ecommerce",
"_type" : "_doc",
"_id" : "fhwUR3sBpfDKGuVlpu8r",
"_score" : 1.0,
"_source" : {
"customer_full_name" : "Elyssa Underwood",
"total_spent": 1234.5678
}
}
]
Is there something I can do to achieve it?.
PS: I'm using ElasticSearch 5.x and also I have access to NEST client, if there's a solution I can reach through it.
Thanks In Advance.
I have used below as sample data.
Data:
{
"customer_id":2,
"client-name":"b",
"purchase": 2001
}
Query:
GET index/_search
{
"size": 0,
"aggs": {
"NAME": {
"terms": {
"field": "customer_id",
"size": 10
},
"aggs": {
"total_sales": {
"sum": {
"field": "purchase"
}
},
"documents":{
"top_hits": {
"size": 10
}
}
}
}
}
}
Result:
{
"key" : 2,
"doc_count" : 1,
"documents" : {
"hits" : {
"total" : {
"value" : 1,
"relation" : "eq"
},
"max_score" : 1.0,
"hits" : [
{
"_index" : "index1",
"_type" : "_doc",
"_id" : "0HPzcHsBjw4ziwrzGzrq",
"_score" : 1.0,
"_source" : {
"customer_id" : 2,
"client-name" : "b",
"purchase" : 2001
}
}
]
}
},
"total_sales" : {
"value" : 2001.0
}
}

Aggregating multiple values of single key into a single bucket elasticsearch

I have a elastic search index with following mapping
{
"probe_alert" : {
"mappings" : {
"alert" : {
"properties" : {
"id" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
},
"probeChannelId" : {
"type" : "long"
},
"severity" : {
"type" : "integer"
},
}
}
}
}
}
Sample indexed data : For each channel index has a severity value
[
{
"_index" : "probe_alert",
"_type" : "alert",
"_id" : "b_cu0nYB8EMvknGcmMxk",
"_score" : 0.0,
"_source" : {
"id" : "b_cu0nYB8EMvknGcmMxk",
"probeChannelId" : 15,
"severity" : 2,
}
},
{
"_index" : "probe_alert",
"_type" : "alert",
"_id" : "b_cu0nYB8EMvknGcmMxk",
"_score" : 0.0,
"_source" : {
"id" : "b_cu0nYB8EMvknGcmMxk",
"probeChannelId" : 17,
"severity" : 5,
}
},
{
"_index" : "probe_alert",
"_type" : "alert",
"_id" : "b_cu0nYB8EMvknGcmMxk",
"_score" : 0.0,
"_source" : {
"id" : "b_cu0nYB8EMvknGcmMxk",
"probeChannelId" : 18,
"severity" : 10,
}
},
{
"_index" : "probe_alert",
"_type" : "alert",
"_id" : "b_cu0nYB8EMvknGcmMxk",
"_score" : 0.0,
"_source" : {
"id" : "b_cu0nYB8EMvknGcmMxk",
"probeChannelId" : 19,
"severity" : 5,
}
},
{
"_index" : "probe_alert",
"_type" : "alert",
"_id" : "b_cu0nYB8EMvknGcmMxk",
"_score" : 0.0,
"_source" : {
"id" : "b_cu0nYB8EMvknGcmMxk",
"probeChannelId" :20,
"severity" : 10,
}
}
]
I have done terms aggregation for fetching max severity value for a single probeChannelId but now I want to aggregate on multiple values of probeChannelId and get max value of severity.
Expected Result :
"aggregations" : {
"aggs_by_channels" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : [15,17],
"doc_count" : 1,
"aggs_by_severity" : {
"value" : 5.0
}
},
{
"key" : [18,19,20],
"doc_count" : 1,
"aggs_by_severity" : {
"value" : 10.0
}
}
]
}
}
In the response, I want each group of probeChannelId values to show its highest severity value.
If you want to get the highest severity value, among a set of documents, then you can try out the below query using the Adjacency matrix aggregation
Search Query:
{
"size": 0,
"aggs": {
"interactions": {
"adjacency_matrix": {
"filters": {
"[15,17]": {
"terms": {
"probeChannelId": [
15,
17
]
}
},
"[18,19,20]": {
"terms": {
"probeChannelId": [
18,
19,
20
]
}
}
}
},
"aggs": {
"max_severity": {
"max": {
"field": "severity"
}
}
}
}
}
}
Search Result:
"aggregations": {
"interactions": {
"buckets": [
{
"key": "[15,17]",
"doc_count": 2,
"max_severity": {
"value": 5.0 // note this
}
},
{
"key": "[18,19,20]",
"doc_count": 3,
"max_severity": {
"value": 10.0 // note this
}
}
]
}

Elastic Search Intersection Query

I want to fetch common words of list of users sorted by total count.
example:
I have a index of words used by a user.
docs:
[
{
user_id: 1,
word: 'food',
count: 2
},
{
user_id: 1,
word: 'thor',
count: 1
},
{
user_id: 1,
word: 'beer',
count: 7
},
{
user_id: 2,
word: 'summer',
count: 12
},
{
user_id: 2,
word: 'thor',
count: 4
},
{
user_id: 1,
word: 'beer',
count: 2
},
..otheruserdetails..
]
input: user_ids: [1, 2]
desired output:
[
{
'word': 'beer',
'total_count': 9
},
{
'word': 'thor',
'total_count': 5
}
]
what I have so far:
fetch all docs using user_id in user_id list (bool should query)
process docs in app layer.
loop through each keyword
check if keyword is present for each user_id
if yes, find count
else, dispose and go to next keyword
However, this is not feasible because the word docs are going to grow huge and the app layer won't keep up. Is there any way to move this to an ES query?
You can use Terms aggregation and Value Count aggregation
One can look at "Terms aggregation" as a "Group By". Output will give a unique list of userIds, list of all words under user and finally count of each word
{
"from": 0,
"size": 10,
"query": {
"terms": {
"user_id": [
"1",
"2"
]
}
},
"aggs": {
"users": {
"terms": {
"field": "user_id",
"size": 10
},
"aggs": {
"words": {
"terms": {
"field": "word.keyword",
"size": 10
},
"aggs": {
"word_count": {
"value_count": {
"field": "word.keyword"
}
}
}
}
}
}
}
}
Result
"hits" : [
{
"_index" : "index89",
"_type" : "_doc",
"_id" : "gFRzr3ABAWOsYG7t2tpt",
"_score" : 1.0,
"_source" : {
"user_id" : 1,
"word" : "thor",
"count" : 1
}
},
{
"_index" : "index89",
"_type" : "_doc",
"_id" : "flRzr3ABAWOsYG7t0dqI",
"_score" : 1.0,
"_source" : {
"user_id" : 1,
"word" : "food",
"count" : 2
}
},
{
"_index" : "index89",
"_type" : "_doc",
"_id" : "f1Rzr3ABAWOsYG7t19ps",
"_score" : 1.0,
"_source" : {
"user_id" : 2,
"word" : "thor",
"count" : 4
}
},
{
"_index" : "index89",
"_type" : "_doc",
"_id" : "gVRzr3ABAWOsYG7t8NrR",
"_score" : 1.0,
"_source" : {
"user_id" : 1,
"word" : "food",
"count" : 2
}
},
{
"_index" : "index89",
"_type" : "_doc",
"_id" : "glRzr3ABAWOsYG7t-Npj",
"_score" : 1.0,
"_source" : {
"user_id" : 1,
"word" : "thor",
"count" : 1
}
},
{
"_index" : "index89",
"_type" : "_doc",
"_id" : "g1Rzr3ABAWOsYG7t_9po",
"_score" : 1.0,
"_source" : {
"user_id" : 2,
"word" : "thor",
"count" : 4
}
}
]
},
"aggregations" : {
"users" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : 1,
"doc_count" : 4,
"words" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : "food",
"doc_count" : 2,
"word_count" : {
"value" : 2
}
},
{
"key" : "thor",
"doc_count" : 2,
"word_count" : {
"value" : 2
}
}
]
}
},
{
"key" : 2,
"doc_count" : 2,
"words" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : "thor",
"doc_count" : 2,
"word_count" : {
"value" : 2
}
}
]
}
}
]
}
}
You can use aggregations along with filter for the user like below:
{
"size": 0,
"aggs": {
"words_stats": {
"filter": {
"terms": {
"user_id": [
"1",
"2"
]
}
},
"aggs": {
"words": {
"terms": {
"field": "word.keyword"
},
"aggs": {
"total_count": {
"sum": {
"field": "count"
}
}
}
}
}
}
}
}
The results will be:
{
"key" : "beer",
"doc_count" : 2,
"total_count" : {
"value" : 9.0
}
},
{
"key" : "thor",
"doc_count" : 2,
"total_count" : {
"value" : 5.0
}
},
{
"key" : "food",
"doc_count" : 1,
"total_count" : {
"value" : 2.0
}
},
{
"key" : "summer",
"doc_count" : 1,
"total_count" : {
"value" : 12.0
}
}
Here is what I had to do:
I referred to @Rakesh Chandru's and @jaspreet chahal's answers and came up with this. This query handles both the intersection and the sorting.
Process:
filter by user_ids
group_by(terms aggs) on keyword (word in example),
order by aggregating (sum) counts
{
size: 0, // because we do not want result of filtered records
query: {
terms: { user_id: user_ids } // filter by user_ids
},
aggs: {
group_by_keyword: {
terms: {
field: "keyword", // group by keyword
min_doc_count: 2, // where count >= 2
order: { agg_count: "desc" }, // order by count
size
},
aggs: {
agg_count: {
sum: {
field: "count" // aggregating count
}
}
}
}
}
}

GET TOP HIT FROM A VALUE IF THIS IS 0 KIBANA

This is my first post; I spent the weekend looking for an answer without a good result.
I will try to explain my issue, I have this Index
ST ID
0 1
1 1
0 2
1 2
0 2
1 3
0 3
I need to show the last record for each ID only when its ST is 0. For example, in this index I have to show only ID 1 and ID 2, because the last record for ID 1 and for ID 2 has ST set to 0.
Could someone help me with this issue?
BR
Mapping:
PUT index34
{
"mappings": {
"properties": {
"ST":{
"type": "integer"
},
"ID":{
"type": "integer"
},
"Date":{
"type": "date"
}
}
}
}
Data:
[
{
"_index" : "index34",
"_type" : "_doc",
"_id" : "LO7Z7W0B_-hMjUaqtwHw",
"_score" : 1.0,
"_source" : {
"ST" : 1,
"ID" : 1,
"Date" : "2019-10-21T12:00:00Z"
}
},
{
"_index" : "index34",
"_type" : "_doc",
"_id" : "Le7Z7W0B_-hMjUaq0QEz",
"_score" : 1.0,
"_source" : {
"ST" : 0,
"ID" : 1,
"Date" : "2019-10-21T12:01:00Z"
}
},
{
"_index" : "index34",
"_type" : "_doc",
"_id" : "Lu7a7W0B_-hMjUaqAwE0",
"_score" : 1.0,
"_source" : {
"ST" : 1,
"ID" : 2,
"Date" : "2019-10-21T12:02:00Z"
}
},
{
"_index" : "index34",
"_type" : "_doc",
"_id" : "L-7a7W0B_-hMjUaqGAEr",
"_score" : 1.0,
"_source" : {
"ST" : 0,
"ID" : 2,
"Date" : "2019-10-21T12:04:00Z"
}
},
{
"_index" : "index34",
"_type" : "_doc",
"_id" : "MO7a7W0B_-hMjUaqNAGA",
"_score" : 1.0,
"_source" : {
"ST" : 0,
"ID" : 3,
"Date" : "2019-10-21T12:04:00Z"
}
},
{
"_index" : "index34",
"_type" : "_doc",
"_id" : "Me7a7W0B_-hMjUaqTQFP",
"_score" : 1.0,
"_source" : {
"ST" : 1,
"ID" : 3,
"Date" : "2019-10-21T12:06:00Z"
}
}
]
Query: I am getting the max date for each ID and then the max date among the documents where ST was zero. If these two match (which means the ST = 0 document was the latest one), then I keep that bucket.
GET index34/_search
{
"size": 0,
"aggs": {
"ID": {
"terms": {
"field": "ID",
"size": 10000
},
"aggs": {
"maxDate": {
"max": {
"field": "Date"
}
},
"pending_status": {
"filter": {
"term": {
"ST": 0
}
},
"aggs": {
"filtered_maxdate": {
"max": {
"field": "Date"
}
}
}
},
"buckets_latest_status_pending": {
"bucket_selector": {
"buckets_path": {
"filtereddate": "pending_status>filtered_maxdate",
"maxDate": "maxDate"
},
"script": "params.filtereddate==params.maxDate"
}
}
}
}
}
}
Response:
"aggregations" : {
"ID" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : 1,
"doc_count" : 2,
"pending_status" : {
"doc_count" : 1,
"filtered_maxdate" : {
"value" : 1.57165926E12,
"value_as_string" : "2019-10-21T12:01:00.000Z"
}
},
"maxDate" : {
"value" : 1.57165926E12,
"value_as_string" : "2019-10-21T12:01:00.000Z"
}
},
{
"key" : 2,
"doc_count" : 2,
"pending_status" : {
"doc_count" : 1,
"filtered_maxdate" : {
"value" : 1.57165944E12,
"value_as_string" : "2019-10-21T12:04:00.000Z"
}
},
"maxDate" : {
"value" : 1.57165944E12,
"value_as_string" : "2019-10-21T12:04:00.000Z"
}
}
]
}

elasticsearch groupby and filter by regex condition

It's a bit hard for me to define the question as I'm not very experienced with Elasticsearch. I'm focusing the question on my specific problem:
Assuming I have the following records:
{
id: 1
name: bla1_1.aaa
},
{
id: 1
name: bla1_2.bbb
},
{
id: 2
name: bla2_1.aaa
},
{
id: 2
name: bla2_2.aaa
}
What I want is to GET all the ids that have all of their names ending with aaa.
I was thinking about grouping by id and then doing a regex query like so: .*\.aaa so that all the names must satisfy the regex query.
On this particular example I would get id: 2 back.
How do I do it?
Let me know if there's anything I need to add to clarify the question.
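A regexp query can be used.
In the pattern, .* matches any character any number of times, including zero. Note that an unescaped . also matches any single character, so to require a literal dot before aaa the pattern should be written as .*\\.aaa in the JSON body.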
Terms aggregation will give you unique "ids" and number of docs under them.
Mapping :
PUT regex
{
"mappings": {
"properties": {
"id":{
"type":"integer"
},
"name":{
"type":"text",
"fields": {
"keyword":{
"type":"keyword"
}
}
}
}
}
}
Data:
"hits" : [
{
"_index" : "regex",
"_type" : "_doc",
"_id" : "olQXjW0BywGFQhV7k84P",
"_score" : 1.0,
"_source" : {
"id" : 1,
"name" : "bla1_1.aaa"
}
},
{
"_index" : "regex",
"_type" : "_doc",
"_id" : "o1QXjW0BywGFQhV7us6B",
"_score" : 1.0,
"_source" : {
"id" : 1,
"name" : "bla1_2.bbb"
}
},
{
"_index" : "regex",
"_type" : "_doc",
"_id" : "pFQXjW0BywGFQhV77c6J",
"_score" : 1.0,
"_source" : {
"id" : 2,
"name" : "bla2_1.aaa"
}
},
{
"_index" : "regex",
"_type" : "_doc",
"_id" : "pVQYjW0BywGFQhV7Dc6F",
"_score" : 1.0,
"_source" : {
"id" : 2,
"name" : "bla2_2.aaa"
}
}
]
Query:
GET regex/_search
{
"size":0,
"query": {
"regexp": {
"name.keyword": {
"value": ".*.aaa" ---> name ending with .aaa
}
}
},
"aggs": {
"unique_ids": {
"terms": {
"field": "id",
"size": 10
}
}
}
}
Result:
"hits" : {
"total" : {
"value" : 3,
"relation" : "eq"
},
"max_score" : null,
"hits" : [ ]
},
"aggregations" : {
"unique_ids" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : 2, ---> 2 doc under id 2
"doc_count" : 2
},
{
"key" : 1, ----> 1 doc under id 1
"doc_count" : 1
}
]
}
}
Edit:
Use a bucket selector to keep only the buckets where the total count of docs for an id matches the count of docs that match the regex:
GET regex/_search
{
"size": 0,
"aggs": {
"unique_ids": {
"terms": {
"field": "id",
"size": 10
},
"aggs": {
"totalCount": { ---> to get total count of id(all docs)
"value_count": {
"field": "id"
}
},
"filter_agg": {
"filter": {
"bool": {
"must": [
{
"regexp": {
"name.keyword": ".*.aaa"
}
}
]
}
},
"aggs": {
"finalCount": { -->total count of docs matching regex
"value_count": {
"field": "id"
}
}
}
},
"mybucket_selector": { ---> include buckets where totalcount==finalcount
"bucket_selector": {
"buckets_path": {
"FinalCount": "filter_agg>finalCount",
"TotalCount": "totalCount"
},
"script": "params.FinalCount==params.TotalCount"
}
}
}
}
}
}

Resources