sql to es : get limit page and order result on agg - elasticsearch

-- One row per (src_ip, threat_target) pair with the time span and number of
-- matching traffic rows; returns the first page of 10 groups.
SELECT
    MIN(timestamp) AS first_time,  -- earliest event in the group
    MAX(timestamp) AS last_time,   -- latest event in the group
    src_ip,
    threat_target,
    COUNT(*) AS count
FROM traffic
GROUP BY
    src_ip,
    threat_target
ORDER BY
    first_time DESC,
    -- tie-breakers keep page boundaries stable when first_time values collide
    src_ip,
    threat_target
LIMIT 10 OFFSET 0
I want to get this result, but I don't know how to get limit size and where to use sort
{
  "size": 0,
  "aggregations": {
    "src_ip": {
      "terms": {
        "field": "src_ip.keyword"
      },
      "aggregations": {
        "threat_target": {
          "terms": {
            "field": "threat_target.keyword"
          },
          "aggregations": {
            "last_time": {
              "max": {
                "field": "timestamp"
              }
            },
            "first_time": {
              "min": {
                "field": "timestamp"
              }
            }
          }
        }
      }
    }
  }
}

Aggregation pagination is generally not supported in Elasticsearch; however, the composite aggregation provides a way to paginate your aggregation results.
Unlike the other multi-bucket aggregation the composite aggregation can be used to paginate all buckets from a multi-level aggregation efficiently.
Excerpt from Composite-Aggregation ES Docs.
CHECK: THIS
Apart from "ORDER BY first_time desc", the query below should work for you. As far as I know, a composite aggregation can only order its buckets by
the grouping fields themselves (src_ip, threat_target), not by a metric such as first_time.
GET traffic/_search
{
  "size": 0,
  "aggs": {
    "my_bucket": {
      "composite": {
        "size": 2, // page size: number of (src_ip, threat_target) buckets returned per request
        /* From the second request onwards, include "after" with the after_key
           returned by the previous response to fetch the next page:
        "after": {
          "src_ip": "1.2.3.5",
          "threat_target": "T3"
        }, */
        "sources": [
          {
            "src_ip": {
              "terms": {
                // assumes the same mapping as the question (keyword sub-fields) - confirm against your index
                "field": "src_ip.keyword",
                "order": "desc"
              }
            }
          },
          {
            "threat_target": {
              "terms": {
                "field": "threat_target.keyword"
              }
            }
          }
        ]
      },
      "aggs": {
        // metrics are computed on the document field "timestamp", not on the aggregation's own name
        "first_time": {
          "min": {
            "field": "timestamp"
          }
        },
        "last_time": {
          "max": {
            "field": "timestamp"
          }
        }
      }
    }
  }
}

Related

querydsl group by and sum of a column for each group

I have a query that i use to filter and get sum of a column for each group in the dev tool.
GET readers/_search
{
"size": 0,
"aggs": {
"usr_agg": {
"terms": {
"field": "assetType.keyword"
},
"aggs": {
"by_device_os": {
"sum": {
"field": "count"
}
}
}
}
}
}
how can i apply this as a filter to get this as a table?

ElasticSearch Approximating GROUP_BY, SUM, SORT, with PAGINATION

I'm using Elasticsearch and I see questions now and then about doing some aggregations with sorting or aggregations with paging, but I never see anything GROUP_BY, SUM, SORT, and PAGINATION together. If I were to write what I'm looking for as SQL, here it is (without the PAGINATION).
select invoice_date, address_2, company_name, sum(amount)
from my_table
group by invoice_date, address_2, company_name
order by sum(amount) desc
I tried doing this using many different techniques like composite aggregation, however it appears I can't do the ORDER_BY with this on the summation.
# composite aggregation
POST /746ee3a6-2b87-4288-9f20-3bf3a9e47e93/_search
{
"size": 0,
"aggs": {
"my_buckets": {
"composite": {
"sources": [
{ "Address2": { "terms": { "field": "Address2" } } },
{ "Company_Description": { "terms": { "field": "Company_Description" } } },
{ "InvoiceDate": { "terms": { "field": "InvoiceDate" } } }
]
},
"aggregations": {
"summation": {
"sum": { "field": "GrossValue" }
}
}
}
}
}
I tried repeated nested aggregations but I saw a comment somewhere that with many nested levels you can't ORDER_BY either.
POST /746ee3a6-2b87-4288-9f20-3bf3a9e47e93/_search
{
"size":0,
"from":0,
"sort":[{"Address2":"asc"}],
"query":{"bool":{"must":[{"match":{"taxonomy_full_code":-1}}]}},
"track_total_hits":true,
"aggs":{
"agg0":{
"terms":{"field":"Address2"},
"aggs":{
"agg1":{
"terms":{"field":"Company_Description"},
"aggs":{
"agg2":{
"terms":{"field":"InvoiceDate"},
"aggs":{"sum(GrossValue)":{"sum":{"field":"GrossValue"}}
}
}
}
}
}
}
}
}
Same with multi-term aggregation
# multi-term aggregation
POST /746ee3a6-2b87-4288-9f20-3bf3a9e47e93/_search
{
"size": 0,
"aggs": {
"rule_builder": {
"multi_terms": {
"terms": [
{"field": "Address2"},
{"field": "Company_Description"},
{"field": "InvoiceDate"}
]
},
"aggs":{
"sum(GrossValue)":{"sum":{"field":"GrossValue"}}
}
}
}
}
I'm using Elasticsearch 7.14. Is what I'm looking for possible in this version?

Elasticsearch: Aggregate all unique values of a field and apply a condition or filter by another field

My documents look like this:
{
"ownID": "Val_123",
"parentID": "Val_456",
"someField": "Val_78",
"otherField": "Val_90",
...
}
I am trying to get all (unique, as in one instance) results for a list of ownID values, while filtering by a list of parentID values and vice-versa.
What I did so far is:
Get (separate!) unique values for ownID and parentID in key1 and key2
{
"size": 0,
"aggs": {
"key1": {
"terms": {
"field": "ownID",
"include": {
"partition": 0,
"num_partitions": 10
},
"size": 100
}
},
"key2": {
"terms": {
"field": "parentID",
"include": {
"partition": 0,
"num_partitions": 10
},
"size": 100
}
}
}
}
Use filter to get (some) results matching either ownID OR parentID
{
"size": 0,
"query": {
"bool": {
"should": [
{
"terms": {
"ownID": ["Val_1","Val_2","Val_3"]
}
},
{
"terms": {
"parentID": ["Val_8","Val_9"]
}
}
]
}
},
"aggs": {
"my_filter": {
"top_hits": {
"size": 30000,
"_source": {
"include": ["ownID", "parentID","otherField"]
}
}
}
}
}
However, I need to get separate results for each filter in the second query, and get:
(1) the parentID of the documents matching some value of ownID
(2) the ownID for the documents matching some value of parentID.
So far I managed to do it using two similar queries (see below for (1)), but I would ideally want to combine them and query only once.
{
  "size": 0,
  "query": {
    "bool": {
      "should": [
        {
          "terms": {
            "ownID": ["Val_1", "Val_2", "Val_3"]
          }
        }
      ]
    }
  },
  "aggs": {
    "my_filter": {
      "top_hits": {
        "size": 30000,
        "_source": {
          "include": "parentID"
        }
      }
    }
  }
}
I'm using Elasticsearch version 5.2
If I understood your question correctly, you want the aggregation counts to be computed irrespective of the filter query, while the search hits contain only the filtered documents. Elasticsearch has a dedicated kind of filter for this, the "post filter"; see https://www.elastic.co/guide/en/elasticsearch/reference/5.5/search-request-post-filter.html
It's really simple: it filters the search hits after the aggregations have been computed.

Paging the top_hits aggregation in ElasticSearch

Right now I'm doing a top_hits aggregation in Elastic Search that groups my data by a field, sorts the groups by a date, and chooses the top 1.
I need to somehow page this aggregation results in a way that I can pass through the pageSize and the pageNumber, but I don't know how.
In addition to this, I also need the total results of this aggregation so we can show it in a table in our web interface.
The aggregation looks like this:
POST my_index/_search
{
"size": 0,
"aggs": {
"top_artifacts": {
"terms": {
"field": "artifactId.keyword"
},
"aggs": {
"top_artifacts_hits": {
"top_hits": {
"size": 1,
"sort": [{
"date": {
"order": "desc"
}
}]
}
}
}
}
}
}
If I understand what you want, you should be able to paginate with a Composite Aggregation. You still pass a size parameter (the page size), but instead of a from offset you pass the key of the last bucket of the previous page as after.
POST my_index/_search
{
  "size": 0,
  "aggs": {
    "top_artifacts": {
      "composite": {
        "sources": [
          {
            "artifact": {
              "terms": {
                "field": "artifactId.keyword"
              }
            }
          }
        ],
        "size": 1, // optional page size: how many buckets to return per request
        "after": {
          "artifact": "FOO_BAZ" // return only buckets that come after this bucket key
        }
      },
      "aggs": {
        "hits": {
          "top_hits": {
            "size": 1,
            "sort": [
              {
                // same sort field as the original terms/top_hits query in the question
                "date": {
                  "order": "desc"
                }
              }
            ]
          }
        }
      }
    }
  }
}

Elasticsearch Ordering terms aggregation buckets after field in top hits sub aggregation

I would like to order the buckets from a terms aggregation based on a property possessed by the first element in a top hits aggregation.
My best effort query looks like this (with syntax errors):
{
"aggregations": {
"toBeOrdered": {
"terms": {
"field": "parent_uuid",
"size": 1000000,
"order": {
"topAnswer._source.id": "asc"
}
},
"aggregations": {
"topAnswer": {
"top_hits": {
"size": 1
}
}
}
}
}
}
Does anyone know how to accomplish this?
Example:
{
"a":1,
"b":2,
"id":4
}
{
"a":1,
"b":3,
"id":1
}
{
"a":2,
"b":4,
"id":3
}
Grouping by "a" and ordering the buckets by "id" (desc) and sorting the top hits on "b" (desc) would give:
{2:{
"a":2,
"b":4,
"id":3
},1:{
"a":1,
"b":3,
"id":1
}}
You can do it with the following query. The idea is to show, for each parent_uuid bucket, the single top hit with the largest b value (top_hits sorted on b descending), and to order the parent_uuid buckets by their largest id value, descending, using a max sub-aggregation (topSort).
{
"aggregations": {
"toBeOrdered": {
"terms": {
"field": "parent_uuid",
"size": 1000000,
"order": {
"topSort": "desc"
}
},
"aggregations": {
"topAnswer": {
"top_hits": {
"size": 1,
"sort": {
"b": "desc"
}
}
},
"topSort": {
"max": {
"field": "id"
}
}
}
}
}
}
Try it out and report if this works out for you.

Resources