Elasticsearch summing buckets - elasticsearch

I have the following request, which returns the count of all documents with a status of either "Accepted", "Released" or "Closed".
{
"size": 0,
"query": {
"bool": {
"must": [
{
"query_string": {
"query": "*",
"analyze_wildcard": true
}
}
],
"must_not": []
}
},
"aggs": {
"slices": {
"terms": {
"field": "status.raw",
"include": {
"pattern": "Accepted|Released|Closed"
}
}
}
}
}
In my case the response is:
"buckets": [
{
"key": "Closed",
"doc_count": 2216
},
{
"key": "Accepted",
"doc_count": 8
},
{
"key": "Released",
"doc_count": 6
}
]
Now I'd like to add all of them up into a single field.
I tried using pipeline aggregations and even tried the following sum_bucket (which apparently only works on multi-bucket aggregations):
"total":{
"sum_bucket":{
"buckets_path": "slices"
}
}
Anyone able to help me out with this?

With sum_bucket and your existing aggregation:
"aggs": {
"slices": {
"terms": {
"field": "status.raw",
"include": {
"pattern": "Accepted|Released|Closed"
}
}
},
"sum_total": {
"sum_bucket": {
"buckets_path": "slices._count"
}
}
}

What I would do is to use the filters aggregation instead and define all the buckets you need, like this:
{
"size": 0,
"query": {
"bool": {
"must": [
{
"query_string": {
"query": "*",
"analyze_wildcard": true
}
}
],
"must_not": []
}
},
"aggs": {
"slices": {
"filters": {
"filters": {
"accepted": {
"term": {
"status.raw": "Accepted"
}
},
"released": {
"term": {
"status.raw": "Released"
}
},
"closed": {
"term": {
"status.raw": "Closed"
}
},
"total": {
"terms": {
"status.raw": [
"Accepted",
"Released",
"Closed"
]
}
}
}
}
}
}
}

You could add a count with a value_count sub-aggregation and then use the sum_bucket pipeline aggregation:
{
"aggs": {
"unique_status": {
"terms": {
"field": "status.raw",
"include": "Accepted|Released|Closed"
},
"aggs": {
"count": {
"value_count": {
"field": "status.raw"
}
}
}
},
"sum_status": {
"sum_bucket": {
"buckets_path": "unique_status>count"
}
}
},
"size": 0
}

Related

How to scroll through aggregations

I have the below query:
{
"aggs": {
"user-ids": {
"terms": {
"field": "userId",
"size": 10000
},
"aggs": {
"excluded_tags_agg": {
"filter": {
"bool": {
"must": [
{
"match_phrase": {
"tag": "Yes"
}
},
{
"match_phrase": {
"tag": "No"
}
}
],
"minimum_should_match": 1
}
}
},
"filter_userids_which_do_not_have_any_docs_with_excluded_tags": {
"bucket_selector": {
"buckets_path": {
"doc_count": "excluded_tags_agg > _count"
},
"script": "params.doc_count == 0"
}
}
}
}
},
"size": 0
}
But I may have more than 10k results, so I need to scroll through the buckets. I have used the composite aggregation before, but I'm not sure how to combine it with the above.

Filter is not being applied to aggregation

I'm trying to get the billing of a product sold by a specific user, but it seems that the query is not being applied to the sum aggregation.
Could someone help me, please?
{
"query": {
"bool": {
"filter": [
{ "term": { "seller": 1 } },
{"term": { "product": 2 } }
]
}
},
"size": 0,
"aggs": {
"product": {
"terms": {
"field": "product"
},
"aggregations": {
"billing": {
"sum": {
"field": "price"
}
},
"aggregation": {
"bucket_sort": {
"sort": [
{
"billing": {
"order": "desc"
}
}
]
}
}
}
}
}
}
Try nesting your existing aggregations within another terms aggregation on "seller".
{
"query": {
"bool": {
"filter": [
{
"term": {
"seller": 1
}
},
{
"term": {
"product": 2
}
}
]
}
},
"size": 0,
"aggs": {
"seller": {
"terms": {
"field": "seller",
"size": 1
},
"aggs": {
"product": {
"terms": {
"field": "product",
"size": 1
},
"aggregations": {
"billing": {
"sum": {
"field": "price"
}
},
"aggregation": {
"bucket_sort": {
"sort": [
{
"billing": {
"order": "desc"
}
}
]
}
}
}
}
}
}
}
}

Add multiple filters to nested aggregation filters Elasticsearch

So I would like to add a couple more filters to the aggregate filter for the "inner" portion of the aggregate section. The other two filters I need to add are in the query section. I was able to get this code to work correctly, it just needs the second and third nested filters added from the first section down into the aggregate area, where I am only filtering by the "givingMatch.db_type" terms currently.
Here is the current code that just needs the additional filters added:
GET /testserver/_search
{
"query": {
"bool": {
"filter": [
{
"nested": {
"path": "givingMatch",
"query": {
"bool": {
"filter": {
"terms": {
"givingMatch.db_type": [
"FECmatch",
"StateMatch"
]
}
}
}
}
}
},
{
"nested": {
"path": "givingMatch",
"query": {
"bool": {
"filter": {
"range": {
"givingMatch.Status": {
"from": 0,
"to": 8
}
}
}
}
}
}
},
{
"nested": {
"path": "givingMatch",
"query": {
"bool": {
"filter": {
"range": {
"givingMatch.QualityScore": {
"from": 17
}
}
}
}
}
}
}
]
}
},
"aggs": {
"categories": {
"nested": {
"path": "givingMatch"
},
"aggs": {
"inner": {
"filter": {
"terms": {
"givingMatch.db_type":["FECmatch","StateMatch"]
}
},
"aggs":{
"org_category": {
"terms": {
"field": "givingMatch.org_category",
"size": 1000
},
"aggs": {
"total": {
"sum":{
"field": "givingMatch.low_gift"
}
}
}
}
}
}
}
}
},
"size": 0
}
Giving these results:
...."aggregations": {
"categories": {
"doc_count": 93084,
"inner": {
"doc_count": 65492,
"org_category": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "DEM",
"doc_count": 28829,
"total": {
"value": 29859163
}
},
{
"key": "REP",
"doc_count": 21561,
"total": {
"value": 69962305
}
},...
Hopefully this will save someone else a few hours. To add multiple filters, the aggregate section would become:
GET materielelectrique_search_alias/product/_search?explain=false
{
"aggs": {
"categories": {
"nested": {
"path": "givingMatch"
},
"aggs": {
"inner": {
"filter": {
"bool": {
"must": [
{
"terms": {
"givingMatch.db_type": [
"FECmatch",
"StateMatch"
]
}
},
{
"range": {
"givingMatch.QualityScore": {
"from": 17
}
}
},
{
"range": {
"givingMatch.Status": {
"from": 0,
"to": 8
}
}
}
]
}
},
"aggs": {
"org_category": {
"terms": {
"field": "givingMatch.org_category",
"size": 1000
},
"aggs": {
"total": {
"sum": {
"field": "givingMatch.low_gift"
}
}
}
}
}
}
}
}
}
}
This allows for multiple filters within the nested aggs.

Elasticsearch applying filters to aggregation

I'm trying to build a facets system using Elasticsearch to display the number of documents which match a query.
I'm currently doing this query on /_search?search_type=count:
{
"query": {
"query_string": {
"query": "status:(1|2) AND categories:A"
}
},
"aggs": {
"all_products": {
"global": {},
"aggs": {
"countries": {
"aggs": {
"counter": {
"terms": ["min_doc_count": 0, "field": "country"],
"aggs": ["unique": ["cardinality": ["field": "id"]]]
}
}
},
"categories": {
"aggs": {
"counter": {
"terms": ["min_doc_count": 0, "field": "category"],
"aggs": ["unique": ["cardinality": ["field": "id"]]]
}
}
},
"statuses": {
"aggs": {
"counter": {
"terms": ["min_doc_count": 0, "field": "status"],
"aggs": ["unique": ["cardinality": ["field": "id"]]]
}
}
}
}
}
}
}
the documents have the following structure:
{
"id": 123,
"name": "Title",
"categories": ["A", "B", "C"],
"country": "United Kingdom",
"status": 1
}
so the output I'm looking for should be:
Country
UK: 123
USA: 1000
Category
Motors: 23
Fashion: 1100
Status
Active: 1120
Not Active: 3
I don't know how to properly filter the aggregations, because right now they are counting all the documents in the specified field, without considering the query status:(1|2) AND categories:A.
The elastic version is 1.7.2.
You simply need to remove the global aggregation, since it is not influenced by the query. Just move your countries, categories and statuses aggregations to the top level, like this:
{
"query": {
"query_string": {
"query": "status:(1|2) AND categories:A"
}
},
"aggs": {
"countries": {
"aggs": {
"counter": {
"terms": ["min_doc_count": 0, "field": "country"],
"aggs": ["unique": ["cardinality": ["field": "id"]]]
}
}
},
"categories": {
"aggs": {
"counter": {
"terms": ["min_doc_count": 0, "field": "category"],
"aggs": ["unique": ["cardinality": ["field": "id"]]]
}
}
},
"statuses": {
"aggs": {
"counter": {
"terms": ["min_doc_count": 0, "field": "status"],
"aggs": ["unique": ["cardinality": ["field": "id"]]]
}
}
}
}
}
Fabio, I saw your post on Upwork. I have a working example for ES 2.4; maybe it will help you.
"index": "{{YOUR ELASTIC INDEX}}",
"type": "{{YOUR ELASTIC TYPE}}",
"body": {
"aggs": {
"trademarks": { // aggs NAME
"terms": {
"field": "id", // field name in ELASTIC base
"size": 100 // count of results YOU need
}
},
"materials": { //another aggs NAME
"terms": {
"field": "materials.name", // field name in ELASTIC base
"size": 100 / count of results YOU need
}
},
"certificate": {
"terms": {
"field": "certificate_type_id",
"size": 100
}
},
"country": {
"terms": {
"field": "country.id",
"size": 100
}
},
"price": {
"stats": {
"field": "price"
}
}
},
"from": 0, // start from
"size": 20, // results count
"query": {
"constant_score": {
"filter": { //apply filter
"bool": {
"should": [{ // all categories You need to show
"term": {
"categories": "10142"
}
}, {
"term": {
"categories": "10143"
}
}, {
"term": {
"categories": "10144"
}
}, {
"term": {
"categories": "10145"
}
}, {
"term": {
"categories": "12957"
}
}, {
"term": {
"categories": "13968"
}
}, {
"term": {
"categories": "14353"
}
}, {
"term": {
"categories": "16954"
}
}, {
"term": {
"categories": "18243"
}
}, {
"term": {
"categories": "10141"
}
}],
"must": [{ // if you want another filed to filter for example filter BY field trademark_id
"bool": {
"should": [{
"term": {
"trademark_id": "2872"
}
}, {
"term": {
"trademark_id": "2879"
}
}, {
"term": {
"trademark_id": "2914"
}
}]
}
}, {
"bool": { // filter by PRICE
"must": [{
"range": {
"price": {
"from": 5.97,
"to": 15752.69
}
}
}]
}
}]
}
}
}
},
"sort": { //here SORT BY desc or asc
"updated_at": "desc" //updated_at - field from ES base
}
}

Elasticsearch aggregations, get additional field in bucket

I query ES index to filter results and get aggregations by selected terms. A sample query is like this:
GET buyer_requests/vehicle_requests/_search
{
"query": {
"filtered": {
"filter": {
"and": [
{
"terms": {
"vehicle.make.raw": [
"Audi",
"BMW",
"Chevrolet"
]
}
},
{
"range": {
"style.price": {
"gte": 15000,
"lte": 20000
}
}
},
{
"geo_distance": {
"distance": "20000km",
"info.pin": {
"lat": 42,
"lon": 21
}
}
}
]
}
}
},
"aggs": {
"makes": {
"filter": {
"range": {
"style.price": {
"gte": 5000,
"lte": 40000
}
}
},
"aggs": {
"makes": {
"terms": {
"field": "vehicle.make.raw",
"order": {
"_term": "asc"
}
}
}
}
},
"model": {
"filter": {
"and": [
{
"terms": {
"vehicle.make.raw": [
"Audi",
"BMW",
"Chevrolet"
]
}
}
]
},
"aggs": {
"models": {
"terms": {
"field": "vehicle.model.raw",
"size": 10,
"order": {
"_term": "asc"
}
}
}
}
}
}
}
The result I get is something like:
How can I get another field from the result set in the "buckets" section of the "models" terms aggregation? I want to get a reference to the make, so that the result would look like this:
"model": {
"doc_count": 7,
"models": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "3 Series",
"make": "bmw", <----------- this key
"doc_count": 3
},
{
"key": "4 Series",
"make": "bmw", <----------- this key
"doc_count": 4
},
{
"key": "Camaro",
"make": "chevrolet", <----------- this key
"doc_count": 2
}
]
}
}
You need to move your models aggregation as a sub-aggregation of the make aggregation and re-arrange the filter aggregation a bit. The result won't be syntactically like you expect, but semantically you'll get the data you need.
GET buyer_requests/vehicle_requests/_search
{
"query": {
"filtered": {
"filter": {
"and": [
{
"terms": {
"vehicle.make.raw": [
"Audi",
"BMW",
"Chevrolet"
]
}
},
{
"range": {
"style.price": {
"gte": 15000,
"lte": 20000
}
}
},
{
"geo_distance": {
"distance": "20000km",
"info.pin": {
"lat": 42,
"lon": 21
}
}
}
]
}
}
},
"aggs": {
"makes": {
"filter": {
"and": [
{
"terms": {
"vehicle.make.raw": [
"Audi",
"BMW",
"Chevrolet"
]
}
},
{
"range": {
"style.price": {
"gte": 5000,
"lte": 40000
}
}
}
]
},
"aggs": {
"makes": {
"terms": {
"field": "vehicle.make.raw",
"order": {
"_term": "asc"
}
},
"aggs": {
"models": {
"terms": {
"field": "vehicle.model.raw",
"size": 10,
"order": {
"_term": "asc"
}
}
}
}
}
}
}
}
}

Resources