I'm trying to build a facets system using Elasticsearch to display the number of documents which match a query.
I'm currently doing this query on /_search?search_type=count:
{
  "query": {
    "query_string": {
      "query": "status:(1|2) AND categories:A"
    }
  },
  "aggs": {
    "all_products": {
      "global": {},
      "aggs": {
        "countries": {
          "aggs": {
            "counter": {
              "terms": { "min_doc_count": 0, "field": "country" },
              "aggs": { "unique": { "cardinality": { "field": "id" } } }
            }
          }
        },
        "categories": {
          "aggs": {
            "counter": {
              "terms": { "min_doc_count": 0, "field": "category" },
              "aggs": { "unique": { "cardinality": { "field": "id" } } }
            }
          }
        },
        "statuses": {
          "aggs": {
            "counter": {
              "terms": { "min_doc_count": 0, "field": "status" },
              "aggs": { "unique": { "cardinality": { "field": "id" } } }
            }
          }
        }
      }
    }
  }
}
the documents have the following structure:
{
  "id": 123,
  "name": "Title",
  "categories": [
    "A",
    "B",
    "C"
  ],
  "country": "United Kingdom",
  "status": 1
}
so the output I'm looking for should be:
Country
UK: 123
USA: 1000
Category
Motors: 23
Fashion: 1100
Status
Active: 1120
Not Active: 3
I don't know how to filter the aggregations properly: right now they count all the documents for the specified field, without taking the query status:(1|2) AND categories:A into account.
The elastic version is 1.7.2.
You simply need to remove the global aggregation, since it is not influenced by the query. Just move your countries, categories and statuses aggregations to the top level, like this:
{
  "query": {
    "query_string": {
      "query": "status:(1|2) AND categories:A"
    }
  },
  "aggs": {
    "countries": {
      "aggs": {
        "counter": {
          "terms": { "min_doc_count": 0, "field": "country" },
          "aggs": { "unique": { "cardinality": { "field": "id" } } }
        }
      }
    },
    "categories": {
      "aggs": {
        "counter": {
          "terms": { "min_doc_count": 0, "field": "category" },
          "aggs": { "unique": { "cardinality": { "field": "id" } } }
        }
      }
    },
    "statuses": {
      "aggs": {
        "counter": {
          "terms": { "min_doc_count": 0, "field": "status" },
          "aggs": { "unique": { "cardinality": { "field": "id" } } }
        }
      }
    }
  }
}
Fabio, I saw your post on Upwork. I have a working example for ES 2.4; maybe it will help you.
{
  "index": "{{YOUR ELASTIC INDEX}}",
  "type": "{{YOUR ELASTIC TYPE}}",
  "body": {
    "aggs": {
      "trademarks": { // aggregation name
        "terms": {
          "field": "id", // field name in the Elasticsearch index
          "size": 100 // number of results you need
        }
      },
      "materials": { // another aggregation name
        "terms": {
          "field": "materials.name", // field name in the Elasticsearch index
          "size": 100 // number of results you need
        }
      },
      "certificate": {
        "terms": {
          "field": "certificate_type_id",
          "size": 100
        }
      },
      "country": {
        "terms": {
          "field": "country.id",
          "size": 100
        }
      },
      "price": {
        "stats": {
          "field": "price"
        }
      }
    },
    "from": 0, // start from
    "size": 20, // results count
    "query": {
      "constant_score": {
        "filter": { // apply the filter
          "bool": {
            "should": [ // all the categories you need to show
              { "term": { "categories": "10142" } },
              { "term": { "categories": "10143" } },
              { "term": { "categories": "10144" } },
              { "term": { "categories": "10145" } },
              { "term": { "categories": "12957" } },
              { "term": { "categories": "13968" } },
              { "term": { "categories": "14353" } },
              { "term": { "categories": "16954" } },
              { "term": { "categories": "18243" } },
              { "term": { "categories": "10141" } }
            ],
            "must": [ // add another field to filter on here, for example trademark_id
              {
                "bool": {
                  "should": [
                    { "term": { "trademark_id": "2872" } },
                    { "term": { "trademark_id": "2879" } },
                    { "term": { "trademark_id": "2914" } }
                  ]
                }
              },
              {
                "bool": { // filter by price
                  "must": [
                    {
                      "range": {
                        "price": {
                          "from": 5.97,
                          "to": 15752.69
                        }
                      }
                    }
                  ]
                }
              }
            ]
          }
        }
      }
    },
    "sort": { // sort order goes here: "desc" or "asc"
      "updated_at": "desc" // updated_at is a field in the Elasticsearch index
    }
  }
}
Related
I want to get the maximum and minimum values, grouped by channel id, and I also want to get the maximum video id and the minimum video id.
{
  "query": {
    "term": {
      "channel_id.keyword": { "value": "UCQOd1f6pYldvhgvdQ_ktpGA" }
    }
  },
  "aggs": {
    "views_max": {
      "max": { "field": "views", "missing": 0 },
      "_source": ["video_id", "views"]
    },
    "views_min": {
      "min": { "field": "views", "missing": 0 },
      "_source": ["video_id", "views"]
    }
  }
}
{
  "aggs": {
    "2": {
      "terms": {
        "field": "channel_id.keyword",
        "order": { "1": "desc" },
        "size": 10
      },
      "aggs": {
        "1": {
          "max": { "field": "video_id" }
        },
        "3": {
          "min": { "field": "video_id" }
        }
      }
    }
  },
  "size": 0,
  "_source": { "excludes": [] },
  "query": {
    "bool": {
      "must": [],
      "filter": [
        {
          "bool": {
            "should": [
              {
                "match": { "channel_id.keyword": "UCQOd1f6pYldvhgvdQ_ktpGA" }
              }
            ],
            "minimum_should_match": 1
          }
        }
      ]
    }
  }
}
The above query will give the maximum and minimum of video_id for a particular channel_id.
{
  "aggs": {
    "2": {
      "terms": {
        "field": "channel_id.keyword",
        "order": { "1": "desc" },
        "size": 10
      },
      "aggs": {
        "1": {
          "max": { "field": "video_id" }
        },
        "3": {
          "min": { "field": "video_id" }
        }
      }
    }
  },
  "size": 0,
  "_source": { "excludes": [] }
}
With the above query, you can fetch the maximum and minimum video_id for each distinct channel_id.
I'm trying to get the billing for a product sold by a specific user, but it seems that the query is not being applied to the sum aggregation.
Could someone help me, please?
{
  "query": {
    "bool": {
      "filter": [
        {
          "term": { "seller": 1 }
        },
        {
          "term": { "product": 2 }
        }
      ]
    }
  },
  "size": 0,
  "aggs": {
    "product": {
      "terms": { "field": "product" },
      "aggregations": {
        "billing": {
          "sum": { "field": "price" }
        },
        "aggregation": {
          "bucket_sort": {
            "sort": [
              {
                "billing": { "order": "desc" }
              }
            ]
          }
        }
      }
    }
  }
}
Try nesting your existing aggregations within another terms aggregation on "seller".
{
  "query": {
    "bool": {
      "filter": [
        { "term": { "seller": 1 } },
        { "term": { "product": 2 } }
      ]
    }
  },
  "size": 0,
  "aggs": {
    "seller": {
      "terms": { "field": "seller", "size": 1 },
      "aggs": {
        "product": {
          "terms": { "field": "product", "size": 1 },
          "aggregations": {
            "billing": {
              "sum": { "field": "price" }
            },
            "aggregation": {
              "bucket_sort": {
                "sort": [
                  { "billing": { "order": "desc" } }
                ]
              }
            }
          }
        }
      }
    }
  }
}
I have a time range for which no hit docs exist. When a date_histogram-based query is run with extended_bounds over this no-data range, nothing is returned.
However, for a range which has at least one hit doc, bucket data is returned for the whole range specified with extended_bounds.
How can I achieve similar results over a range with no hit docs?
Sample query -
{
  "size": 0,
  "query": {
    "filtered": {
      "filter": {
        "bool": {
          "must": [
            {
              "term": { "kind": "hit-search" }
            },
            {
              "range": {
                "startTime": { "gte": 1506429661000, "lte": 1506516061000 }
              }
            }
          ]
        }
      }
    }
  },
  "aggs": {
    "perHost": {
      "terms": { "field": "user" },
      "aggs": {
        "ts": {
          "date_histogram": {
            "field": "startTime",
            "interval": "30m",
            "min_doc_count": 0,
            "extended_bounds": { "min": 1506429661000, "max": 1506516061000 }
          },
          "aggs": {
            "numQuery": {
              "cardinality": { "field": "queryId" }
            }
          }
        }
      }
    }
  },
  "from": 0
}
One can use the missing aggregation for this. The query above looks like this after the update:
{
  "size": 0,
  "query": {
    "filtered": {
      "filter": {
        "bool": {
          "must": [
            {
              "term": { "kind": "hit-search" }
            },
            {
              "range": {
                "startTime": { "gte": 1506429661000, "lte": 1506516061000 }
              }
            }
          ]
        }
      }
    }
  },
  "aggs": {
    "perHost": {
      "missing": { "field": "user" },
      "aggs": {
        "ts": {
          "date_histogram": {
            "field": "startTime",
            "interval": "30m",
            "min_doc_count": 0,
            "extended_bounds": { "min": 1506429661000, "max": 1506516061000 }
          },
          "aggs": {
            "numQuery": {
              "cardinality": { "field": "queryId" }
            }
          }
        }
      }
    }
  },
  "from": 0
}
An observation - extended_bounds doesn't seem to be working for missing.
{
  "aggs": {
    "by_countryCode": {
      "terms": {
        "field": "countryCode.keyword",
        "size": 100
      },
      "aggs": {
        "views": {
          "sum": { "field": "views" }
        },
        "shares": {
          "sum": { "field": "shares" }
        }
      }
    }
  },
  "query": {
    "bool": {
      "must": [
        {
          "match": { "userId": 1 }
        },
        {
          "match": { "artistId": 1001 }
        },
        {
          "range": {
            "date": {
              "gte": "20170310",
              "lte": "20170312"
            }
          }
        }
      ]
    }
  }
}
This will return the matched items and also give me the aggregation results too. The aggregation is the sum of the views and shares group by the country code.
But I want additional sum aggregations: I want the "total views" and "total shares" across all countries too. How could I do that?
Thanks!
Adding the two additional aggregations at the top most level should do it:
{
  "aggs": {
    "by_countryCode": {
      "terms": {
        "field": "countryCode.keyword",
        "size": 100
      },
      "aggs": {
        "views": { "sum": { "field": "views" } },
        "shares": { "sum": { "field": "shares" } }
      }
    },
    "total_views": {
      "sum": { "field": "views" }
    },
    "total_shares": {
      "sum": { "field": "shares" }
    }
  },
  "query": {
    "bool": {
      "must": [
        { "match": { "userId": 1 } },
        { "match": { "artistId": 1001 } },
        {
          "range": {
            "date": {
              "gte": "20170310",
              "lte": "20170312"
            }
          }
        }
      ]
    }
  }
}
{
  "aggs": {
    "by_countryCode": {
      "terms": {
        "field": "countryCode.keyword",
        "size": 100
      },
      "aggs": {
        "views": {
          "sum": { "field": "views" }
        },
        "shares": {
          "sum": { "field": "shares" }
        }
      }
    },
    "total_views": {
      "sum": { "field": "views" }
    },
    "total_shares": {
      "sum": { "field": "shares" }
    }
  },
  "query": {
    "bool": {
      "must": [
        {
          "match": { "userId": 1 }
        },
        {
          "match": { "artistId": 1001 }
        },
        {
          "range": {
            "date": {
              "gte": "20170310",
              "lte": "20170312"
            }
          }
        }
      ]
    }
  }
}
Thanks, Roman!
With the help of your codes, I made some small changes. It works!
I have the following request, which returns the count of all documents with a status of either "Accepted", "Released" or "Closed".
{
  "size": 0,
  "query": {
    "bool": {
      "must": [
        {
          "query_string": { "query": "*", "analyze_wildcard": true }
        }
      ],
      "must_not": []
    }
  },
  "aggs": {
    "slices": {
      "terms": {
        "field": "status.raw",
        "include": { "pattern": "Accepted|Released|Closed" }
      }
    }
  }
}
In my case the response is:
"buckets": [
  { "key": "Closed", "doc_count": 2216 },
  { "key": "Accepted", "doc_count": 8 },
  { "key": "Released", "doc_count": 6 }
]
Now I'd like to add all of them up into a single field.
I tried using pipeline aggregations and even tried the following sum_bucket (which apparently only works on multi-bucket):
"total": {
  "sum_bucket": { "buckets_path": "slices" }
}
Anyone able to help me out with this?
With sum_bucket and your already existent aggregation:
"aggs": {
  "slices": {
    "terms": {
      "field": "status.raw",
      "include": { "pattern": "Accepted|Released|Closed" }
    }
  },
  "sum_total": {
    "sum_bucket": { "buckets_path": "slices._count" }
  }
}
What I would do is to use the filters aggregation instead and define all the buckets you need, like this:
{
  "size": 0,
  "query": {
    "bool": {
      "must": [
        {
          "query_string": { "query": "*", "analyze_wildcard": true }
        }
      ],
      "must_not": []
    }
  },
  "aggs": {
    "slices": {
      "filters": {
        "filters": {
          "accepted": {
            "term": { "status.raw": "Accepted" }
          },
          "released": {
            "term": { "status.raw": "Released" }
          },
          "closed": {
            "term": { "status.raw": "Closed" }
          },
          "total": {
            "terms": {
              "status.raw": ["Accepted", "Released", "Closed"]
            }
          }
        }
      }
    }
  }
}
You could add count with value_count sub aggregation and then use sum_bucket pipeline aggregation
{
  "aggs": {
    "unique_status": {
      "terms": {
        "field": "status.raw",
        "include": "Accepted|Released|Closed"
      },
      "aggs": {
        "count": {
          "value_count": { "field": "status.raw" }
        }
      }
    },
    "sum_status": {
      "sum_bucket": { "buckets_path": "unique_status>count" }
    }
  },
  "size": 0
}