Elasticsearch query with time range and cardinality - elasticsearch

I need help with ES query for both Time Range and Cardinality. For now, my query for Time Range is as follow:
query={
"query": {
"bool": {
"must": [
{
"query_string": {
"query": querystr_var,
"analyze_wildcard": "true"
}
}
]
}
},
"size": 0,
"_source": {
"excludes": []
},
"aggs": {
"range": {
"date_range": {
"field": timeField_var,
"format" : "yyyy-MM-dd HH:mm:ss.SSS",
"ranges": [
{
"from": startDateTime_var,
"to": endDateTime_var,
"key": "CurrentCount"
},
{
"from": prev1WeekStartDateTime_var,
"to": prev1WeekEndDateTime_var,
"key": "Prev1WeekCount"
}
],
"keyed": "true"
}
}
}
}
The above query is work fine, but now I need to also count for unique "CustomerID" using cardinality, I tried below but the result is the same as before, no effect:
query={
"query": {
"bool": {
"must": [
{
"query_string": {
"query": querystr_var,
"analyze_wildcard": "true"
}
}
]
}
},
"size": 0,
"_source": {
"excludes": []
},
"aggs": {
"session_count": {
"cardinality": {
"field": "CustomerID"
}
},
"range": {
"date_range": {
"field": timeField_var,
"format" : "yyyy-MM-dd HH:mm:ss.SSS",
"ranges": [
{
"from": startDateTime_var,
"to": endDateTime_var,
"key": "CurrentCount"
},
{
"from": prevWeekStartDateTime_var,
"to": prevWeekEndDateTime_var,
"key": "PrevWeekCount"
}
],
"keyed": "true"
}
}
}
}
Can you please help with this query. Thanks so much!

Your query seems to be correct. I tried a similar query (with aggregation), with some sample data, and the result is as expected.
Index Data:
{
"date": "2015-01-01",
"customerId": 1
}
{
"date": "2015-02-01",
"customerId": 2
}
{
"date": "2015-03-01",
"customerId": 3
}
{
"date": "2015-04-01",
"customerId": 3
}
Search Query:
{
"size":0,
"aggs": {
"session_count": {
"cardinality": {
"field": "customerId"
}
},
"range": {
"date_range": {
"field": "date",
"ranges": [
{
"from": "2015-01-01",
"to": "2015-05-01"
}
],
"keyed": "true"
}
}
}
}
Search Result:
"aggregations": {
"session_count": {
"value": 3
},
"range": {
"buckets": {
"2015-01-01T00:00:00.000Z-2015-05-01T00:00:00.000Z": {
"from": 1.4200704E12,
"from_as_string": "2015-01-01T00:00:00.000Z",
"to": 1.4304384E12,
"to_as_string": "2015-05-01T00:00:00.000Z",
"doc_count": 4
}
}
}
}

Ok so after losing lots of hair, I found out that I need to put the "cardinality" under each of the separated date_range, something like this:
...
"aggs": {
"currentCount": {
"date_range": {
"field": timeField_var,
"format" : "yyyy-MM-dd HH:mm:ss.SSS",
"ranges": [
{
"from": startDateTime_var,
"to": endDateTime_var,
"key": "CurrentCount"
}
],
"keyed": "true"
},
"aggs": {
"currentUnique": {
"cardinality": {
"field": "CustomerID"
}
}
}
},
"previousCount": {
"date_range": {
"field": timeField_var,
"format" : "yyyy-MM-dd HH:mm:ss.SSS",
"ranges": [
{
"from": prevWeekStartDateTime_var,
"to": prevWeekEndDateTime_var,
"key": "previousUnique"
}
],
"keyed": "true"
},
"aggs": {
"previousUnique": {
"cardinality": {
"field": "CustomerID"
}
}
}
}

Related

How to shift elastic graph by 1 hr?

I have a visualization on hourly basis. Data from 1 to 2 is displayed at 1 o'clock. I want it to be displayed at 2 o'clock. How can I shift the graph by 1 ?
This is the query that I'm using-
Query -
{
"query": {
"bool": {
"must": [
{
"query_string": {
"query": "*",
"analyze_wildcard": true
}
},
{
"match": {
"server-status.name.keyword": {
"query": "https-x509",
"type": "phrase"
}
}
},
{
"range": {
"server-status.meta.current-time": {
"gte": 1550660541174,
"lte": 1550674941175,
"format": "epoch_millis"
}
}
}
],
"must_not": []
}
},
"size": 0,
"_source": {
"excludes": []
},
"aggs": {
"2": {
"date_histogram": {
"field": "server-status.meta.current-time",
"interval": "1h",
"time_zone": "CST6CDT",
"min_doc_count": 1
},
"aggs": {
"4": {
"terms": {
"field": "server-status.type.keyword",
"include": "http-server",
"size": 500,
"order": {
"1": "desc"
}
},
"aggs": {
"1": {
"sum": {
"field": "server-status.status-properties.request-rate.value",
"script": "_value/60"
}
},
"3": {
"terms": {
"field": "server-status.name.keyword",
"size": 5,
"order": {
"1": "desc"
}
},
"aggs": {
"1": {
"sum": {
"field": "server-status.status-properties.request-rate.value",
"script": "_value/60"
}
}
}
}
}
}
}
}
}
}
I would like to shift the values by 1 hr. For example if the value is 2.0 at 2019-02-20T05:00:00.000-06:00 I want it to be displayed for 2019-02-20T06:00:00.000-06:00
Just a possible workaround:
Kibana display time based on browser timezone. You could set the timezone in Kibana configuration for a timezone of your interests.
Update:
You could use date_range aggregation and choose key for those buckets. You will need to generate the aggregation based on your time_range and interval.
For example:
"aggs": {
"range": {
"date_range": {
"field": "date",
"ranges": [
{
"key": "bucket1",
"to": "2016/02/01"
},
{
"key": "bucket2",
"from": "2016/02/01",
"to" : "now/d"
}
]
}
}
}
Reference: https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-bucket-daterange-aggregation.html

Post filters and double nested aggregation with multiple filters in ElasticSearch

This is data sample that I have in my index:
[{
"filters": [
{
"group": "color",
"attribute": "red"
},
{
"group": "category",
"attribute": "office"
},
{
"group": "vendor",
"attribute": "some vendor"
},
{
"group": "sub category",
"attribute": "tables"
},
{
"group": "material",
"attribute": "wood"
}
],
"image": "img",
"itemId": "id"
},
{
"filters": [
{
"group": "color",
"attribute": "green"
},
{
"group": "category",
"attribute": "office"
},
{
"group": "vendor",
"attribute": "some vendor"
},
{
"group": "sub category",
"attribute": "tables"
}
],
"image": "img",
"itemId": "id"
},
{
"filters": [
{
"group": "color",
"attribute": "brown"
},
{
"group": "category",
"attribute": "office"
},
{
"group": "vendor",
"attribute": "some vendor"
},
{
"group": "sub category",
"attribute": "chairs"
},
{
"group": "style",
"attribute": "modern"
}
],
"image": "img",
"itemId": "id"
}]
Example of my query:
{
"size": 48,
"sort": [
{
"sequence": {
"order": "asc"
}
}
],
"aggs": {
"price": {
"range": {
"field": "salePrice",
"ranges": [
{
"to": 50.0
},
{
"from": 50.0,
"to": 100.0
},
{
"from": 100.0,
"to": 250.0
},
{
"from": 250.0,
"to": 500.0
},
{
"from": 500.0,
"to": 750.0
},
{
"from": 750.0,
"to": 1000.0
},
{
"from": 1000.0,
"to": 1500.0
},
{
"from": 1500.0,
"to": 2000.0
},
{
"from": 2000.0,
"to": 2500.0
},
{
"from": 2500.0,
"to": 3000.0
},
{
"from": 3000.0,
"to": 3500.0
},
{
"from": 3500.0
}
]
}
},
"filters": {
"nested": {
"path": "filters"
},
"aggs": {
"groups": {
"terms": {
"field": "filters.group"
},
"aggs": {
"attributes": {
"terms": {
"field": "filters.attribute"
}
}
}
}
}
}
},
"query": {
"bool": {
"must": [
{
"match": {
"searchPattern": {
"query": "chairs",
"operator": "and"
}
}
}
],
"filter": [
{
"nested": {
"query": {
"bool": {
"must": [
{
"term": {
"filters.group": {
"value": "sub category"
}
}
},
{
"term": {
"filters.attribute": {
"value": "tables"
}
}
}
]
}
},
"path": "filters",
"_name": "filters"
}
}
]
}
}
}
As you can see, here I do double aggregation for nested "filters" array, then do filtration and I receive such data:
category:
office(3)
color:
red(1)
green(1)
brown(1)
sub category:
tables(2)
........
And it's logical, because I do filtration and only then do aggregation. But I want to get other sub categories and show there count. So, if user selects several attributes in one group, I want to do Or filtering. If selects attribute in another group, wanna do and filtering. Like so:
{
"size": 48,
"sort": [
{
"sequence": {
"order": "asc"
}
}
],
"aggs": {
"price": {
"range": {
"field": "salePrice",
"ranges": [
{
"to": 50.0
},
{
"from": 50.0,
"to": 100.0
},
{
"from": 100.0,
"to": 250.0
},
{
"from": 250.0,
"to": 500.0
},
{
"from": 500.0,
"to": 750.0
},
{
"from": 750.0,
"to": 1000.0
},
{
"from": 1000.0,
"to": 1500.0
},
{
"from": 1500.0,
"to": 2000.0
},
{
"from": 2000.0,
"to": 2500.0
},
{
"from": 2500.0,
"to": 3000.0
},
{
"from": 3000.0,
"to": 3500.0
},
{
"from": 3500.0
}
]
}
},
"filters": {
"nested": {
"path": "filters"
},
"aggs": {
"groups": {
"terms": {
"field": "filters.group"
},
"aggs": {
"attributes": {
"terms": {
"field": "filters.attribute"
}
}
}
}
}
}
},
"query": {
"bool": {
"must": [
{
"match": {
"searchPattern": {
"query": "chairs",
"operator": "and"
}
}
}
],
"filter": [
{
"nested": {
"query": {
"bool": {
"must": [
{
"term": {
"filters.group": {
"value": "Category"
}
}
},
{
"bool": {
"should": [
{
"term": {
"filters.attribute": {
"value": "Decor"
}
}
},
{
"term": {
"filters.attribute": {
"value": "Patio Dining"
}
}
}
]
}
}
]
}
},
"path": "filters",
"_name": "filters"
}
},
{
"nested": {
"query": {
"bool": {
"must": [
{
"term": {
"filters.group": {
"value": "Type"
}
}
},
{
"term": {
"filters.attribute": {
"value": "Coverlets"
}
}
}
]
}
},
"path": "filters",
"_name": "filters"
}
}
]
}
}
}
But, of course, this query has the same problem. So, is it possible to solve the problem in ElasticSearch? I found few words about post_filter, but have no ideas how to use it and how it can helps me, because I need to recalculate group attributes each time. Is it possible or I need to "store" group attributes anywhere and show them after each aggregation?

Elasticsearch missing aggs

I am doing an Elasticsearch query and having problems with the aggs going missing.
If I do the query below I get the aggs back without issue:
{
"query": {
"filtered": {
"query": {
"query_string": {
"query": "*wet*",
"fields": [
"Name",
"Summary",
"Description",
"Location",
"Features",
"TypeName",
"CategoryName"
]
},
"filter": {
"term": {
"TypeID": "13"
}
}
}
}
},
"aggs": {
"type": {
"terms": {
"field": "TypeID"
}
},
"category": {
"terms": {
"field": "CategoryID"
}
},
"max_price": {
"max": {
"field": "Price"
}
},
"min_price": {
"min": {
"field": "Price"
}
},
"filter-type": {
"term": {
"TypeID": "13"
}
}
},
"from": 0,
"size": 50,
"sort": {
"_score": {
"order": "desc"
}
},
"explain": false
}
However, as soon as I add a filter, the aggs are no longer returned. I can't see what I am doing wrong so any help would be really appreciated.
The one that doesn't return aggs looks like this:
{
"query": {
"filtered": {
"query": {
"query_string": {
"query": "*wet*",
"fields": [
"Name",
"Summary",
"Description",
"Location",
"Features",
"TypeName",
"CategoryName"
]
},
"filter": {
"term": {
"TypeID": "13"
}
}
}
}
},
"aggs": {
"type": {
"terms": {
"field": "TypeID"
}
},
"category": {
"terms": {
"field": "CategoryID"
}
},
"max_price": {
"max": {
"field": "Price"
}
},
"min_price": {
"min": {
"field": "Price"
}
},
"filter-type": {
"term": {
"TypeID": "13"
}
}
},
"from": 0,
"size": 50,
"sort": {
"_score": {
"order": "desc"
}
},
"explain": false
}
This is the correct syntax for filters aggregation. You have to wrap all you aggregation inside filter like this
{
"size": 0,
"query": {
"filtered": {
"query": {
"query_string": {
"query": "*wet*",
"fields": [
"Name",
"Summary",
"Description",
"Location",
"Features",
"TypeName",
"CategoryName"
]
}
},
"filter": {
"term": {
"TypeID": "13"
}
}
}
},
"aggs": {
"filter-type": {
"filter": {
"term": {
"TypeID": "13"
}
},
"aggs": {
"type": {
"terms": {
"field": "TypeID"
}
},
"category": {
"terms": {
"field": "CategoryID"
}
},
"max_price": {
"max": {
"field": "Price"
}
},
"min_price": {
"min": {
"field": "Price"
}
}
}
}
}
}
Also in this case filter in aggs is redundant as you are already using the same filter in your query.

Term Filter with "now" in elasticsearch 1.7.1

When using a Term Filter, I'm not able to use now elasticsearch 1.7.1 anymore. It worked fine in previous versions, but now it returns:
nested: IllegalArgumentException[Invalid format: \"now/y\"]
A query example is:
GET _search
{
"size": 0,
"aggs": {
"price": {
"nested": {
"path": "prices"
},
"aggs": {
"valid": {
"filter": {
"term": {
"prices.referred_year": "now/y"
}
},
"aggs": {
"ranged": {
"range": {
"field": "prices.price",
"ranges": [
{
"to": 10
},
{
"from": 10
}
]
}
}
}
}
}
}
}
}
Schema:
curl -XPUT 'http://localhost:9200/test/' -d '{
"mappings": {
"product": {
"properties": {
"prices": {
"type": "nested",
"include_in_parent": true,
"properties": {
"price": {
"type": "float"
},
"referred_year": {
"type": "date",
"format": "year"
}
}
}
}
}
}
}'
Document example:
curl -XPUT 'http://localhost:9200/test/product/1' -d '{
"prices": [
{
"referred_year": "2015",
"price": "10.00"
},
{
"referred_year": "2016",
"price": "11.00"
}
]
}'
Expected result for the aggregation (gotten by substituting now/y with 2015):
"aggregations": {
"price": {
"doc_count": 2,
"valid": {
"doc_count": 1,
"ranged": {
"buckets": [
{
"key": "*-10.0",
"to": 10,
"to_as_string": "10.0",
"doc_count": 0
},
{
"key": "10.0-*",
"from": 10,
"from_as_string": "10.0",
"doc_count": 1
}
]
}
}
}
}
now/y etc still works fine in the Range Filter and in queries.
I appreciate any help on this. Thanks!
------- UPDATE -------
So, it seems now doesn't work in Term Filters at all, no matter the rounding.
So, although I haven't found any documentation saying so, it seems using the now operator is not allowed in Term Filters. Which actually makes sense.
The correct query would be:
GET test/_search
{
"size": 0,
"aggs": {
"price": {
"nested": {
"path": "prices"
},
"aggs": {
"valid": {
"filter": {
"range": {
"prices.referred_year": {
"gte": "now/y",
"lte": "now/y"
}
}
},
"aggs": {
"ranged": {
"range": {
"field": "prices.price",
"ranges": [
{
"to": 10
},
{
"from": 10
}
]
}
}
}
}
}
}
}
}

Elasticsearch aggregations, get additional field in bucket

I query ES index to filter results and get aggregations by selected terms. A sample query is like this:
GET buyer_requests/vehicle_requests/_search
{
"query": {
"filtered": {
"filter": {
"and": [
{
"terms": {
"vehicle.make.raw": [
"Audi",
"BMW",
"Chevrolet"
]
}
},
{
"range": {
"style.price": {
"gte": 15000,
"lte": 20000
}
}
},
{
"geo_distance": {
"distance": "20000km",
"info.pin": {
"lat": 42,
"lon": 21
}
}
}
]
}
}
},
"aggs": {
"makes": {
"filter": {
"range": {
"style.price": {
"gte": 5000,
"lte": 40000
}
}
},
"aggs": {
"makes": {
"terms": {
"field": "vehicle.make.raw",
"order": {
"_term": "asc"
}
}
}
}
},
"model": {
"filter": {
"and": [
{
"terms": {
"vehicle.make.raw": [
"Audi",
"BMW",
"Chevrolet"
]
}
}
]
},
"aggs": {
"models": {
"terms": {
"field": "vehicle.model.raw",
"size": 10,
"order": {
"_term": "asc"
}
}
}
}
}
}
}
The result I get is something like:
How can I get in "buckets" section on "models" terms another field from result set. I want to get reference to Makes so the result would look like this:
"model": {
"doc_count": 7,
"models": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "3 Series",
"make": "bmw", <----------- this key
"doc_count": 3
},
{
"key": "4 Series",
"make": "bmw", <----------- this key
"doc_count": 4
},
{
"key": "Camaro",
"make": "chevrolet", <----------- this key
"doc_count": 2
}
]
}
}
You need to move your models aggregation as a sub-aggregation of the make aggregation and re-arrange the filter aggregation a bit. The result won't be syntactically like you expect, but semantically you'll get the data you need.
GET buyer_requests/vehicle_requests/_search
{
"query": {
"filtered": {
"filter": {
"and": [
{
"terms": {
"vehicle.make.raw": [
"Audi",
"BMW",
"Chevrolet"
]
}
},
{
"range": {
"style.price": {
"gte": 15000,
"lte": 20000
}
}
},
{
"geo_distance": {
"distance": "20000km",
"info.pin": {
"lat": 42,
"lon": 21
}
}
}
]
}
}
},
"aggs": {
"makes": {
"filter": {
"and": [
{
"terms": {
"vehicle.make.raw": [
"Audi",
"BMW",
"Chevrolet"
]
}
},
{
"range": {
"style.price": {
"gte": 5000,
"lte": 40000
}
}
}
]
},
"aggs": {
"makes": {
"terms": {
"field": "vehicle.make.raw",
"order": {
"_term": "asc"
}
},
"aggs": {
"models": {
"terms": {
"field": "vehicle.model.raw",
"size": 10,
"order": {
"_term": "asc"
}
}
}
}
}
}
}
}
}

Resources