The Vega-lite x-axis displays the specified number of data - elasticsearch

Here is my generated chart. The x-axis shows too many values, so how can I change my code to display only a specified number of them?
I tried limiting the data source in the ES DSL query as a starting point, but cumulative_sum needs the complete data source.
{
"$schema": "https://vega.github.io/schema/vega-lite/v2.json",
"data": {
"url": {
"index": "x-*",
"body": {
"query": {"match_all": {}},
"size": 0,
"aggs": {
"group_by_date": {
"date_histogram": {
"field": "timestamp",
"interval": "day"
},
"aggs": {
"cumulative_docs": {
"cumulative_sum": {"buckets_path": "_count"}
}
}
}
}
}
},
"format": {
"property": "aggregations.group_by_date.buckets"
}
},
"width": "container",
"height": 1200,
"layer": [
{
"mark": {
"type": "line",
"point": {"filled": false, "fill": "black"}
}
},
{
"mark": {
"type": "text",
"align": "left",
"baseline": "middle",
"fontSize": 15
},
"encoding": {
"text": {"field": "cumulative_docs.value", "type": "quantitative"}
}
}
],
"encoding": {
"x": {
"axis": {"title": "date"},
"field": "key_as_string",
"type": "nominal"
},
"y": {
"aggregate": "y",
"axis": {"title": "project_num"},
"field": "cumulative_docs.value",
"type": "quantitative",
"stack": "normalize"
}
}
}

It is difficult to debug without actual data, but what happens if you change the type of the x encoding to temporal?
"x": {
"axis": {"title": "date"},
"field": "key_as_string",
"type": "temporal"
},
EDIT
Remove this section.
{
"mark": {
"type": "text",
"align": "left",
"baseline": "middle",
"fontSize": 15
},
"encoding": {
"text": {"field": "cumulative_docs.value", "type": "quantitative"}
}
}

Related

Issue- Not displaying the custom visualization in vega

I am trying to make a custom word cloud visualization from an existing index using Vega in a Kibana dashboard.
It shows the error "Cannot read properties of undefined (reading 'datum')".
I have used a transform of type "formula" in the "transform" property to create the custom word cloud visualization. Are there any other transform types I should use instead?
{
"$schema": "https://vega.github.io/schema/vega/v5.json",
"title": "A Wordcloud",
"width": 900,
"height": 500,
"padding": 100,
"autosize": "none",
"background": "pink",
"data": [
{
"name": "table",
"url": {
"index": "nupur2",
"body": {
"aggs": {
"2": {
"terms": {"field": "hashtags","order":{"_count": "asc"}, "size": 100},
"aggs": {
"_count": {
"avg": {"field": "vaderSentiment"}
}
}
}
}
}
},
"format": {"property": "aggregations.2.buckets"},
"transform": [
{
"type": "formula",
"as": "angle",
"expr": "datum.size >= 4 ? 0 : [-45,-30, -15, 0, 15, 30, 45][floor(random() * 7)]"
}
]
}
],
"scales": [
{
"name": "color",
"type": "ordinal",
"domain": {"data": "table", "field": "hashtags"},
"range": ["green", "orange", "red"]
}
],
"marks": [
{
"type": "group",
"from": {"data": "table"},
"encode": {
"enter": {
"text": {"field": "hashtags"},
"align": {"value": "center"},
"baseline": {"value": "alphabetic"},
"fill": {"scale": "color", "field": "hashtags"}
},
"update": {
"fillOpacity": {"value": 1}
},
"hover": {
"fillOpacity": {"value": 0.5}
}
},
"transform": [
{
"type": "wordcloud",
"size": [800, 400],
"text": {"field": "hashtags"},
"rotate": {"field": "datum.angle"},
"font": "Helvetica Neue, Arial",
"fontSize": {"field": "datum.size"},
"fontWeight": {"field": "datum.weight"},
"fontSizeRange": [12, 56],
"padding": 2
}
]
}
]
}
This is the image which I got output : https://i.stack.imgur.com/nbb8f.png

Draw tree Layout chart in vega

I want to have a tree chart of my data using Vega in Kibana 7.9.0, but I don't know how to write the query for that.
The code below is an example of a tree chart from the GitHub website. I want such a layout for my own index, which I have in Kibana.
Please help me with how to do that.
tree chart example
Sample tree chart code:
{
"$schema": "https://vega.github.io/schema/vega/v5.json",
"description": "An example of Cartesian layouts for a node-link diagram of hierarchical data.",
"width": 1000,
"height": 1600,
"padding": 5,
"signals": [
{
"name": "labels", "value": true,
"bind": {"input": "checkbox"}
},
{
"name": "layout", "value": "tidy",
"bind": {"input": "radio", "options": ["tidy", "cluster"]}
},
{
"name": "links", "value": "diagonal",
"bind": {
"input": "select",
"options": ["line", "curve", "diagonal", "orthogonal"]
}
},
{
"name": "separation", "value": false,
"bind": {"input": "checkbox"}
}
],
"data": [
{
"name": "tree",
"url": "data/flare.json",
"transform": [
{
"type": "stratify",
"key": "id",
"parentKey": "parent"
},
{
"type": "tree",
"method": {"signal": "layout"},
"size": [{"signal": "height"}, {"signal": "width - 100"}],
"separation": {"signal": "separation"},
"as": ["y", "x", "depth", "children"]
}
]
},
{
"name": "links",
"source": "tree",
"transform": [
{ "type": "treelinks" },
{
"type": "linkpath",
"orient": "horizontal",
"shape": {"signal": "links"}
}
]
}
],
"scales": [
{
"name": "color",
"type": "linear",
"range": {"scheme": "magma"},
"domain": {"data": "tree", "field": "depth"},
"zero": true
}
],
"marks": [
{
"type": "path",
"from": {"data": "links"},
"encode": {
"update": {
"path": {"field": "path"},
"stroke": {"value": "#ccc"}
}
}
},
{
"type": "symbol",
"from": {"data": "tree"},
"encode": {
"enter": {
"size": {"value": 100},
"stroke": {"value": "#fff"}
},
"update": {
"x": {"field": "x"},
"y": {"field": "y"},
"fill": {"scale": "color", "field": "depth"}
}
}
},
{
"type": "text",
"from": {"data": "tree"},
"encode": {
"enter": {
"text": {"field": "name"},
"fontSize": {"value": 9},
"baseline": {"value": "middle"}
},
"update": {
"x": {"field": "x"},
"y": {"field": "y"},
"dx": {"signal": "datum.children ? -7 : 7"},
"align": {"signal": "datum.children ? 'right' : 'left'"},
"opacity": {"signal": "labels ? 1 : 0"}
}
}
}
]
}
I recommend learning about the Kibana Vega interaction, the Vega spec, and the Elasticsearch search API. This question is too broad; you are basically asking someone to do the work for you, which is easily deducible from the documentation.
https://www.elastic.co/guide/en/kibana/current/vega.html
https://vega.github.io/vega/docs/
https://www.elastic.co/guide/en/elasticsearch/reference/current/search-search.html

Aggregating on two filter terms or more

How can I aggregate on two or more filter terms (strings) based on values stored in the same value column? At the moment I'm trying to make a dashboard warning lamp based on two alarm events, e.g.
"door-tamper-1" and "door-tamper-2", and I can easily create an alarm button for one as follows:
{
"$schema": "https://vega.github.io/schema/vega-lite/v4.json",
"title": {
"font": "Arial",
"fontSize": 15,
"text": "Door Tamper"
},
"height": 100,
"width": 100,
"padding": 20,
"autosize": "none",
"data": {
"name": "table",
"url": {
"%context%": true,
"%timefield%": "event_time",
"index": "event*",
"body": {
"aggs": {
"categories": {
"filter": {
"term": {"event_name.keyword": "door-tamper-1" }},
"aggs": {
"names": {
"terms": {
"field": "event_name.keyword"
}
}
}
}
}
},
"size": 0
},
"format": {"property": "aggregations.categories"}
},
"mark": "circle",
"encoding": {
"x": {"value": 31},
"y": {"value": 30},
"size": {"value": 2500},
"shape": {"value": "circle"},
"opacity": {"value": 1},
"stroke": {"value": "black"},
"strokeWidth": {"value": 5},
"fill": {
"condition": {"test": "datum.doc_count > 0",
"value": "red"},
"value": "green"
}
}
}
This lights a door tamper alarm red on a Kibana dashboard monitored by guards when someone opens door one, but I need to do it for two doors in the same room, events door-tamper-1 and door-tamper-2, so that the alarm lights when either one is being tampered with.
I tried the following, but obviously it doesn't work. How would I go about this? I have no clue...
{
"$schema": "https://vega.github.io/schema/vega-lite/v4.json",
"title": {
"font": "Arial",
"fontSize": 15,
"text": "Occupied"
},
"height": 100,
"width": 100,
"padding": 20,
"autosize": "none",
"data": {
"name": "table",
"url": {
"%context%": true,
"%timefield%": "event_time",
"index": "event*",
"body": {
"aggs": {
"categories": {
"filter": {
"term": {"or": [{"event_name.keyword": "door-tamper-1"},{"event_name.keyword": "door-tamper-2" }]},
"aggs": {
"names": {
"terms": {
"field": "event_name.keyword"
}
}
}
}
}
},
"size": 0
},
"format": {"property": "aggregations.categories"}
},
"mark": "circle",
"encoding": {
"x": {"value": 31},
"y": {"value": 30},
"size": {"value": 2500},
"shape": {"value": "circle"},
"opacity": {"value": 1},
"stroke": {"value": "black"},
"strokeWidth": {"value": 5},
"fill": {
"condition": {"test": "datum.doc_count > 0",
"value": "red"},
"value": "green"
}
}
}
This does not work.

In Kibana's Vega, how can I create layers from two different aggs in one request

In Elasticsearch's HTTP API, you can have a bucketing aggregation and a metric aggregation in a single request to the _search API. In Kibana's Vega environment, how can you create a Vega visualization which uses a single _search request with a buckets aggregation and a metric aggregation; and then makes a chart with one layer using data from the buckets and one layer using data from the metric?
To make this question more concrete, consider this example:
Imagine we are hat makers. Multiple stores carry our hats. We have an Elasticsearch index hat-sales which has one document for each time one of our hats is sold. Included in this document is the store at which the hat was sold.
Here are two examples of the documents in this index:
{
"type": "top",
"color": "black",
"price": 19,
"store": "Macy's"
}
{
"type": "fez",
"color": "red",
"price": 94,
"store": "Walmart"
}
I want to create a bar chart which shows the number of hats sold in the top 3 stores. I also want
a horizontal rule on this chart which shows the average number of hats sold over all stores - not just the top 3. Here is a sketch of what I want the chart to look like:
If we did this, having Vega do the calculation of the average:
{
"$schema": "https://vega.github.io/schema/vega-lite/v2.json",
"title": "Hat Sales",
"data": {
"url": {
"index": "hat-sales",
"body": {
"size": 0,
"query": {"match_all": {}},
"aggs": {"stores": {"terms": {"field": "store.keyword", "size": 3}}}
}
},
"format": {"property": "aggregations.stores.buckets"}
},
"transform": [
{"calculate": "datum.key", "as": "store"},
{"calculate": "datum.doc_count", "as": "count"}
],
"layer": [
{
"name": "Sales of top 3 stores",
"mark": "bar",
"encoding": {
"x": {"type": "nominal", "field": "store", "sort": "-y"},
"y": {"type": "quantitative", "field": "count"}
}
},
{
"name": "Average number of sales over all stores",
"mark": {"type": "rule", "color": "red"},
"encoding": {"y": {"aggregate": "mean", "field": "count"}}
}
]
}
which would look like this:
then the horizontal rule would be an average of the top 3 stores only. Instead we need to add another metric aggregation to the Elasticsearch request which calculates the global average of hats sold at stores (https://stackoverflow.com/a/69668089/5938725). We want to do something like this:
{
"$schema": "https://vega.github.io/schema/vega-lite/v2.json",
"title": "Hat Sales",
"data": {
"url": {
"index": "hat-sales",
"body": {
"size": 0,
"query": {"match_all": {}},
"aggs": {
"stores": {"terms": {"field": "store.keyword", "size": 3}},
"global": {
"filters": {
"filters": {"all": {"exists": {"field": "store.keyword"}}}
},
"aggs": {
"count": {"value_count": {"field": "store.keyword"}},
"unique_count": {"cardinality": {"field": "store.keyword"}},
"global_average": {
"bucket_script": {
"buckets_path": {"total": "count", "unique": "unique_count"},
"script": "params.total / params.unique"
}
}
}
}
}
}
},
"format": {"property": "aggregations.stores.buckets"}
},
"transform": [
{"calculate": "datum.key", "as": "store"},
{"calculate": "datum.doc_count", "as": "count"}
],
"layer": [
{
"name": "Sales of top 3 stores",
"mark": "bar",
"encoding": {
"x": {"type": "nominal", "field": "store", "sort": "-y"},
"y": {"type": "quantitative", "field": "count"}
}
},
{
"name": "Average number of sales over all stores",
"mark": {"type": "rule", "color": "red"},
??????????????????
}
]
}
But how can I have one layer use the data from "aggregations.stores.buckets" and another layer use data from "aggregations.global.buckets" in order to access that global_average?
I did get it to work using this:
{
"$schema": "https://vega.github.io/schema/vega-lite/v5.json",
"description": "A simple bar chart with embedded data.",
"data": {
"url": {
"index": "hat-sales",
"body": {
"size": 0,
"query": {"match_all": {}},
"aggs": {
"stores": {"terms": {"field": "store.keyword", "size": 3}},
"global": {
"filters": {
"filters": {"all": {"exists": {"field": "store.keyword"}}}
},
"aggs": {
"count": {"value_count": {"field": "store.keyword"}},
"unique_count": {"cardinality": {"field": "store.keyword"}},
"global_average": {
"bucket_script": {
"buckets_path": {"total": "count", "unique": "unique_count"},
"script": "params.total / params.unique"
}
}
}
}
}
}
}
},
"transform": [
{"flatten": ["aggregations.stores.buckets"]},
{"calculate": "datum['aggregations.stores.buckets'].key", "as": "store"},
{
"calculate": "datum['aggregations.stores.buckets'].doc_count",
"as": "count"
},
{
"calculate": "datum.aggregations.global.buckets.all.global_average.value",
"as": "global_average"
}
],
"layer": [
{
"name": "Sales of top 3 stores",
"mark": "bar",
"encoding": {
"x": {"type": "nominal", "field": "store", "sort": "-y"},
"y": {"type": "quantitative", "field": "count"}
}
},
{
"name": "Global Average",
"mark": {"type": "rule", "color": "red"},
"encoding": {"y": {"field": "global_average", "type": "quantitative"}}
}
]
}
It is less than ideal because the flatten transform makes the individual datum objects somewhat bigger. It is also confusing that once you flatten aggregations.stores.buckets, that becomes the literal name -- "aggregations.stores.buckets" -- of a field of datum, which must be accessed via square bracket notation because it contains periods.

Elasticsearch aggregation count mismatch

I am getting an incorrect aggregation count from an ES query. I understand from the ES documentation that cardinality and terms aggregations are not exact, but the difference I am seeing is too large.
The mapping of my index is
{
"dynamic_templates": [{
"template_action": {
"mapping": {
"type": "string",
"index": "not_analyzed"
},
"match": "*",
"match_mapping_type": "*"
}
}],
"_parent": {
"type": "users"
},
"date_detection": false,
"properties": {
"traits": {
"type": "object"
},
"cl_utm_params": {
"type": "object"
},
"cl_other_params": {
"type": "object"
},
"cl_triggered_ts": {
"type": "date"
}
}
}
a sample document
{
"client_id": "cl58vivh8w7t",
"user_id": "CL.1122029143.1904488380.1218174474.2049762488",
"session_id": "CL.1886305621.906039613",
"source": "Google",
"action": "pageview",
"cl_triggered_ts": "2016-09-09T00:13:33.818Z",
"browser": "Microsoft Edge 13",
"platform": "Windows 10",
"screen_size": "1920 x 1080",
"device": "Desktop",
"ip_address": "98.236.246.165",
"country": "United States",
"city": "Weirton",
"postal_code": "26062",
"location": "40.4224, -80.5739",
"timezone": "America/New_York",
"state": "West Virginia",
"continent": "North America",
"isp": "Comcast Cable",
"browser_language": "",
"traits": {},
"cl_utm_params": {},
"cl_other_params": {}
}
With the query below I am getting the number of unique sessions for each source, and the number of unique sessions for each device per source, using bucket and metric aggregations:
{
"query": {
"bool": {
"must": [
{"match": {"client_id": "cl58vivh8w7t"}}
]
}
},
"aggs": {
"top_source": {
"terms": {
"field": "source"
},
"aggs": {
"total_unique_sessions": {"cardinality": {"field": "session_id"}},
"per_device": {
"terms": {"field": "device"},
"aggs": {"device_session": {"cardinality": {"field": "session_id"}}}
}
}
}
},
"size": 0
}
For reference, I have given a single bucket below. In it, the sum of each device's session value should be equal to the total_unique_sessions value.
Is there something wrong with my query or my calculations?
{
"key": "www.google.com",
"doc_count": 68947,
"per_device": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "Desktop",
"doc_count": 49254,
"device_session": {
"value": 2413
}
},
{
"key": "Mobile",
"doc_count": 16317,
"device_session": {
"value": 3222
}
},
{
"key": "Tablet",
"doc_count": 3343,
"device_session": {
"value": 636
}
},
{
"key": "TV",
"doc_count": 33,
"device_session": {
"value": 9
}
}
]
},
"total_unique_sessions": {
"value": 9058
}
}
I see you are using a match query.
Normally we use a term query for aggregations. I think match is causing this problem.
{
"query": {
"bool": {
"must": [
{"term": {"client_id": "cl58vivh8w7t"}}
]
}
},
"aggs": {
"top_source": {
"terms": {
"field": "source"
},
"aggs": {
"total_unique_sessions": {"cardinality": {"field": "session_id"}},
"per_device": {
"terms": {"field": "device"},
"aggs": {"device_session": {"cardinality": {"field": "session_id"}}}
}
}
}
},
"size": 0
}

Resources