I am using dc.js and crossfilter.js to create a d3 dashboard, and am wondering how to implement a regression line into a scatterplot chart that responds to filtering.
I have been playing with a few examples re adding a regression line, but I have been unsuccessful extracting and incorporating the code.
I don't have a problem with the math, but rather with how to access the filtered data from the dimension, and then how to add the regression line to the filtered scatterplot chart (so that the regression line also responds to future filtering).
jsFiddle Demo
var data = [
{"record":"record","date":"date","cars":"cars","bikes":"bikes"},
{"record":"1","date":"01/05/2012","cars":"1488.1","bikes":"49.73"},
{"record":"2","date":"02/05/2012","cars":"1374.29","bikes":"52.44"},
{"record":"3","date":"03/05/2012","cars":"1353.01","bikes":"47.92"},
{"record":"4","date":"04/05/2012","cars":"1420.33","bikes":"50.69"},
{"record":"5","date":"05/05/2012","cars":"1544.11","bikes":"47.47"},
{"record":"6","date":"06/05/2012","cars":"1292.84","bikes":"47.75"},
{"record":"7","date":"07/05/2012","cars":"1318.9","bikes":"48.64"},
{"record":"8","date":"08/05/2012","cars":"1686.3","bikes":"50.9"},
{"record":"9","date":"09/05/2012","cars":"1603.99","bikes":"53.44"},
{"record":"10","date":"10/05/2012","cars":"1420.1","bikes":"53.29"},
{"record":"11","date":"11/05/2012","cars":"1410.8","bikes":"54.06"},
{"record":"12","date":"12/05/2012","cars":"1374.62","bikes":"51.24"},
{"record":"13","date":"13/05/2012","cars":"1279.53","bikes":"53.96"},
{"record":"14","date":"14/05/2012","cars":"1330.47","bikes":"49.5"},
{"record":"15","date":"15/05/2012","cars":"1377.61","bikes":"52.32"},
{"record":"16","date":"16/05/2012","cars":"1302.12","bikes":"51.96"},
{"record":"17","date":"17/05/2012","cars":"1326.9","bikes":"49.86"},
{"record":"18","date":"18/05/2012","cars":"1181.55","bikes":"50.25"},
{"record":"19","date":"19/05/2012","cars":"1493.75","bikes":"51.24"},
{"record":"20","date":"20/05/2012","cars":"1463.9","bikes":"50.88"},
{"record":"21","date":"21/05/2012","cars":"1370.16","bikes":"51.09"},
{"record":"22","date":"22/05/2012","cars":"1403.3","bikes":"51.67"},
{"record":"23","date":"23/05/2012","cars":"1277.65","bikes":"49.3"},
{"record":"24","date":"24/05/2012","cars":"1361.94","bikes":"50.47"},
{"record":"25","date":"25/05/2012","cars":"1400.8","bikes":"51.55"},
{"record":"26","date":"26/05/2012","cars":"1289.09","bikes":"47.17"},
{"record":"27","date":"27/05/2012","cars":"1258.39","bikes":"52.12"},
{"record":"28","date":"28/05/2012","cars":"1288.71","bikes":"49.28"},
{"record":"29","date":"29/05/2012","cars":"1511.86","bikes":"50.73"},
{"record":"30","date":"30/05/2012","cars":"1300.38","bikes":"52.39"},
{"record":"31","date":"31/05/2012","cars":"1455.19","bikes":"49.53"},
{"record":"32","date":"01/06/2012","cars":"1311.89","bikes":"50.37"},
{"record":"33","date":"02/06/2012","cars":"1368.64","bikes":"50.87"},
{"record":"34","date":"03/06/2012","cars":"1360.05","bikes":"50.51"},
{"record":"35","date":"04/06/2012","cars":"1382.56","bikes":"49.67"},
{"record":"36","date":"05/06/2012","cars":"1304.15","bikes":"47.6"},
{"record":"37","date":"06/06/2012","cars":"1271.57","bikes":"50.22"},
{"record":"38","date":"07/06/2012","cars":"1442.38","bikes":"50.8"},
{"record":"39","date":"08/06/2012","cars":"1406.38","bikes":"53.14"},
{"record":"40","date":"09/06/2012","cars":"1724.16","bikes":"49.66"},
{"record":"41","date":"10/06/2012","cars":"1931.05","bikes":"53"},
{"record":"42","date":"11/06/2012","cars":"1669.47","bikes":"53.71"},
{"record":"43","date":"12/06/2012","cars":"1794.06","bikes":"51.78"},
{"record":"44","date":"13/06/2012","cars":"1625.98","bikes":"51.58"},
{"record":"45","date":"14/06/2012","cars":"1371.51","bikes":"52.36"},
{"record":"46","date":"15/06/2012","cars":"1418.05","bikes":"47.64"},
{"record":"47","date":"16/06/2012","cars":"1431","bikes":"53.14"},
{"record":"48","date":"17/06/2012","cars":"1527.21","bikes":"48.63"},
{"record":"49","date":"18/06/2012","cars":"1320.95","bikes":"51.7"},
{"record":"50","date":"19/06/2012","cars":"1396.93","bikes":"52.92"}
];
// Scatter plot of `data`: tSel1 is plotted on the x axis, tSel2 on the y axis.
// The selectors and the chart are declared explicitly; assigning to undeclared
// names creates implicit globals and throws in strict mode.
var tSel1 = "cars";
var tSel2 = "bikes";

// Coerce the two plotted columns from strings to numbers, in place.
data.forEach(function (d) {
  d[tSel1] = +d[tSel1];
  d[tSel2] = +d[tSel2];
});

// The first row of `data` repeats the column names, so its coerced values are
// NaN. Only rows with finite coordinates are loaded, otherwise that row would
// become a point with NaN coordinates in the scatter plot.
var facts = crossfilter(data.filter(function (d) {
  return isFinite(d[tSel1]) && isFinite(d[tSel2]);
}));
var allDimension = facts.groupAll();

// The scatter plot expects keys of the form [x, y].
var scatterDimension = facts.dimension(function (d) { return [+d[tSel1], +d[tSel2]]; });
var scatterGroup = scatterDimension.group().reduceSum(function (d) { return d[tSel1]; });

// Axis domains: the data extent padded by 10% on each side.
// d3.min / d3.max ignore NaN, so the header-like row does not affect them.
var maxY1 = d3.max(data, function (d) { return d[tSel1]; });
var maxY2 = d3.max(data, function (d) { return d[tSel2]; });
var maxY1Plus = maxY1 + (maxY1 * 0.1);
var maxY2Plus = maxY2 + (maxY2 * 0.1);
var minY1 = d3.min(data, function (d) { return d[tSel1]; });
var minY1Minus = minY1 * 0.9;
var minY2 = d3.min(data, function (d) { return d[tSel2]; });
var minY2Minus = minY2 * 0.9;

var xyScatterChart = dc.scatterPlot("#scatterPlot");
xyScatterChart
  .width(600)
  .height(400)
  .margins({top: 20, right: 20, bottom: 20, left: 60})
  .dimension(scatterDimension)
  .group(scatterGroup)
  .symbolSize(6)
  .highlightedSize(15)
  .brushOn(false)
  .excludedOpacity(0.5)
  .excludedSize(5)
  .renderHorizontalGridLines(true)
  .renderVerticalGridLines(true)
  .x(d3.scale.linear().domain([minY1Minus, maxY1Plus]))
  .y(d3.scale.linear().domain([minY2Minus, maxY2Plus]));

// renderAll() draws every registered chart; an immediate redrawAll() adds nothing.
dc.renderAll();
<link href="http://dc-js.github.io/dc.js/css/dc.css" rel="stylesheet"/>
<script src="http://dc-js.github.io/dc.js/js/d3.js"></script>
<script src="http://dc-js.github.io/dc.js/js/crossfilter.js"></script>
<script src="http://dc-js.github.io/dc.js/js/dc.js"></script>
<div id="scatterPlot"></div>
References:
https://groups.google.com/forum/#!topic/dc-js-user-group/HaQMegKa_U0
https://bl.ocks.org/ctufts/298bfe4b11989960eeeecc9394e9f118
It would be awesome to include an example in dc.js, since this is something lots of people can use.
Maybe we can work together on that? I don't know the math but here's a simple way to use a composite chart to display a line on data calculated from an aggregated group.
First off, here's the composite chart with the old scatter plot embedded in it:
// Composite chart: the scatter plot plus a line chart fed by the regression
// "fake group". Size, margins, dimension and group live on the composite;
// scatter-specific options stay on the scatter child.
var composite = dc.compositeChart("#composite");

// Shared geometry and data source first ...
composite
  .width(600)
  .height(400)
  .margins({top: 20, right: 20, bottom: 20, left: 60})
  .dimension(scatterDimension)
  .group(scatterGroup);

// ... then the child charts and the coordinate scales.
composite
  .compose([
    // Child 1: the original scatter plot.
    dc.scatterPlot(composite)
      .symbolSize(6)
      .highlightedSize(15)
      .brushOn(false)
      .excludedOpacity(0.5)
      .excludedSize(5)
      .renderHorizontalGridLines(true)
      .renderVerticalGridLines(true),
    // Child 2: the line drawn from the derived group.
    dc.lineChart(composite)
      .group(regressionGroup(scatterGroup))
  ])
  .x(d3.scale.linear().domain([minY1Minus, maxY1Plus]))
  .y(d3.scale.linear().domain([minY2Minus, maxY2Plus]));
Note that we're supplying the scatter group to both the composite and the scatter plot. That's just because the composite chart requires a group even though it doesn't actually use it.
We've moved the parameters that have to do with coordinates to the main (composite) chart, but everything that is specific to the scatter plot stays on it. We've also added a line chart to the composite, which uses a "fake group" based on the scatter group.
This fake group is particularly fake, but it should be enough to get you started. Since I don't have time to learn the math today, I'll just pretend that the first and last points are the regression:
/**
 * Wraps a scatter-plot group in a "fake group" usable by a line chart.
 *
 * The source group's keys are [x, y] pairs; a line chart wants {key: x, value: y}.
 * This placeholder does not compute a regression: it returns the first and the
 * last entries of the source group whose x and y are both valid numbers.
 *
 * @param {{all: function(): Array<{key: Array, value: *}>}} group - source group
 *   whose keys are [x, y] pairs.
 * @returns {{all: function(): Array<{key: number, value: number}>}} an object
 *   whose all() is evaluated lazily, each time the chart asks for data. It
 *   yields two points, or an empty array when the source has no valid point.
 */
function regressionGroup(group) {
  return {
    all: function () {
      var points = group.all()
        .map(function (entry) { return entry.key; })
        // Skip entries whose coordinates are not numbers (e.g. a header row coerced to NaN).
        .filter(function (key) { return !isNaN(key[0]) && !isNaN(key[1]); })
        .map(function (key) { return {key: key[0], value: key[1]}; });

      // With no valid point there is nothing to draw; returning undefined
      // entries would make the chart's accessors fail.
      if (points.length === 0) {
        return [];
      }
      // NOTE(review): the source group's `value` is not consulted here, so bins
      // emptied by filters are presumably still included - confirm before
      // replacing this with a real regression.
      return [points[0], points[points.length - 1]];
    }
  };
}
As with all fake groups, the idea is to calculate some group-like data when the chart asks for it (and no sooner), based on another group. Here the calculation is not very interesting, because you know how to calculate a regression and I don't. You'll want to replace first and last and the for loop with a real calculation; all this is doing is checking for valid points and keeping the first and last ones that it finds.
Interestingly, the scatter plot takes data where the key contains both x and y coordinates, but the line chart takes data where the key is x and the value is y. That's why we have the transformation kv = {key: key[0], value: key[1]}
Postscript
Note that you'll run into a dc.js bug if you put the regression guide points outside of the domain - the stack mixin is too aggressive about clipping points to the domain. There is an easy, ugly workaround that seems to work in this case: tell the line chart it has an ordinal x scale even though it doesn't:
// Same composite as before, abbreviated ("// ..." marks omitted lines). The
// line chart is kept in a variable so that it can be patched after compose().
var composite = dc.compositeChart("#composite"),
lineChart;
composite
.width(600)
// ...
.compose([
// ...
lineChart = dc.lineChart(composite)
.group(regressionGroup(scatterGroup))
])
// Workaround described in the accompanying text: make the line chart report an
// ordinal x scale so that points outside the domain are not clipped away.
lineChart.isOrdinal = d3.functor(true);
Yuck! But it works! This hack probably only works inside a composite!
https://jsfiddle.net/gordonwoodhull/5tpcxov1/12/
I have a fully functional example of regression. I was precisely doing it when I came here for help and I found your question. It requires regression.js (here).
This follows Gordon's excellent suggestion of a "fake group", which should really be called an inline group, or immediate group, or even group on-the-fly. Here is mine:
/**
 * Builds a "fake group" whose all() returns the two end points of a linear
 * regression fitted (with regression.js) to the [x, y] keys of `group`.
 *
 * @param {{all: function(): Array<{key: Array, value: *}>}} group - crossfilter
 *   group whose keys are [x, y] pairs.
 * @param {number} min - lower bound of the x scale; x of the first line point.
 * @param {number} max - upper bound of the x scale; x of the last line point.
 * @param {boolean} [filter=false] - when true, points with an empty x value
 *   (null, undefined, '' or NaN) are left out of the fit.
 * @returns {{all: function(): Array<{key: number, value: number}>}} group-like
 *   object; the regression is recomputed on every all() call.
 */
function myRegressionGroup(group, min, max, filter = false) {
  // x = 0 is a valid coordinate, so "empty" is tested explicitly instead of by truthiness.
  const hasX = (point) => {
    const x = point[0];
    return x !== null && x !== undefined && x !== '' && !Number.isNaN(x);
  };

  return {
    all() {
      let points = group.all().map((entry) => entry.key);
      if (filter) {
        points = points.filter(hasX);
      }
      // Declared locally: assigning to an undeclared name leaks a global and
      // throws in strict mode.
      const fit = regression.linear(points);
      // predict(x) returns the [x, y] pair on the fitted line.
      const first = fit.predict(min);
      const last = fit.predict(max);
      return [
        {key: first[0], value: first[1]},
        {key: last[0], value: last[1]},
      ];
    },
  };
}
Please notice that this function requires a crossfilter group and also the min and max from the x-scale. Since you typically have these values calculated for your xScale, all it takes is reusing them here. This is because the function uses the extremes with the predict method to calculate the two points of the regression line.
The optional filter data wrangler is for you to decide whether to remove empty values on x or not.
@Gordon, what should I do to get my regression example included in the dc.js examples?
I have a dataset (data) with the following row/column structure:
Date Category1 Category2 Revenue
30/12/2014 a x 10
30/12/2014 b x 15
31/12/2014 a x 11
1/1/2015 a x 13
2/1/2015 a x 14
2/1/2015 b x 9
2/1/2015 c z 4
...
Based on data I create a couple of dimensions and groups:
// One crossfilter instance; each chart gets its own dimension and a group
// that sums Revenue per key of that dimension.
var ndx = crossfilter(data);

// Category1 -> summed revenue
var cat1Dim = ndx.dimension(function (row) {
  return row.Category1;
});
var revenuePerCat1 = cat1Dim.group().reduceSum(function (row) {
  return row.Revenue;
});

// Category2 -> summed revenue
var cat2Dim = ndx.dimension(function (row) {
  return row.Category2;
});
var revenuePerCat2 = cat2Dim.group().reduceSum(function (row) {
  return row.Revenue;
});

// Date -> summed revenue
var dateDim = ndx.dimension(function (row) {
  return row.Date;
});
var revenuePerDate = dateDim.group().reduceSum(function (row) {
  return row.Revenue;
});
Next, I create the following charts:
a line chart; dimension = dateDim, group = revenuePerDate
a pie-chart; dimension = cat1Dim, group = revenuePerCat1
a pie-chart; dimension = cat2Dim, group = revenuePerCat2
Besides the charts I would also like to show the year-to-date value of the revenues via a numberDisplay. Initially I thought to achieve this by adding a simple if condition to the reduceSum function where I reduce the data to contain only items of the current year, like so:
// Total revenue over all records, counting only rows dated in curYear;
// rows from other years contribute 0.
var ytdRev = ndx.groupAll().reduceSum(function (row) {
  return row.Date.getFullYear() == curYear ? row.Revenue : 0;
});
A box containing a numberDisplay item is then called by:
// numberDisplay showing the year-to-date revenue held by ytdRev.
box_ytd
  // formatNumber takes a formatter function, not a format specifier string.
  .formatNumber(d3.format("$,.4s"))
  // Round the total to three decimals before formatting.
  .valueAccessor(function (d) { return Math.round(d * 1000) / 1000; })
  .group(ytdRev);
This works perfectly fine if one selects one of the categories displayed in the pie-charts, but is incorrect when one also starts to filter date ranges in the line chart. Namely, instead of a year-to-date value, actually a 'date-to-date' value for the specific selection will be returned. Although this behaviour is correct from a technical perspective, I would like to know how I can instruct dc.js such that it will only take into account chart selections from a certain set of charts when rendering a numberDisplay. The selections made in the pie-charts should, however, both update the displayed selection in the line chart and the numberDisplay.
Ideally, I would like to use one crossfilter instance only, but I am open to any suggestions that involve a second crossfilter as well.
EDIT:
Based on Gordon's comment I played around with a custom reduce function. Instead of ndx.groupAll() I applied the following reduce function with a .groupAll() on the dimension level:
/**
 * Custom reducers that keep a running revenue total restricted to rows dated
 * in `curYear` (defined outside this snippet). Rows from other years leave the
 * accumulator untouched.
 */

/**
 * Called when record `v` enters the filtered set.
 * @param {{revenue: number}} p - accumulator, mutated and returned.
 * @param {{Date: Date, Revenue: (number|string)}} v - record being added.
 * @returns {{revenue: number}} the accumulator.
 */
function reduceAdd(p, v) {
  // Loose equality kept on purpose: curYear may be a number or a numeric string.
  if (v.Date.getFullYear() == curYear) {
    p.revenue += +v.Revenue;
  }
  return p;
}

/**
 * Called when record `v` leaves the filtered set; mirrors reduceAdd.
 * @param {{revenue: number}} p - accumulator, mutated and returned.
 * @param {{Date: Date, Revenue: (number|string)}} v - record being removed.
 * @returns {{revenue: number}} the accumulator.
 */
function reduceRemove(p, v) {
  // The original line lacked the opening parenthesis after `if`, a syntax error.
  if (v.Date.getFullYear() == curYear) {
    p.revenue -= +v.Revenue;
  }
  return p;
}

/**
 * @returns {{revenue: number}} a fresh accumulator starting at zero.
 */
function reduceInitial() {
  return {revenue: 0};
}
// groupAll() is taken from dateDim rather than from ndx: as the surrounding
// text explains, a total built on the line chart's own dimension is not
// affected by date selections made on that dimension.
var ytdRev = dateDim.groupAll().reduce(reduceAdd, reduceRemove, reduceInitial);
The .valueAccessor in the numberDisplay is changed from d.value.revenue to d.revenue:
// numberDisplay showing the current-year revenue accumulated in ytdRev.
box_ytd
  // formatNumber takes a formatter function, not a format specifier string.
  .formatNumber(d3.format("$,.4s"))
  // The groupAll value is the reducer's accumulator object; round its total to three decimals.
  .valueAccessor(function (d) { return Math.round(d.revenue * 1000) / 1000; })
  .group(ytdRev);
The numberDisplay will now reflect the total value for the current year for each of the selections made in the pie-charts. Date selections will only affect the pie-charts' values; the numberDisplay shares the same dimension with the line chart and hence the numberDisplay is unaffected by any selections on that dimension.
Based on Gordon's comment I played around with a custom reduce function. Instead of ndx.groupAll() I applied the following reduce function with a .groupAll() on the dimension level:
/**
 * Custom reducers that keep a running revenue total restricted to rows dated
 * in `curYear` (defined outside this snippet). Rows from other years leave the
 * accumulator untouched.
 */

/**
 * Called when record `v` enters the filtered set.
 * @param {{revenue: number}} p - accumulator, mutated and returned.
 * @param {{Date: Date, Revenue: (number|string)}} v - record being added.
 * @returns {{revenue: number}} the accumulator.
 */
function reduceAdd(p, v) {
  // Loose equality kept on purpose: curYear may be a number or a numeric string.
  if (v.Date.getFullYear() == curYear) {
    p.revenue += +v.Revenue;
  }
  return p;
}

/**
 * Called when record `v` leaves the filtered set; mirrors reduceAdd.
 * @param {{revenue: number}} p - accumulator, mutated and returned.
 * @param {{Date: Date, Revenue: (number|string)}} v - record being removed.
 * @returns {{revenue: number}} the accumulator.
 */
function reduceRemove(p, v) {
  // The original line lacked the opening parenthesis after `if`, a syntax error.
  if (v.Date.getFullYear() == curYear) {
    p.revenue -= +v.Revenue;
  }
  return p;
}

/**
 * @returns {{revenue: number}} a fresh accumulator starting at zero.
 */
function reduceInitial() {
  return {revenue: 0};
}
// groupAll() is taken from dateDim rather than from ndx: as the surrounding
// text explains, a total built on the line chart's own dimension is not
// affected by date selections made on that dimension.
var ytdRev = dateDim.groupAll().reduce(reduceAdd, reduceRemove, reduceInitial);
The .valueAccessor in the numberDisplay is changed from d.value.revenue to d.revenue:
// numberDisplay showing the current-year revenue accumulated in ytdRev.
box_ytd
  // formatNumber takes a formatter function, not a format specifier string.
  .formatNumber(d3.format("$,.4s"))
  // The groupAll value is the reducer's accumulator object; round its total to three decimals.
  .valueAccessor(function (d) { return Math.round(d.revenue * 1000) / 1000; })
  .group(ytdRev);
The numberDisplay will now reflect the total value for the current year for each of the selections made in the pie-charts. Date selections will only affect the pie-charts' values; the numberDisplay shares the same dimension with the line chart and hence the numberDisplay is unaffected by any selections on that dimension.
I have the following parsing code of time in h:m:s format
// Formatter/parser for the incoming timestamps, e.g. "2015-03-29T20:32:24Z".
var ISO8601format = d3.time.format("%Y-%m-%dT%H:%M:%SZ");
// Formatter for the time-of-day part only, e.g. "20:32:24".
var hoursandminsformat = d3.time.format("%H:%M:%S");

// Parse the timestamp, then overwrite e.time with its "HH:MM:SS" string form.
e.time = hoursandminsformat(
  ISO8601format.parse(e.time)
);
I have a json file with reading at different times from different sensors.
Sample data is=
[
{"id":1,"time":"2015-03-29T20:32:24Z"},
{"id":2,"time":"2015-03-29T20:32:24Z"},
{"id":3,"time":"2015-03-29T20:32:24Z"},
{"id":1,"time":"2015-03-29T20:33:24Z"},
{"id":2,"time":"2015-03-29T20:33:24Z"},
{"id":3,"time":"2015-03-29T20:33:24Z"},
]
I am going to plot a dc.js line chart with time in minutes on the x axis and frequency on the y axis. I am using the following code to do this, but it's returning NaNs.
// Line chart of the number of readings per distinct time value.
var freqchart = dc.lineChart("#chart1");

// Dimension keyed on each reading's time; the group counts readings per key.
var countByTime = ndx.dimension(function (reading) {
  return reading.time;
});
var freqbyTimeGroup = countByTime.group().reduceCount();

// NOTE(review): the x domain below is built from d.time on a d3 time scale,
// which works on Date values - presumably d.time has to be a Date at this
// point rather than a formatted string; confirm against the parsing step.
freqchart
  .width(400)
  .height(200)
  .transitionDuration(500)
  .dimension(countByTime)
  .group(freqbyTimeGroup)
  .elasticY(true)
  .x(
    d3.time.scale().domain([
      d3.min(data, function (reading) { return reading.time; }),
      d3.max(data, function (reading) { return reading.time; })
    ])
  )
  .xUnits(d3.time.minutes)
  .yAxisLabel("Frequency")
  .xAxisLabel('Time')
  .elasticX(true);
How can I solve this problem? Here is the jsfiddle that's not working
Hurray I got the solution. Here is the code
var data=[
{"id":20,"time":"2015-03-29T20:32:24Z","speed":20},
{"id":21,"time":"2015-03-29T20:32:24Z","speed":15},
{"id":22,"time":"2015-03-29T20:32:24Z","speed":16},
{"id":23,"time":"2015-03-29T20:33:25Z","speed":14},
{"id":20,"time":"2015-03-29T20:33:26Z","speed":20},
{"id":21,"time":"2015-03-29T20:34:24Z","speed":10},
{"id":22,"time":"2015-03-29T20:34:24Z","speed":15},
{"id":23,"time":"2015-03-29T20:35:24Z","speed":15},
]
// The real dataset is much larger, with many detectors; this is just a sample.
// Parser turning an "HH:MM:SS" string into a Date.
var dateformat = d3.time.format("%H:%M:%S").parse;
// Not used by the loop below.
var ISO8601format = d3.time.format("%Y-%m-%dT%H:%M:%SZ");
var hoursandminsformat = d3.time.format("%H:%M:%S");

// Replace each ISO timestamp string by a Date built from its time-of-day part.
data.forEach(function (reading) {
  // Characters 11..18 of "YYYY-MM-DDTHH:MM:SSZ" are "HH:MM:SS".
  var clock = reading.time.substring(11, 19);
  reading.time = dateformat(clock);
});
Here is the working jsfiddle