I am using dc.js and crossfilter.js to create a d3 dashboard, and am wondering how to implement a regression line into a scatterplot chart that responds to filtering.
I have been playing with a few examples re adding a regression line, but I have been unsuccessful extracting and incorporating the code.
I don't have a problem with the math, but rather with how to access the filtered data from the dimension, and then how to add the regression line to the filtered scatterplot chart (so that the regression line also responds to future filtering).
jsFiddle Demo
// Sample records for the demo. Every field, including the numeric-looking
// "cars" and "bikes" readings, is stored as a string in this literal.
var data = [
// Header row: its values simply repeat the column names, not a real reading.
{"record":"record","date":"date","cars":"cars","bikes":"bikes"},
{"record":"1","date":"01/05/2012","cars":"1488.1","bikes":"49.73"},
{"record":"2","date":"02/05/2012","cars":"1374.29","bikes":"52.44"},
{"record":"3","date":"03/05/2012","cars":"1353.01","bikes":"47.92"},
{"record":"4","date":"04/05/2012","cars":"1420.33","bikes":"50.69"},
{"record":"5","date":"05/05/2012","cars":"1544.11","bikes":"47.47"},
{"record":"6","date":"06/05/2012","cars":"1292.84","bikes":"47.75"},
{"record":"7","date":"07/05/2012","cars":"1318.9","bikes":"48.64"},
{"record":"8","date":"08/05/2012","cars":"1686.3","bikes":"50.9"},
{"record":"9","date":"09/05/2012","cars":"1603.99","bikes":"53.44"},
{"record":"10","date":"10/05/2012","cars":"1420.1","bikes":"53.29"},
{"record":"11","date":"11/05/2012","cars":"1410.8","bikes":"54.06"},
{"record":"12","date":"12/05/2012","cars":"1374.62","bikes":"51.24"},
{"record":"13","date":"13/05/2012","cars":"1279.53","bikes":"53.96"},
{"record":"14","date":"14/05/2012","cars":"1330.47","bikes":"49.5"},
{"record":"15","date":"15/05/2012","cars":"1377.61","bikes":"52.32"},
{"record":"16","date":"16/05/2012","cars":"1302.12","bikes":"51.96"},
{"record":"17","date":"17/05/2012","cars":"1326.9","bikes":"49.86"},
{"record":"18","date":"18/05/2012","cars":"1181.55","bikes":"50.25"},
{"record":"19","date":"19/05/2012","cars":"1493.75","bikes":"51.24"},
{"record":"20","date":"20/05/2012","cars":"1463.9","bikes":"50.88"},
{"record":"21","date":"21/05/2012","cars":"1370.16","bikes":"51.09"},
{"record":"22","date":"22/05/2012","cars":"1403.3","bikes":"51.67"},
{"record":"23","date":"23/05/2012","cars":"1277.65","bikes":"49.3"},
{"record":"24","date":"24/05/2012","cars":"1361.94","bikes":"50.47"},
{"record":"25","date":"25/05/2012","cars":"1400.8","bikes":"51.55"},
{"record":"26","date":"26/05/2012","cars":"1289.09","bikes":"47.17"},
{"record":"27","date":"27/05/2012","cars":"1258.39","bikes":"52.12"},
{"record":"28","date":"28/05/2012","cars":"1288.71","bikes":"49.28"},
{"record":"29","date":"29/05/2012","cars":"1511.86","bikes":"50.73"},
{"record":"30","date":"30/05/2012","cars":"1300.38","bikes":"52.39"},
{"record":"31","date":"31/05/2012","cars":"1455.19","bikes":"49.53"},
{"record":"32","date":"01/06/2012","cars":"1311.89","bikes":"50.37"},
{"record":"33","date":"02/06/2012","cars":"1368.64","bikes":"50.87"},
{"record":"34","date":"03/06/2012","cars":"1360.05","bikes":"50.51"},
{"record":"35","date":"04/06/2012","cars":"1382.56","bikes":"49.67"},
{"record":"36","date":"05/06/2012","cars":"1304.15","bikes":"47.6"},
{"record":"37","date":"06/06/2012","cars":"1271.57","bikes":"50.22"},
{"record":"38","date":"07/06/2012","cars":"1442.38","bikes":"50.8"},
{"record":"39","date":"08/06/2012","cars":"1406.38","bikes":"53.14"},
{"record":"40","date":"09/06/2012","cars":"1724.16","bikes":"49.66"},
{"record":"41","date":"10/06/2012","cars":"1931.05","bikes":"53"},
{"record":"42","date":"11/06/2012","cars":"1669.47","bikes":"53.71"},
{"record":"43","date":"12/06/2012","cars":"1794.06","bikes":"51.78"},
{"record":"44","date":"13/06/2012","cars":"1625.98","bikes":"51.58"},
{"record":"45","date":"14/06/2012","cars":"1371.51","bikes":"52.36"},
{"record":"46","date":"15/06/2012","cars":"1418.05","bikes":"47.64"},
{"record":"47","date":"16/06/2012","cars":"1431","bikes":"53.14"},
{"record":"48","date":"17/06/2012","cars":"1527.21","bikes":"48.63"},
{"record":"49","date":"18/06/2012","cars":"1320.95","bikes":"51.7"},
{"record":"50","date":"19/06/2012","cars":"1396.93","bikes":"52.92"}
];
// Names of the two record fields that are plotted against each other.
// Declared with `var` like the rest of the script, so they are not created as
// implicit globals (which throws in strict mode / ES modules).
var tSel1 = "cars";
var tSel2 = "bikes";

// Convert both fields from strings to numbers, in place. Values that are not
// numeric strings (such as the header row's column names) become NaN.
data.forEach(function (d) {
  d[tSel1] = +d[tSel1];
  d[tSel2] = +d[tSel2];
});
// Crossfilter index over the records, plus a group-all handle.
var facts = crossfilter(data);
var allDimension = facts.groupAll();

// The dimension key is the [x, y] pair of the two selected fields; the group
// sums the tSel1 field for every distinct pair.
var scatterDimension = facts.dimension(function (row) {
  return [+row[tSel1], +row[tSel2]];
});
var scatterGroup = scatterDimension.group().reduceSum(function (row) {
  return row[tSel1];
});

// Raw extents of each selected field across all records.
var minY1 = d3.min(data, function (row) { return row[tSel1]; });
var maxY1 = d3.max(data, function (row) { return row[tSel1]; });
var minY2 = d3.min(data, function (row) { return row[tSel2]; });
var maxY2 = d3.max(data, function (row) { return row[tSel2]; });

// Extents widened by 10% on each end; these are used as the axis domains.
var minY1Minus = minY1 * 0.9;
var maxY1Plus = maxY1 + (maxY1 * 0.1);
var minY2Minus = minY2 * 0.9;
var maxY2Plus = maxY2 + (maxY2 * 0.1);
// Scatter plot bound to the [tSel1, tSel2] dimension, with linear axes that
// span the padded extents of each field.
// Declared with `var` so it is not created as an implicit global.
var xyScatterChart = dc.scatterPlot("#scatterPlot");
xyScatterChart
  .width(600)
  .height(400)
  .margins({top: 20, right: 20, bottom: 20, left: 60})
  .dimension(scatterDimension)
  .group(scatterGroup)
  .symbolSize(6)
  .highlightedSize(15)
  .brushOn(false)
  .excludedOpacity(0.5)
  .excludedSize(5)
  .renderHorizontalGridLines(true)
  .renderVerticalGridLines(true)
  .x(d3.scale.linear().domain([minY1Minus, maxY1Plus]))
  .y(d3.scale.linear().domain([minY2Minus, maxY2Plus]));

// renderAll() draws every registered chart from scratch, so an immediate
// redrawAll() afterwards is redundant and has been dropped.
dc.renderAll();
<!-- Assets are loaded over https so they are not blocked as mixed content when
     the page itself is served over https. dc.js is included after d3 and
     crossfilter. -->
<link href="https://dc-js.github.io/dc.js/css/dc.css" rel="stylesheet"/>
<script src="https://dc-js.github.io/dc.js/js/d3.js"></script>
<script src="https://dc-js.github.io/dc.js/js/crossfilter.js"></script>
<script src="https://dc-js.github.io/dc.js/js/dc.js"></script>
<!-- Anchor element targeted by dc.scatterPlot("#scatterPlot"). -->
<div id="scatterPlot"></div>
References:
https://groups.google.com/forum/#!topic/dc-js-user-group/HaQMegKa_U0
https://bl.ocks.org/ctufts/298bfe4b11989960eeeecc9394e9f118
It would be awesome to include an example in dc.js, since this is something lots of people can use.
Maybe we can work together on that? I don't know the math but here's a simple way to use a composite chart to display a line on data calculated from an aggregated group.
First off, here's the composite chart with the old scatter plot embedded in it:
// Composite chart that owns the size, margins and both axes.
var composite = dc.compositeChart("#composite");

// Child 1: the scatter plot, carrying only scatter-specific options.
var scatterLayer = dc.scatterPlot(composite)
  .symbolSize(6)
  .highlightedSize(15)
  .brushOn(false)
  .excludedOpacity(0.5)
  .excludedSize(5)
  .renderHorizontalGridLines(true)
  .renderVerticalGridLines(true);

// Child 2: the line, fed by a group derived from the scatter group.
var regressionLayer = dc.lineChart(composite)
  .group(regressionGroup(scatterGroup));

composite
  .width(600)
  .height(400)
  .margins({top: 20, right: 20, bottom: 20, left: 60})
  .dimension(scatterDimension)
  .group(scatterGroup)
  .compose([scatterLayer, regressionLayer])
  .x(d3.scale.linear().domain([minY1Minus, maxY1Plus]))
  .y(d3.scale.linear().domain([minY2Minus, maxY2Plus]));
Note that we're supplying the scatter group to both the composite and the scatter plot. That's just because the composite chart requires a group even though it doesn't actually use it.
We've moved the parameters that have to do with coordinates to the main (composite) chart, but everything that is specific to the scatter plot stays on it. We've also added a line chart to the composite, which uses a "fake group" based on the scatter group.
This fake group is particularly fake, but it should be enough to get you started. Since I don't have time to learn the math today, I'll just pretend that the first and last points are the regression:
/**
 * Builds a "fake group" for a line chart from a scatter-plot group.
 *
 * The wrapped group's entries carry keys of the form [x, y]. Each call to
 * all() scans them, ignores entries whose x or y is NaN, and converts the
 * first and the last valid entry to the {key: x, value: y} shape. This is a
 * placeholder for a real regression calculation.
 *
 * @param {{all: function(): Array}} group - group whose keys are [x, y] pairs.
 * @returns {{all: function(): Array}} object whose all() returns
 *   [first, last], or an empty array when there is no valid point.
 */
function regressionGroup(group) {
  return {
    all: function () {
      var entries = group.all();
      var first, last;
      for (var i = 0; i < entries.length; ++i) {
        var key = entries[i].key;
        if (isNaN(key[0]) || isNaN(key[1])) {
          continue; // skip points without usable coordinates
        }
        // Scatter data keeps both coordinates in the key; line data wants
        // x in the key and y in the value.
        var kv = {key: key[0], value: key[1]};
        if (!first) {
          first = kv;
        }
        last = kv;
      }
      // Without this guard an empty or all-NaN group would yield
      // [undefined, undefined].
      return first ? [first, last] : [];
    }
  };
}
As with all fake groups, the idea is to calculate some group-like data when the chart asks for it (and no sooner), based on another group. Here the calculation is not very interesting, because you know how to calculate a regression and I don't. You'll want to replace first and last and the for loop with a real calculation; all this is doing is checking for valid points and keeping the first and last ones that it finds.
Interestingly, the scatter plot takes data where the key contains both x and y coordinates, but the line chart takes data where the key is x and the value is y. That's why we have the transformation kv = {key: key[0], value: key[1]}
Postscript
Note that you'll run into a dc.js bug if you put the regression guide points outside of the domain - the stack mixin is too aggressive about clipping points to the domain. There is an easy, ugly workaround that seems to work in this case: tell the line chart it has an ordinal x scale even though it doesn't:
// lineChart is declared up front so the child created inside compose() can be
// captured and patched once the chain has finished.
var composite = dc.compositeChart("#composite"),
    lineChart;
composite
    .width(600)
    // ...
    .compose([
        // ...
        lineChart = dc.lineChart(composite)
            .group(regressionGroup(scatterGroup))
    ]);
// Workaround: make the line chart report an ordinal x scale so that guide
// points outside the domain are not clipped away.
lineChart.isOrdinal = d3.functor(true);
Yuck! But it works! This hack probably only works inside a composite!
https://jsfiddle.net/gordonwoodhull/5tpcxov1/12/
I have a fully functional example of regression. I was precisely doing it when I came here for help and I found your question. It requires regression.js (here).
This follows Gordon's excellent suggestion of a "fake group", which should really be called an inline group, or immediate group, or even group on-the-fly. Here is mine:
/**
 * Builds a "fake group" holding the two end points of a linear regression
 * line fitted to a scatter-plot group. Requires regression.js to be loaded
 * (it provides the global `regression`).
 *
 * @param {{all: function(): Array}} group - group whose keys are [x, y] pairs.
 * @param {number} min - x value at which the line starts (x-scale minimum).
 * @param {number} max - x value at which the line ends (x-scale maximum).
 * @param {boolean} [filter=false] - when true, points with an empty x value
 *   are left out of the fit.
 * @returns {{all: function(): Array}} object whose all() returns two
 *   {key: x, value: y} points predicted at `min` and `max`.
 */
function myRegressionGroup(group, min, max, filter = false) {
  // Same truthiness test as before, except that x === 0 is a real coordinate
  // and must not be discarded as "empty".
  const hasX = (kv) => kv.key[0] === 0 || Boolean(kv.key[0]);

  return {
    all() {
      const entries = filter ? group.all().filter(hasX) : group.all();
      // `reg` is a local now; it used to leak as an implicit global.
      const reg = regression.linear(entries.map((kv) => kv.key));
      const first = reg.predict(min);
      const last = reg.predict(max);
      return [
        {key: first[0], value: first[1]},
        {key: last[0], value: last[1]},
      ];
    },
  };
}
Please notice that this function requires a crossfilter group and also the min and max from the x-scale. Since you typically have these values calculated for your xScale, all it takes is reusing them here. This is because the function uses the extremes with the predict method to calculate the two points of the regression line.
The optional filter data wrangler is for you to decide whether to remove empty values on x or not.
@Gordon, how should I go about including my regression example in the examples of using dc.js?
I would like to create a bar chart with dates on the x-axis. Labels should be displayed as months (i.e. Jan, or preferably Jan'17). My data always contains the first day of consecutive months, i.e. 01Jan, 01Feb, 01Mar. I have created a chart, but I am not able to get it aligned.
var chart = dc.barChart("#" + el.id);
// Dimension keyed on the category field; the group sums the value field
// (parsed as a float) per key.
var chCategory = ndx.dimension(function (d) { return d[chCategoryName]; });
// The reduceSum callback was missing its `function (d) {` header, which made
// the snippet a syntax error.
// NOTE(review): chValues is assigned without a declaration here - confirm it
// is declared in an enclosing scope.
chValues = chCategory.group().reduceSum(function (d) {
  return parseFloat(d[chValueName]);
});
//set range for x-axis
var minDate = chCategory.bottom(1)[0][chCategoryName];
var maxDate = chCategory.top(1)[0][chCategoryName];
chart
  .width(800)
  .height(200)
  .x(d3.time.scale().domain([minDate, maxDate]))
  .xUnits(d3.time.months)
  .dimension(chCategory)
  .group(chValues)
  .renderHorizontalGridLines(true)
  // .centerBar(true) //does not look better
  .controlsUseVisibility(true)
  .ordinalColors(arrColors)
  .transitionDuration(1000)
  .margins({top: 10, left: 80, right: 5, bottom: 20});
I have already read post: dc.js x-axis will not display ticks as months, shows decimals instead
but I am not able to implement it in a way that will keep correct sorting for different years.
dc.js takes the domain pretty literally - the x axis stretches exactly from the beginning to the end, disregarding the width of the bars or their placement. It's a design bug.
Here are two workarounds.
keep bars centered and add padding
If you're using elasticX you can manually correct it like this:
// Center each bar on its x value and pad the elastic x axis by 15 days.
chart.centerBar(true)
  .xAxisPadding(15).xAxisPaddingUnit('day');
If you're just setting the domain manually, that's
// Manual equivalent of the padding: move the lower bound 15 days earlier and
// the upper bound 15 days later before building the domain.
minDate = d3.time.day.offset(minDate, -15);
maxDate = d3.time.day.offset(maxDate, 15);
align the ticks to the left of bars and correct the right side of the domain
You don't say what problem you run into when you don't center the bars. But I know the right bar can get clipped.
If you want the elasticX effect, you can implement it manually like this, offsetting the right side by a month (example):
/**
 * Manual replacement for elasticX: sets the chart's x domain to run from the
 * smallest group key to one month past the largest group key, so the last
 * bar has room on its right.
 *
 * When the group has no entries the domain is left untouched; computing it
 * from an empty list would produce an undefined/invalid range.
 *
 * @param {Object} chart - chart exposing group().all() and an x() scale.
 */
function calc_domain(chart) {
  var entries = chart.group().all(); // fetched once, reused for min and max
  if (entries.length === 0) {
    return;
  }
  var keyOf = function (kv) { return kv.key; };
  var min = d3.min(entries, keyOf);
  var max = d3.time.month.offset(d3.max(entries, keyOf), 1);
  chart.x().domain([min, max]);
}
// Recompute the x domain on both the 'preRender' and the 'preRedraw' event.
chart.on('preRender', calc_domain);
chart.on('preRedraw', calc_domain);
Or without elasticX that's just:
// With a manually set domain, push the upper bound out by one month.
maxDate = d3.time.month.offset(maxDate, 1);
jsFiddle:
http://jsfiddle.net/PYeFP/
I have a bar chart set up that graphs a user's number of trips by day:
// Bar chart of trip counts per day.
// NOTE(review): tripVolume is assigned without a declaration here - confirm it
// is declared in an enclosing scope.
tripVolume = dc.barChart("#trip-volume")
    // data binding
    .dimension(tripsByDateDimension)
    .group(tripsByDateGroup)
    // geometry (dc defaults: 200 x 200)
    .width(980)
    .height(75)
    .margins({ top: 10, right: 50, bottom: 30, left: 40 })
    // no animated transitions (dc default: 500)
    .transitionDuration(0)
    // fixed axes: neither axis rescales to fit the data (both default to false)
    .elasticX(false)
    .elasticY(false)
    // time scale spanning the dimension's bottom and top startDate values
    .x(d3.time.scale().domain([tripsByDateDimension.bottom(1)[0].startDate, tripsByDateDimension.top(1)[0].startDate ]))
    // one x unit per day; brush selections would round to whole days
    .xUnits(d3.time.days)
    .round(d3.time.day.round)
    // center each bar on its x value (default: false)
    .centerBar(true)
    // grid lines in both directions (both default to false)
    .renderHorizontalGridLines(true)
    .renderVerticalGridLines(true)
    // no brush: filtering is meant to come from external controls
    .brushOn(false);
The graph displays fine but I would like to filter it using some jQuery controls.
When the user selects the date I am trying to add a filter to the chart, the filter gets added but the chart does not change, even if I redraw() or render().
This is how the crossfilter is setup:
// Crossfilter over the loaded rows.
// NOTE(review): tripsCx is assigned without a declaration here - confirm it is
// declared in an enclosing scope.
tripsCx = crossfilter(data.rows);
var allTripsGroup = tripsCx.groupAll();

// Dimension keyed on each trip's startDate, grouped with d3.time.day.
var tripsByDateDimension = tripsCx.dimension(function (trip) {
    return trip.startDate;
});
var tripsByDateGroup = tripsByDateDimension.group(d3.time.day);
The following are some of the methods I have used to try to apply a filter:
This should use the filterRange:
// filter() takes a single argument, so the range is passed as one
// [low, high] array. The bounds are dimension keys (startDate values), not
// whole records as returned by top().
// NOTE(review): newer dc.js versions may need dc.filters.RangedFilter(low, high)
// for range semantics - confirm against the version in use.
var newestTrips = d.dimension().top(20);
d.filter([newestTrips[19].startDate, newestTrips[0].startDate]);
FilterFunction:
// Attempt to filter with a predicate that keeps values strictly between
// `start` and `end`. The callback parameter `d` shadows the outer `d` that
// .filter() is called on.
// NOTE(review): confirm that this chart's filter() accepts a predicate function.
d.filter(function (d) {
return d.getTime() > start.valueOf() && d.getTime() < end.valueOf();
});
FilterExact:
// top() returns whole records; an exact filter has to be given the dimension
// key itself, i.e. the record's startDate.
d.filter(d.dimension().top(20)[0].startDate);
I also tried bypassing the chart and applying the filter directly on the dimension:
// Filter the dimension directly, keeping keys strictly between `start` and `end`.
// NOTE(review): crossfilter groups do not observe their own dimension's filter,
// so a chart whose group is built from this same dimension is not expected to
// change.
d.dimension().filterFunction(function (d) {
return d.getTime() > start.valueOf() && d.getTime() < end.valueOf()
});
Nothing I have done causes the chart to change.
I am beginning to think that I have the wrong expectation of what the filter function should do?
How can I manually filter the data in the dimension to have the chart updated?
I don't want to use a brush.
I will be filtering the data based on different criteria, I'm just trying to get the simple case working first.
I've spent a couple of days on this now and I'm at a loss as to what to try next.
Any help would be greatly appreciated.
Have you tried resetting the x property of the graph after setting the crossfilter filter?
I have a somewhat similar case and what I do after each action that changes the filtered values is something along the lines of
.x(..).dimension(...).group(...)
after creating/setting the filters
Tried to do something like that
// On click: filter the chart to the range covered by the dimension's top five
// records and zoom the x axis to that range, logging the chart's filters
// before and after.
$('#filter').on('click', function () {
    // Index 4 supplies the lower bound and index 0 the upper bound.
    var topFive = tripsByDateDimension.top(5);
    var minDate = topFive[4].startDate;
    var maxDate = topFive[0].startDate;

    console.log(tripVolume.filters());
    tripVolume.filter([minDate, maxDate]);
    tripVolume.x(d3.time.scale().domain([minDate, maxDate]));
    console.log(tripVolume.filters());

    dc.redrawAll();
});
http://jsfiddle.net/PYeFP/5/
Better answer per the discussion in the comment is to add the filter to the dimension, not the chart
Finally, one needs to realize what is mentioned in https://github.com/square/crossfilter/wiki/API-Reference#group-map-reduce
Note: a grouping intersects the crossfilter's current filters, except for the associated dimension's filter. Thus, group methods consider only records that satisfy every filter except this dimension's filter. So, if the crossfilter of payments is filtered by type and total, then group by total only observes the filter by type.
(also see https://groups.google.com/d/msg/dc-js-user-group/UFxvUND7hmY/btbAjqIIzl8J)