Reduce number of datapoints using crossfilter - d3.js

Let's say I have a 100 years worth of monthly data, total of 1200 data points, see bottom.
To plot a tiny overview line chart (e.g. just 100 data points) I have to do it manually by grouping. For instance, group the data by year, then get the average of 12 months value, iterate through every group, then finally reduced the data points to 100.
Instead of this approach, is there a convenient way using crossfilter or any other library?
[
{ date: 1900-01, value: 72000000000},
{ date: 1900-02, value: 58000000000},
{ date: 1900-03, value: 2900000000},
{ date: 1900-04, value: 31000000000},
{ date: 1900-05, value: 33000000000},
...
{ date: 1999-11, value: 30000000000},
{ date: 1999-12, value: 10000000000},
]

It's going to be the same algorithm no matter which library you use, just different ways of specifying it. In this case d3.nest is probably the easiest way to do this, but if you want quick filtering, the crossfilter way isn't too bad.
The difference between using d3.nest and crossfilter is that we're not constructing an array of values, just a single value. So we'll maintain both sum and count.
We'll also need to specify what happens when a row is removed from a bin.
var parse = d3.timeParse("%Y-%m");
data.forEach(function(d) {
// it's best to convert fields before passing to crossfilter
// because crossfilter will look at them many times
d.date = parse(d.key);
});
var cf = crossfilter(data);
var yearDim = cf.dimension(d => d3.timeYear(d.date));
var yearAvgGroup = yearDim.group().reduce(
function(p, v) { // add
p.sum += v.value;
++p.count;
p.avg = p.sum/p.count;
return p;
},
function(p, v) { // remove
p.sum -= v.value;
--p.count;
p.avg = p.count ? p.sum/p.count : 0;
return p;
},
function() { // init
return {sum: 0, count: 0, avg: 0};
}
);
Now yearAvgGroup.all() will return an array of key/value pairs, where the key is the year, and the value contains sum, count, and avg.
Crossfilter doesn't make this problem particularly convenient to solve, but reductio has a helper function for this:
var yearAvgGroup = yearDim.group();
reductio().avg(d => d.value);
Note: it doesn't matter unless you have ton of data, but it's more efficient to only compute sum and count in the group, and compute the average when it's needed.
If you're using dc.js, you can use valueAccessor for this:
// remove avg lines from the above, and
chart.dimension(yearDim)
.group(yearAvgGroup)
.valueAccessor(kv => kv.value.sum / kv.value.count);

Assuming your question is only concerned with producing the data, you can use d3-nest, without crossfilter, to average each year:
Parsing the date value, you can then format the date as a year to create a key. This groups values by key, then we rollup those values with a function to calculate the mean for a given year:
var parse = d3.timeParse("%Y-%m"); // takes: "1900-01"
var format = d3.timeFormat("%Y"); // gives: "1900"
var means = d3.nest()
.key(function(d) { return format(parse(d.date)); })
.rollup(function(values) { return d3.mean(values, function(d) {return d.value; }) })
.entries(data);
Which gives us the following structure:
[
{
"key": "1900",
"value": 39380000000
},
{
"key": "1999",
"value": 20000000000
}
]
var data = [
{ date: "1900-01", value: 72000000000},
{ date: "1900-02", value: 58000000000},
{ date: "1900-03", value: 2900000000},
{ date: "1900-04", value: 31000000000},
{ date: "1900-05", value: 33000000000},
{ date: "1999-11", value: 30000000000},
{ date: "1999-12", value: 10000000000},
];
var parse = d3.timeParse("%Y-%m");
var format = d3.timeFormat("%Y");
var means = d3.nest()
.key(function(d) { return format(parse(d.date)); })
.rollup(function(values) { return d3.mean(values, function(d) {return d.value; }) })
.entries(data);
console.log(means);
<script src="https://cdnjs.cloudflare.com/ajax/libs/d3/5.7.0/d3.min.js"></script>

Related

Display multiple bar on barChart from a custom reducer

I have a group with custom reducer calculating various total and average values. The goal is to show them all on the same barChart. But I can only get the first bar to show. Here is the JSFiddler
https://jsfiddle.net/71k0guxe/15/
Is it possible to show all the value on the barChart?
Thanks in advance!
Data
ID,SurveySent,ResponseReceived
1,Yes,No
2,No,No
3,Yes,Yes
4,No,No
5,Yes,Yes
6,No,No
7,Yes,No
8,No,No
9,Yes,No
10,No,No
Code
var chart = dc.barChart("#test");
//d3.csv("morley.csv", function(error, experiments) {
var experiments = d3.csvParse(d3.select('pre#data').text());
var ndx = crossfilter(experiments),
dimStat = ndx.dimension(function(d) {return "Statistics";}),
groupStat = dimStat.group().reduce(reduceAdd, reduceRemove, reduceInitial);
function reduceAdd(p, v) {
++p.count;
if (v.SurveySent === "Yes") p.sent++;
if (v.ResponseReceived === "Yes") p.received++;
return p;
}
function reduceRemove(p, v) {
--p.count;
if (v.SurveySent === "Yes") p.sent--;
if (v.ResponseReceived === "Yes") p.received--;
return p;
}
function reduceInitial() {
return {count: 0, sent: 0, received: 0};
}
chart
.width(400)
.height(400)
.xUnits(dc.units.ordinal)
.label(function(d) { return d.data.value })
.elasticY(true)
.x(d3.scaleOrdinal().domain(["Total", "Sent", "Received"]))
.brushOn(false)
.yAxisLabel("This is the Y Axis!")
.dimension(dimStat)
.group(groupStat)
.valueAccessor(function (d) {
//Is it possible to return count sent and received all from here?
return d.value.count;
})
.on('renderlet', function(chart) {
chart.selectAll('rect').on("click", function(d) {
console.log("click!", d);
});
});
chart.render();
Just got some idea from the FAQ section of dc.js/wiki/FAQ
Fake Groups
"dc.js uses a very limited part of the crossfilter API - in fact, it really only uses dimension.filter() and group.all()."
I don't care about filtering, so i just need to mark up my own group.all. Basically transpose it from one row to multiple row. Works my purpose.
/* solution */
var groupStatTranposed = group_transpose(groupStat);
function group_transpose(source_group, f) {
return {
all:function () {
return [
{key: "Total", value: source_group.all()[0].value.count},
{key: "Sent", value: source_group.all()[0].value.sent},
{key: "Received", value: source_group.all()[0].value.received}
];
}
};
}
//use groupStatTranposed in the chart.
/** solution */

How to decide dimensions and groups in dc.js?

I am new to dc.js and facing issues in deciding dimensions and groups. I have data like this
this.data = [
{Type:'Type1', Day:1, Count: 20},
{Type:'Type2', Day:1, Count: 10},
{Type:'Type1', Day:2, Count: 30},
{Type:'Type2', Day:2, Count: 10}
]
I have to show a composite chart of two linecharts one for type Type1 and other for Type2. My x-axis will be Day. So one of my dimensions will be Day
var ndx = crossfilter(this.data);
var dayDim = ndx.dimension(function(d) { return d.Day; })
How the grouping will be done? If I do it on Count, the total count of a particular Day shows up which I don't want.
Your question isn't entirely clear, but it sounds like you want to group by both Type and Day
One way to do it is to use composite keys:
var typeDayDimension = ndx.dimension(function(d) {return [d.Type, d.Day]; }),
typeDayGroup = typeDayDimension.group().reduceSum(function(d) { return d.Count; });
Then you could use the series chart to generate two line charts inside a composite chart.
var chart = dc.seriesChart("#test");
chart
.width(768)
.height(480)
.chart(function(c) { return dc.lineChart(c); })
// ...
.dimension(typeDayDimension)
.group(typeDayGroup)
.seriesAccessor(function(d) {return d.key[0];})
.keyAccessor(function(d) {return +d.key[1];}) // convert to number
// ...
See the series chart example for more details.
Although what Gordon suggested is working perfectly fine, if you want to achieve the same result using composite chart then you can use group.reduce(add, remove, initial) method.
function reduceAdd(p, v) {
if (v.Type === "Type1") {
p.docCount += v.Count;
}
return p;
}
function reduceRemove(p, v) {
if (v.Type === "Type1") {
p.docCount -= v.Count;
}
return p;
}
function reduceInitial() {
return { docCount: 0 };
}
Here's an example: http://jsfiddle.net/curtisp/7frw79q6
Quoting Gordon:
Series chart is just a composite chart with the automatic splitting of the data and generation of the child charts.

Crossfilter isn't applying filter to multi line-chart. What am I missing?

I am new to using crossfilter, dc.js, and d3.js. I am struggling to get the filters to apply to my composite line chart. I've gone through several tutorials, but apparently am missing something as the charts don't change or look different at all if I remove the dimension with the filter applied.
Here is an example of my data:
var data = array(
{
price:{value: 38}
shipment:{start_date: "2017-12-06", end_date: "2018-01-15"}
side:"sell"
},
{
price:{value: 44}
shipment:{start_date: "2017-10-08", end_date: "2018-01-15"}
side:"sell"
},
{
price:{value: 38}
shipment:{start_date: "2017-11-15", end_date: "2018-01-15"}
side:"buy"
},
{
price:{value: 38}
shipment:{start_date: "2017-10-25", end_date: "2018-01-15"}
side:"buy"
}
);
And here is where I declare my dimensions:
` var crossFilteredData = crossfilter(data);
// Dimension by start_date
var dateDimension = crossFilteredData.dimension(function(d) {
var date = Date.parse(d.shipment.start_date);
return date;
});
// Dimension by side
var sideDimension = crossFilteredData.dimension(function(d) {
console.log(d.side);
return d.side;
});
sideDimension.filter("buy");
sideDimension.top(Infinity);`
After declaring my dimensions and applying a filter to the sideDimension, I am building my group and calculating a date's max price and min price for each day:
var performanceByDateGroup = dateDimension.group().reduce(
function (p, v) {
++p.count;
p.sum += v.price.value;
// Calculate Min
if (p.minPrice > v.price.value) {
p.minPrice = v.price.value;
}
// Calculate Max
if (p.maxPrice < v.price.value) {
p.maxPrice = v.price.value;
}
return p;
},
function (p, v) {
--p.count;
p.sum -= v.price.value;
return p;
},
function () {
return {
count: 0,
sum: 0,
minPrice: 1000,
maxPrice: 0
};
}
);
Lastly, I put the dimension and groups into the composite line chart:
priceChart
.width(960)
.height(400)
.margins({top: 10, right: 10, bottom: 40, left: 10})
.transitionDuration(500)
.elasticY(true)
.renderHorizontalGridLines(true)
.yAxisLabel('Price')
.shareTitle(false)
.x(d3.time.scale().domain([Date.parse("2017-11-01"), Date.parse("2018-03-31")]))
.xAxisLabel('Shipment Start Date')
.legend(dc.legend().x(40).y(0).itemHeight(16).gap(4))
.compose([
dc.lineChart(priceChart)
.dimension(dateDimension)
.group(performanceByDateGroup, 'Min Price')
.colors('red')
.renderTitle(true)
.title(function(d) {
return 'Min: $' + d.value.minPrice.toFixed(2);
})
.valueAccessor(function (d) {
return d.value.minPrice;
}),
dc.lineChart(priceChart)
.dimension(dateDimension)
.group(performanceByDateGroup, 'Max Price')
.colors('green')
.renderTitle(true)
.title(function(d) {
return 'Max: $' + d.value.maxPrice.toFixed(2);
})
.valueAccessor(function (d) {
return d.value.maxPrice;
})
])
.brushOn(false);
dc.renderAll();
The chart shows all the plotted points, as if the entire sideDimension variable is not being recognized at all. If I remove the sideDimension variable and filter, the chart looks the exact same.
I greatly appreciate any help or suggestions you can offer.
It's difficult, but not impossible to calculate min and max values using a crossfilter reduction.
When crossfilter is evaluating a group, it will first add all the records and then remove the records that don't match the filters. This is so that the result is consistent whether or not the filters existed when the dimension was created. (For example, you want zeros for values that exist but are filtered out.)
In this case, you are not doing anything with minPrice and maxPrice inside of your reduceRemove function:
function (p, v) {
--p.count;
p.sum -= v.price.value;
return p;
},
So as we observe, the records are added but never removed.
However, the situation is worse than this, because min and max are more complicated aggregations than sums and averages. Think about it: you can remember the min and max, but when those are removed, what value do you fall back on?
reductio has handy functions for doing min and max, or if you want to do it yourself, this example shows how.

Update multiple tags rowchart in dc.js

I am looking for how to create a rowchart in dc.js to show and filter items with multiple tags. I've summed up a few answers given on stack overflow, and now have a working code.
var data = [
{id:1, tags: [1,2,3]},
{id:2, tags: [3]},
{id:3, tags: [1]},
{id:4, tags: [2,3]},
{id:5, tags: [3]},
{id:6, tags: [1,2,3]},
{id:7, tags: [1,2]}];
var content=crossfilter(data);
var idDimension = content.dimension(function (d) { return d.id; });
var grid = dc.dataTable("#idgrid");
grid
.dimension(idDimension)
.group(function(d){ return "ITEMS" })
.columns([
function(d){return d.id+" : "; },
function(d){return d.tags;},
])
function reduceAdd(p, v) {
v.tags.forEach (function(val, idx) {
p[val] = (p[val] || 0) + 1; //increment counts
});
return p;
}
function reduceRemove(p, v) {
v.tags.forEach (function(val, idx) {
p[val] = (p[val] || 0) - 1; //decrement counts
});
return p;
}
function reduceInitial() {
return {};
}
var tags = content.dimension(function (d) { return d.tags });
var groupall = tags.groupAll();
var tagsGroup = groupall.reduce(reduceAdd, reduceRemove, reduceInitial).value();
tagsGroup.all = function() {
var newObject = [];
for (var key in this) {
if (this.hasOwnProperty(key) && key != "") {
newObject.push({
key: key,
value: this[key]
});
}
}
return newObject;
}
var tagsChart = dc.rowChart("#idtags")
tagsChart
.width(400)
.height(200)
.renderLabel(true)
.labelOffsetY(10)
.gap(2)
.group(tagsGroup)
.dimension(tags)
.elasticX(true)
.transitionDuration(1000)
.colors(d3.scale.category10())
.label(function (d) { return d.key })
.filterHandler (function (dimension, filters) {
var fm = filters.map(Number)
dimension.filter(null);
if (fm.length === 0)
dimension.filter(null);
else
dimension.filterFunction(function (d) {
for (var i=0; i < fm.length; i++) {
if (d.indexOf(fm[i]) <0) return false;
}
return true;
});
return filters;
}
)
.xAxis().ticks(5);
It can be seen on http://jsfiddle.net/ewm76uru/24/
Nevertheless, the rowchart is not updated when I filter by one tag. For example, on jsfiddle, if you select tag '1', it filters items 1,3,6 and 7. Fine. But the rowchart is not updated... I Should have tag '3' count lowered to 2 for example.
Is there a way to have the rowchart tags counts updated each time I filter by tags ?
Thanks.
After a long struggle, I think I have finally gathered a working solution.
As said on crossfilter documentation : "a grouping intersects the crossfilter's current filters, except for the associated dimension's filter"
So, the tags dimension is not filtered when tag selection is modified, and there is no flag or function to force this reset. Nevertheless, there is a workaround (given here : https://github.com/square/crossfilter/issues/146).
The idea is to duplicate the 'tags' dimension, and to use it as the filtered dimension :
var tags = content.dimension(function (d) { return d.tags });
// duplicate the dimension
var tags2 = content.dimension(function (d) { return d.tags });
var groupall = tags.groupAll();
...
tagsChart
.group(tagsGroup)
.dimension(tags2) // and use this duplicated dimension
as it can been seen here :
http://jsfiddle.net/ewm76uru/30/
I hope this will help.

dc.js - min value from group per day

I have the following data structure and would like to display a lineChart with the minimum value of 'amount' (group) by Day (of date) (dimension).
var data = [
{date: "2014-01-01", amount: 10},
{date: "2014-01-01", amount: 1},
{date: "2014-01-15", amount: 0},
{date: "2014-01-15", amount: 10 },
{date: "2014-02-20", amount: 100 },
{date: "2014-02-20", amount: 10 },
];
Where as I would normally be doing something along the following lines, I'm not sure how to find the min in the group.
var dateDim = facts.dimension(function (d) {return d3.time.day(d.date);});
var dateDimGroup = dateDim.group().reduceSum(function (d) { return d.amount; })
Is this possible? I can't seem to find any examples of this so any help would be really appreciated.
Thanks!
You're going to need to keep a custom grouping. The basic idea is that you maintain an array (ordered is best) of all the values in a group. In your add function, you the current value to the array and assign the first value to a 'min' property of the group. In your remove function you remove values, you remove the current value from the array and then assign the first value to the 'min' property (and check if the list is empty, in which case set it to undefined or something along those lines).
You functions will look something like this:
function add(accessor) {
var i;
var bisect = crossfilter.bisect.by(function(d) { return d; }).left;
return function (p, v) {
// Not sure if this is more efficient than sorting.
i = bisect(p.valueList, accessor(v), 0, p.valueList.length);
p.valueList.splice(i, 0, accessor(v));
p.min = p.valueList[0];
return p;
};
};
function remove(accessor) {
var i;
var bisect = crossfilter.bisect.by(function(d) { return d; }).left;
return function (p, v) {
i = bisect(p.valueList, accessor(v), 0, p.valueList.length);
// Value already exists or something has gone terribly wrong.
p.valueList.splice(i, 1);
p.min = p.valueList[0];
return p;
};
};
function initial() {
return function () {
return {
valueList: [],
min: undefined
};
};
}
'accessor' is a function to get at the value you want the minimum of, like in reduceSum. Call each of these functions to return the function you use in the corresponding place in the .group(add, remove, initial) function.
This code is pretty much ripped straight out of reductio, so you could also use reductio and do:
var dateDim = facts.dimension(function (d) {return d3.time.day(d.date);});
var dateDimGroup = reductio().min(function (d) { return d.amount; })(dateDim.group());
Your dateDimGroup will then have a 'min' property that should give you the minimum amount for every date.

Resources