Let's say I have a 100 years worth of monthly data, total of 1200 data points, see bottom.
To plot a tiny overview line chart (e.g. just 100 data points) I have to do it manually by grouping. For instance, group the data by year, then get the average of 12 months value, iterate through every group, then finally reduced the data points to 100.
Instead of this approach, is there a convenient way using crossfilter or any other library?
[
{ date: 1900-01, value: 72000000000},
{ date: 1900-02, value: 58000000000},
{ date: 1900-03, value: 2900000000},
{ date: 1900-04, value: 31000000000},
{ date: 1900-05, value: 33000000000},
...
{ date: 1999-11, value: 30000000000},
{ date: 1999-12, value: 10000000000},
]
It's going to be the same algorithm no matter which library you use, just different ways of specifying it. In this case d3.nest is probably the easiest way to do this, but if you want quick filtering, the crossfilter way isn't too bad.
The difference between using d3.nest and crossfilter is that we're not constructing an array of values, just a single value. So we'll maintain both sum and count.
We'll also need to specify what happens when a row is removed from a bin.
var parse = d3.timeParse("%Y-%m");
data.forEach(function(d) {
// it's best to convert fields before passing to crossfilter
// because crossfilter will look at them many times
d.date = parse(d.key);
});
var cf = crossfilter(data);
var yearDim = cf.dimension(d => d3.timeYear(d.date));
var yearAvgGroup = yearDim.group().reduce(
function(p, v) { // add
p.sum += v.value;
++p.count;
p.avg = p.sum/p.count;
return p;
},
function(p, v) { // remove
p.sum -= v.value;
--p.count;
p.avg = p.count ? p.sum/p.count : 0;
return p;
},
function() { // init
return {sum: 0, count: 0, avg: 0};
}
);
Now yearAvgGroup.all() will return an array of key/value pairs, where the key is the year, and the value contains sum, count, and avg.
Crossfilter doesn't make this problem particularly convenient to solve, but reductio has a helper function for this:
var yearAvgGroup = yearDim.group();
reductio().avg(d => d.value);
Note: it doesn't matter unless you have ton of data, but it's more efficient to only compute sum and count in the group, and compute the average when it's needed.
If you're using dc.js, you can use valueAccessor for this:
// remove avg lines from the above, and
chart.dimension(yearDim)
.group(yearAvgGroup)
.valueAccessor(kv => kv.value.sum / kv.value.count);
Assuming your question is only concerned with producing the data, you can use d3-nest, without crossfilter, to average each year:
Parsing the date value, you can then format the date as a year to create a key. This groups values by key, then we rollup those values with a function to calculate the mean for a given year:
var parse = d3.timeParse("%Y-%m"); // takes: "1900-01"
var format = d3.timeFormat("%Y"); // gives: "1900"
var means = d3.nest()
.key(function(d) { return format(parse(d.date)); })
.rollup(function(values) { return d3.mean(values, function(d) {return d.value; }) })
.entries(data);
Which gives us the following structure:
[
{
"key": "1900",
"value": 39380000000
},
{
"key": "1999",
"value": 20000000000
}
]
var data = [
{ date: "1900-01", value: 72000000000},
{ date: "1900-02", value: 58000000000},
{ date: "1900-03", value: 2900000000},
{ date: "1900-04", value: 31000000000},
{ date: "1900-05", value: 33000000000},
{ date: "1999-11", value: 30000000000},
{ date: "1999-12", value: 10000000000},
];
var parse = d3.timeParse("%Y-%m");
var format = d3.timeFormat("%Y");
var means = d3.nest()
.key(function(d) { return format(parse(d.date)); })
.rollup(function(values) { return d3.mean(values, function(d) {return d.value; }) })
.entries(data);
console.log(means);
<script src="https://cdnjs.cloudflare.com/ajax/libs/d3/5.7.0/d3.min.js"></script>
I have data that looks like this:
var records = [
{id: '1', cat: 'A'},
{id: '2', cat: 'A'},
{id: '3', cat: 'B'},
{id: '4', cat: 'B'},
{id: '5', cat: 'B'},
{id: '6', cat: 'C'}
];
I want to create a dc.numberDisplay that displays the count of the number of unique categories, 3 in the example data above (A, B, & C).
This is what I'm currently doing:
var ndx = crossfilter(data); // init crossfilter
// create dimension based on category
var categoryDimension = ndx.dimension(
function (d) {
return d.category;
}
);
// Group by category
var categoryGroup = categoryDimension.group();
var categoryCount = dc.numberDisplay('#category-count'); // An empty span
categoryCount
.group(categoryGroup)
.valueAccessor(
function (d) { return d.value; }
);
The problem is that the numberDisplay displays 2 instead of 3. When debugging, I found that when the valueAccessor is called, d is the count of the number of elements of category A instead of the count of the number of categories.
How can I solve this problem?
UPDATE: Thanks to Nathan's solution, here is a working code snippet (ES2016 style)
const categoryDimension = claims.dimension(
(d) => {
return d.cat;
}
);
const categoryGroup = categoryDimension.groupAll().reduce(
(p, v) => { // add element
const cat = v.cat;
const count = p.categories.get(cat) || 0;
p.categories.set(cat, count + 1);
return p;
},
(p, v) => { // remove element
const cat = v.cat;
const count = p.categories.get(cat);
if (count === 1) {
p.categories.delete(cat);
} else {
p.categories.set(cat, count - 1);
}
return p;
},
() => { // init
return {
categories: new Map()
};
});
categoryCount
.group(categoryGroup)
.valueAccessor(
(d) => {
return d.categories.size;
}
);
You will need to use groupAll() since the number-display only looks at the top group. Then provide custom reduce functions to track unique categories. Finally, when DC.js pulls the value from the top group (there is only one) - just return the number of categories (which is the number of keys in the p object).
var categoryGroup = categoryDimension.groupAll().reduce(
function (p, v) { //add
if(p[v.cat]) {
p[v.cat]++;
} else {
p[v.cat] = 1;
}
return p;
},
function (p, v) { //remove
p[v.cat]--;
if(p[v.cat] === 0) {
delete p[v.cat];
}
return p;
},
function () { //init
//initial p - only one since using groupAll
return {};
}
);
console.debug("groups", categoryGroup.value());
dc.numberDisplay('#category-count')
.group(categoryGroup)
.valueAccessor(
function (d) { return Object.keys(d).length; }
);
I am looking for how to create a rowchart in dc.js to show and filter items with multiple tags. I've summed up a few answers given on stack overflow, and now have a working code.
var data = [
{id:1, tags: [1,2,3]},
{id:2, tags: [3]},
{id:3, tags: [1]},
{id:4, tags: [2,3]},
{id:5, tags: [3]},
{id:6, tags: [1,2,3]},
{id:7, tags: [1,2]}];
var content=crossfilter(data);
var idDimension = content.dimension(function (d) { return d.id; });
var grid = dc.dataTable("#idgrid");
grid
.dimension(idDimension)
.group(function(d){ return "ITEMS" })
.columns([
function(d){return d.id+" : "; },
function(d){return d.tags;},
])
function reduceAdd(p, v) {
v.tags.forEach (function(val, idx) {
p[val] = (p[val] || 0) + 1; //increment counts
});
return p;
}
function reduceRemove(p, v) {
v.tags.forEach (function(val, idx) {
p[val] = (p[val] || 0) - 1; //decrement counts
});
return p;
}
function reduceInitial() {
return {};
}
var tags = content.dimension(function (d) { return d.tags });
var groupall = tags.groupAll();
var tagsGroup = groupall.reduce(reduceAdd, reduceRemove, reduceInitial).value();
tagsGroup.all = function() {
var newObject = [];
for (var key in this) {
if (this.hasOwnProperty(key) && key != "") {
newObject.push({
key: key,
value: this[key]
});
}
}
return newObject;
}
var tagsChart = dc.rowChart("#idtags")
tagsChart
.width(400)
.height(200)
.renderLabel(true)
.labelOffsetY(10)
.gap(2)
.group(tagsGroup)
.dimension(tags)
.elasticX(true)
.transitionDuration(1000)
.colors(d3.scale.category10())
.label(function (d) { return d.key })
.filterHandler (function (dimension, filters) {
var fm = filters.map(Number)
dimension.filter(null);
if (fm.length === 0)
dimension.filter(null);
else
dimension.filterFunction(function (d) {
for (var i=0; i < fm.length; i++) {
if (d.indexOf(fm[i]) <0) return false;
}
return true;
});
return filters;
}
)
.xAxis().ticks(5);
It can be seen on http://jsfiddle.net/ewm76uru/24/
Nevertheless, the rowchart is not updated when I filter by one tag. For example, on jsfiddle, if you select tag '1', it filters items 1,3,6 and 7. Fine. But the rowchart is not updated... I Should have tag '3' count lowered to 2 for example.
Is there a way to have the rowchart tags counts updated each time I filter by tags ?
Thanks.
After a long struggle, I think I have finally gathered a working solution.
As said on crossfilter documentation : "a grouping intersects the crossfilter's current filters, except for the associated dimension's filter"
So, the tags dimension is not filtered when tag selection is modified, and there is no flag or function to force this reset. Nevertheless, there is a workaround (given here : https://github.com/square/crossfilter/issues/146).
The idea is to duplicate the 'tags' dimension, and to use it as the filtered dimension :
var tags = content.dimension(function (d) { return d.tags });
// duplicate the dimension
var tags2 = content.dimension(function (d) { return d.tags });
var groupall = tags.groupAll();
...
tagsChart
.group(tagsGroup)
.dimension(tags2) // and use this duplicated dimension
as it can been seen here :
http://jsfiddle.net/ewm76uru/30/
I hope this will help.
Consider the following tabular data (just an example):
A,B,C,D,x,y,z
a0,b0,c0,d0,0.007,0.710,0.990
a0,b0,c0,d1,0.283,0.040,1.027
a0,b0,c1,d0,0.017,0.688,2.840
a0,b0,c1,d1,0.167,0.132,2.471
a0,b1,c0,d0,0.041,0.851,1.078
a0,b1,c0,d1,0.235,1.027,1.027
a0,b1,c1,d0,0.037,0.934,2.282
a0,b1,c1,d1,0.023,1.049,2.826
a1,b0,c0,d0,0.912,0.425,1.055
a1,b0,c0,d1,0.329,0.932,0.836
a1,b0,c1,d0,0.481,0.681,0.997
a1,b0,c1,d1,0.782,0.595,2.294
a1,b1,c0,d0,0.264,0.918,0.857
a1,b1,c0,d1,0.053,1.001,0.920
a1,b1,c1,d0,1.161,1.090,1.470
a1,b1,c1,d1,0.130,0.992,2.121
Note that each combination of distinct values for columns A, B, C, and D occurs exactly once in this table. Hence, one can think of this subset of columns as the "key columns", and the remaining columns as the "value columns".
Let's say this data is in some file data.csv, and that we read this file in with d3.csv, into the callback argument data, like so:
d3.csv('data.csv', function (error, data) {
...
});
I'm looking for a convenient d3.js manipulation to transform data so that the C column is "pivoted". By this I mean that the "value" columns of the transformed table are those obtained by "crossing" the values of the C column with the original "value" columns, x, y, and z. In other words, in csv format, the transformed table would look like this:
A,B,D,x_c0,x_c1,y_c0,y_c1,z_c0,z_c1
a0,b0,d0,0.007,0.017,0.710,0.688,0.990,2.840
a0,b0,d1,0.283,0.167,0.040,0.132,1.027,2.471
a0,b1,d0,0.041,0.037,0.851,0.934,1.078,2.282
a0,b1,d1,0.235,0.023,1.027,1.049,1.027,2.826
a1,b0,d0,0.912,0.481,0.425,0.681,1.055,0.997
a1,b0,d1,0.329,0.782,0.932,0.595,0.836,2.294
a1,b1,d0,0.264,1.161,0.918,1.090,0.857,1.470
a1,b1,d1,0.053,0.130,1.001,0.992,0.920,2.121
In case there's no easy way to do this, a simpler (but still useful) variant would be to do a similar transformation after first discarding all but one of the "value" columns. For example, after discarding the x and y columns, pivoting the C column would produce (in csv format):
A,B,D,c0,c1
a0,b0,d0,0.990,2.840
a0,b0,d1,1.027,2.471
a0,b1,d0,1.078,2.282
a0,b1,d1,1.027,2.826
a1,b0,d0,1.055,0.997
a1,b0,d1,0.836,2.294
a1,b1,d0,0.857,1.470
a1,b1,d1,0.920,2.121
The simplification lies in that now the original value column (z) can be simply replaced by a set of columns named after the values (c0 and c1 in this case) in the column that was pivoted (C).
You are looking for d3.nest:
d3.csv('data.csv', function (data) {
var nester = d3.nest()
.key(function (d) { return d.A; })
.key(function (d) { return d.B; })
.key(function (d) { return d.D; })
.rollup(function (values) {
var sortedValues = values.sort(function (x, y) {
return x.C < y.C ? -1 : x.C > y.C ? 1 : 0;
});
var mkKey = function (c, name, v) {
return {
name: 'C_' + c + '_' + name,
value: v
};
}
var pivotedX = sortedValues.map(function (d) { return mkKey(d.C, 'x', d.x); }),
pivotedY = sortedValues.map(function (d) { return mkKey(d.C, 'y', d.y); }),
pivotedZ = sortedValues.map(function (d) { return mkKey(d.C, 'z', d.z); });
return Array.prototype.concat.apply([], [pivotedX, pivotedY, pivotedZ]);
});
var nestedData = nester.entries(data);
var pivotedData = [];
nestedData.forEach(function (kv1) {
var a = kv1.key;
kv1.values.forEach(function (kv2) {
var b = kv2.key;
kv2.values.forEach(function (kv3) {
var d = kv3.key;
var obj = {
A: a,
B: b,
D: d
};
kv3.values.forEach(function (d){
obj[d.name] = d.value;
})
pivotedData.push(obj);
})
})
})
console.log(JSON.stringify(pivotedData, null, ' '));
});
The result in nestedData would be of the following form:
[
{
"A": "a0",
"B": "b0",
"D": "d0",
"C_c0_x": "0.007",
"C_c1_x": "0.017",
"C_c0_y": "0.710",
"C_c1_y": "0.688",
"C_c0_z": "0.990",
"C_c1_z": "2.840"
},
...,
{
"A": "a1",
"B": "b1",
"D": "d1",
"C_c0_x": "0.053",
"C_c1_x": "0.130",
"C_c0_y": "1.001",
"C_c1_y": "0.992",
"C_c0_z": "0.920",
"C_c1_z": "2.121"
}
]
Demo Look at script.js and the output on the console.