How to pivot a table with d3.js? - d3.js

Consider the following tabular data (just an example):
A,B,C,D,x,y,z
a0,b0,c0,d0,0.007,0.710,0.990
a0,b0,c0,d1,0.283,0.040,1.027
a0,b0,c1,d0,0.017,0.688,2.840
a0,b0,c1,d1,0.167,0.132,2.471
a0,b1,c0,d0,0.041,0.851,1.078
a0,b1,c0,d1,0.235,1.027,1.027
a0,b1,c1,d0,0.037,0.934,2.282
a0,b1,c1,d1,0.023,1.049,2.826
a1,b0,c0,d0,0.912,0.425,1.055
a1,b0,c0,d1,0.329,0.932,0.836
a1,b0,c1,d0,0.481,0.681,0.997
a1,b0,c1,d1,0.782,0.595,2.294
a1,b1,c0,d0,0.264,0.918,0.857
a1,b1,c0,d1,0.053,1.001,0.920
a1,b1,c1,d0,1.161,1.090,1.470
a1,b1,c1,d1,0.130,0.992,2.121
Note that each combination of distinct values for columns A, B, C, and D occurs exactly once in this table. Hence, one can think of this subset of columns as the "key columns", and the remaining columns as the "value columns".
Let's say this data is in some file data.csv, and that we read this file in with d3.csv, into the callback argument data, like so:
d3.csv('data.csv', function (error, data) {
...
});
I'm looking for a convenient d3.js manipulation to transform data so that the C column is "pivoted". By this I mean that the "value" columns of the transformed table are those obtained by "crossing" the values of the C column with the original "value" columns, x, y, and z. In other words, in csv format, the transformed table would look like this:
A,B,D,x_c0,x_c1,y_c0,y_c1,z_c0,z_c1
a0,b0,d0,0.007,0.017,0.710,0.688,0.990,2.840
a0,b0,d1,0.283,0.167,0.040,0.132,1.027,2.471
a0,b1,d0,0.041,0.037,0.851,0.934,1.078,2.282
a0,b1,d1,0.235,0.023,1.027,1.049,1.027,2.826
a1,b0,d0,0.912,0.481,0.425,0.681,1.055,0.997
a1,b0,d1,0.329,0.782,0.932,0.595,0.836,2.294
a1,b1,d0,0.264,1.161,0.918,1.090,0.857,1.470
a1,b1,d1,0.053,0.130,1.001,0.992,0.920,2.121
In case there's no easy way to do this, a simpler (but still useful) variant would be to do a similar transformation after first discarding all but one of the "value" columns. For example, after discarding the x and y columns, pivoting the C column would produce (in csv format):
A,B,D,c0,c1
a0,b0,d0,0.990,2.840
a0,b0,d1,1.027,2.471
a0,b1,d0,1.078,2.282
a0,b1,d1,1.027,2.826
a1,b0,d0,1.055,0.997
a1,b0,d1,0.836,2.294
a1,b1,d0,0.857,1.470
a1,b1,d1,0.920,2.121
The simplification lies in that now the original value column (z) can be simply replaced by a set of columns named after the values (c0 and c1 in this case) in the column that was pivoted (C).

You are looking for d3.nest:
// Pivot column C: for every (A, B, D) combination emit one row whose value
// columns are named C_<value of C>_<original value column>, e.g. C_c0_x.
d3.csv('data.csv', function (error, data) {
  // Stop on a failed load; without rows there is nothing to pivot.
  if (error) {
    console.error(error);
    return;
  }

  // Original "value" columns that get crossed with the values of C.
  var valueColumns = ['x', 'y', 'z'];

  var nester = d3.nest()
    .key(function (row) { return row.A; })
    .key(function (row) { return row.B; })
    .key(function (row) { return row.D; })
    .rollup(function (rows) {
      // Sort a copy by C so the generated columns come out in a stable
      // order without reordering the array handed to the rollup.
      var sortedRows = rows.slice().sort(function (x, y) {
        return x.C < y.C ? -1 : x.C > y.C ? 1 : 0;
      });
      // One {name, value} entry per (value column, value of C) pair,
      // grouped by value column: all x entries, then y, then z.
      var pivoted = [];
      valueColumns.forEach(function (column) {
        sortedRows.forEach(function (row) {
          pivoted.push({
            name: 'C_' + row.C + '_' + column,
            value: row[column]
          });
        });
      });
      return pivoted;
    });

  var nestedData = nester.entries(data);

  // Flatten the three nesting levels (A -> B -> D) back into flat rows.
  var pivotedData = [];
  nestedData.forEach(function (byA) {
    byA.values.forEach(function (byB) {
      byB.values.forEach(function (byD) {
        var obj = {
          A: byA.key,
          B: byB.key,
          D: byD.key
        };
        // byD.values is the rollup result: the pivoted {name, value} cells.
        byD.values.forEach(function (cell) {
          obj[cell.name] = cell.value;
        });
        pivotedData.push(obj);
      });
    });
  });
  console.log(JSON.stringify(pivotedData, null, ' '));
});
The result in pivotedData (the array logged to the console) would be of the following form:
[
{
"A": "a0",
"B": "b0",
"D": "d0",
"C_c0_x": "0.007",
"C_c1_x": "0.017",
"C_c0_y": "0.710",
"C_c1_y": "0.688",
"C_c0_z": "0.990",
"C_c1_z": "2.840"
},
...,
{
"A": "a1",
"B": "b1",
"D": "d1",
"C_c0_x": "0.053",
"C_c1_x": "0.130",
"C_c0_y": "1.001",
"C_c1_y": "0.992",
"C_c0_z": "0.920",
"C_c1_z": "2.121"
}
]
Demo Look at script.js and the output on the console.

Related

Display top n values based on count and not on alphabets dc.js

I have tried using the code below to display the top 3 values based on the count of the val in the json provided below.
yearRingChart.width(300).height(300).dimension(yearDim).group(
spendPertime).innerRadius(50).controlsUseVisibility(true)
.data(function(d) {
return d.order(
function(d) {
return d.val;
}).top(3);
}).ordering(function(d) {
return -d.val;
});
But this is getting sorted based on alphabetical order and not based on val.
I need to display the top 3 names with highest val in the json provided.
The code snippet is provided below -
var yearRingChart = dc.pieChart("#chart-ring-year"),
spendHistChart = dc.barChart("#chart-hist-spend"),
spenderRowChart = dc.rowChart("#chart-row-spenders");
var table = dc.dataTable('#table');
// use static or load via d3.csv("spendData.csv", function(error, spendData) {/* do stuff */});
var spendData = [{
Name : 'A',
val : '100',
time : 9,
place : 'Kolkata'
}, {
Name : 'B',
val : '40',
time : 10,
place : 'Angalore'
}, {
Name : 'C',
val : '5',
time : 11,
place : 'Raipur'
}, {
Name : 'A',
val : '70',
time : 12,
place : 'Chennai'
}, {
Name : 'B',
val : '20',
time : 10,
place : 'Mumbai'
}];
// normalize/parse data
spendData.forEach(function(d) {
d.val = d.val.match(/\d+/)[0];
});
// set crossfilter
var ndx = crossfilter(spendData), yearDim =
ndx.dimension(function(
d) {
return d.place;
}), spendDim = ndx.dimension(function(d) {
return Math.floor(d.val / 10);
}), nameDim = ndx.dimension(function(d) {
return d.Name;
}), spendPertime = yearDim.group().reduceSum(function(d) {
return +d.val;
})
, spendPerSev = yearDim.group().reduceSum(function(d) {
return +d.val;
}), spendPerName = nameDim.group().reduceSum(function(d) {
return +d.val;
}), spendHist = spendDim.group().reduceCount();
yearRingChart.width(300).height(300).dimension(yearDim).group(
spendPertime).innerRadius(50).controlsUseVisibility(true)
.data(function(d) {
return d.order(
function(d) {
return d.val;
}).top(3);
}).ordering(function(d) {
return -d.ue;
});
I am a newbie in dc.js. Any help will be appreciated.
Thankyou
I suspect that the big problem you are running into is that .ordering() - which is the right way to do this - takes group data, not raw data. Once you have aggregated your data using a group, you are going to have an array of {key,value} pairs, not your original array of data.
So I think when you attempt to read d.val or d.ue, you're just getting undefined, which results in no sorting. You could verify this by putting a breakpoint or console.log in your ordering callbacks. That's how I usually debug these kinds of things, when I have a running example.
group.order isn't going to have much effect, so I would suggest removing that.
I'd also suggest using .cap() instead of that complicated data-order-top thing you have, which looks complicated / delicate. Capping is built-in functionality for pie charts and row charts.

Crossfilter and DC.js: reduce to unique number

In the example below, I am trying to count the unique occurrences of Respond_Id. E.g., in this case the total should be 3, the "Respond_Id" values being 258, 261 and 345.
This is my data:
{"Respond_Id":258,"Gender":"Female","Age":"18-21","Answer":424},
{"Respond_Id":258,"Gender":"Female","Age":"18-21","Answer":428},
{"Respond_Id":261,"Gender":"Male","Age":"22-26", "Answer":427},
{"Respond_Id":261,"Gender":"Male","Age":"22-26", "Answer":432},
{"Respond_Id":345,"Gender":"Female","Age":"27-30","Answer":424},
{"Respond_Id":345,"Gender":"Female","Age":"27-30","Answer":425},
{"Respond_Id":345,"Gender":"Female","Age":"27-30","Answer":433},
I know I should use group reduce for this, so I tried (adapted from an example):
var ntotal = answerDim.group().reduce(
function(p, d) {
if(d.Respond_Id in p.Respond_Ids){
p.Respond_Ids[d.Respond_Id]++;
}
else {
p.Respond_Ids[d.Respond_Id] = 1;
p.RespondCount++;
}
return p;
},
function (p, d) {
p.Respond_Ids[d.Respond_Id]--;
if(p.Respond_Ids[d.Respond_Id] === 0){
delete p.Respond_Ids[d.Respond_Id];
p.RespondCount--;
}
return p;
},
function () {
return {
RespondCount: 0,
Respond_Ids: {}
};
}
);
Then:
numberDisplay
.group(ntotal)
.valueAccessor(function(d){ return d.value.RespondCount; });
dc.renderAll();
But seems not working. Does someone know how to make it work ? Thank you
Based on your JSFiddle, your setup is like this:
var RespondDim = ndx.dimension(function (d) { return d.Respond_Id;});
var ntotal = RespondDim.group().reduce(
function(p, d) {
if(d.Respond_Id in p.Respond_Ids){
p.Respond_Ids[d.Respond_Id]++;
}
else {
p.Respond_Ids[d.Respond_Id] = 1;
p.RespondCount++;
}
return p;
},
function (p, d) {
p.Respond_Ids[d.Respond_Id]--;
if(p.Respond_Ids[d.Respond_Id] === 0){
delete p.Respond_Ids[d.Respond_Id];
p.RespondCount--;
}
return p;
},
function () {
return {
RespondCount: 0,
Respond_Ids: {}
};
});
What is important to note here is that your group keys, by default, are the same as your dimension keys. So you will have one group per respondent ID. This isn't what you want.
You could switch to using dimension.groupAll, which is designed for this use case, but unfortunately the dimension.groupAll.reduce signature is slightly different. The easiest fix for you is going to be to just define your dimension to have a single value:
var RespondDim = ndx.dimension(function (d) { return true;});
Now you'll see that ntotal.all() will look like this:
{key: true, value: {RespondCount: 3, Respond_Ids: {258: 2, 261: 2, 345: 3}}}
Working fiddle: https://jsfiddle.net/v0rdoyrt/2/

dc.numberDisplay showing count for single group, not count of number of groups

I have data that looks like this:
var records = [
{id: '1', cat: 'A'},
{id: '2', cat: 'A'},
{id: '3', cat: 'B'},
{id: '4', cat: 'B'},
{id: '5', cat: 'B'},
{id: '6', cat: 'C'}
];
I want to create a dc.numberDisplay that displays the count of the number of unique categories, 3 in the example data above (A, B, & C).
This is what I'm currently doing:
var ndx = crossfilter(data); // init crossfilter
// create dimension based on category
var categoryDimension = ndx.dimension(
function (d) {
return d.category;
}
);
// Group by category
var categoryGroup = categoryDimension.group();
var categoryCount = dc.numberDisplay('#category-count'); // An empty span
categoryCount
.group(categoryGroup)
.valueAccessor(
function (d) { return d.value; }
);
The problem is that the numberDisplay displays 2 instead of 3. When debugging, I found that when the valueAccessor is called, d is the count of the number of elements of category A instead of the count of the number of categories.
How can I solve this problem?
UPDATE: Thanks to Nathan's solution, here is a working code snippet (ES2016 style)
const categoryDimension = claims.dimension(
(d) => {
return d.cat;
}
);
const categoryGroup = categoryDimension.groupAll().reduce(
(p, v) => { // add element
const cat = v.cat;
const count = p.categories.get(cat) || 0;
p.categories.set(cat, count + 1);
return p;
},
(p, v) => { // remove element
const cat = v.cat;
const count = p.categories.get(cat);
if (count === 1) {
p.categories.delete(cat);
} else {
p.categories.set(cat, count - 1);
}
return p;
},
() => { // init
return {
categories: new Map()
};
});
categoryCount
.group(categoryGroup)
.valueAccessor(
(d) => {
return d.categories.size;
}
);
You will need to use groupAll() since the number-display only looks at the top group. Then provide custom reduce functions to track unique categories. Finally, when DC.js pulls the value from the top group (there is only one) - just return the number of categories (which is the number of keys in the p object).
// Single groupAll() reduction whose value maps each category to the number
// of records currently counted for it; categories at zero are removed so the
// number of keys equals the number of distinct categories.
var categoryGroup = categoryDimension.groupAll().reduce(
  function (p, v) { //add
    if (p[v.cat]) {
      p[v.cat]++;
    } else {
      p[v.cat] = 1;
    }
    return p;
  },
  function (p, v) { //remove
    p[v.cat]--;
    if (p[v.cat] === 0) {
      delete p[v.cat];
    }
    return p;
  },
  function () { //init
    //initial p - only one since using groupAll
    // Prototype-less object: with a plain {} a category named like an
    // inherited member ("constructor", "toString", ...) would be read as
    // already present and miscounted.
    return Object.create(null);
  }
);
console.debug("groups", categoryGroup.value());
dc.numberDisplay('#category-count')
.group(categoryGroup)
.valueAccessor(
function (d) { return Object.keys(d).length; }
);

dc.js - min value from group per day

I have the following data structure and would like to display a lineChart with the minimum value of 'amount' (group) by Day (of date) (dimension).
var data = [
{date: "2014-01-01", amount: 10},
{date: "2014-01-01", amount: 1},
{date: "2014-01-15", amount: 0},
{date: "2014-01-15", amount: 10 },
{date: "2014-02-20", amount: 100 },
{date: "2014-02-20", amount: 10 },
];
Where as I would normally be doing something along the following lines, I'm not sure how to find the min in the group.
var dateDim = facts.dimension(function (d) {return d3.time.day(d.date);});
var dateDimGroup = dateDim.group().reduceSum(function (d) { return d.amount; })
Is this possible? I can't seem to find any examples of this so any help would be really appreciated.
Thanks!
You're going to need to keep a custom grouping. The basic idea is that you maintain an array (ordered is best) of all the values in a group. In your add function, you insert the current value into the array and assign the first value to a 'min' property of the group. In your remove function, you remove the current value from the array and then assign the first value to the 'min' property (and check if the list is empty, in which case set it to undefined or something along those lines).
Your functions will look something like this:
/**
 * Builds the "add" reducer for a minimum-tracking group.
 *
 * @param {function(Object): *} accessor - returns the value to track for a record.
 * @returns {function(Object, Object): Object} reducer (p, v) that inserts the
 *   record's value into the ascending p.valueList, refreshes p.min and
 *   returns p.
 */
function add(accessor) {
  var bisect = crossfilter.bisect.by(function (d) { return d; }).left;
  return function (p, v) {
    // Evaluate the accessor once per record and reuse the result.
    var value = accessor(v);
    // Insert at the leftmost sorted position so valueList stays ascending
    // and its first element is always the minimum.
    var index = bisect(p.valueList, value, 0, p.valueList.length);
    p.valueList.splice(index, 0, value);
    p.min = p.valueList[0];
    return p;
  };
}
/**
 * Builds the "remove" reducer for a minimum-tracking group.
 *
 * @param {function(Object): *} accessor - returns the tracked value for a record.
 * @returns {function(Object, Object): Object} reducer (p, v) that deletes one
 *   occurrence of the record's value from the ascending p.valueList,
 *   refreshes p.min (undefined once the list is empty) and returns p.
 */
function remove(accessor) {
  var bisect = crossfilter.bisect.by(function (d) { return d; }).left;
  return function (p, v) {
    var value = accessor(v);
    var index = bisect(p.valueList, value, 0, p.valueList.length);
    // bisect yields an insertion point even when the value is absent. Only
    // splice when the slot is not greater than the value (i.e. it matches);
    // otherwise an unrelated entry would be deleted and min corrupted.
    if (index < p.valueList.length && !(value < p.valueList[index])) {
      p.valueList.splice(index, 1);
    }
    p.min = p.valueList[0];
    return p;
  };
}
/**
 * Builds the "initial" reducer for a minimum-tracking group.
 *
 * @returns {function(): {valueList: Array, min: undefined}} factory that
 *   produces a fresh, empty state object on every call.
 */
function initial() {
  // Each call builds its own array, so no two states share a valueList.
  function createState() {
    var state = {};
    state.valueList = [];
    state.min = undefined;
    return state;
  }
  return createState;
}
'accessor' is a function to get at the value you want the minimum of, like in reduceSum. Call each of these functions to return the function you use in the corresponding place in the .group().reduce(add, remove, initial) call.
This code is pretty much ripped straight out of reductio, so you could also use reductio and do:
var dateDim = facts.dimension(function (d) {return d3.time.day(d.date);});
var dateDimGroup = reductio().min(function (d) { return d.amount; })(dateDim.group());
Your dateDimGroup will then have a 'min' property that should give you the minimum amount for every date.

How to create a tooltip with custom value

i use NVD3 for a scatter chart but when hovering for the tooltip i want the label instead of the key.
this is my json:
long_data = [
{
key: 'PC1',
color: '#00cc00',
values: [
{
"label" : "Lichtpuntje" ,
"x" : 11.16,
"y" : -0.99,
"size":1000,
"color": '#FFCCOO'
} ,
{ ....
this is the js
nv.addGraph(function() {
chart = nv.models.scatterChart()
.showDistX(true)
.showDistY(true)
.useVoronoi(true)
.color(d3.scale.category10().range())
.transitionDuration(300)
...
chart.xAxis.tickFormat(d3.format('.02f'));
chart.yAxis.tickFormat(d3.format('.02f'));
chart.tooltipContent(function(i) { return labelArray[i]; });
d3.select('#test1 svg')
.datum(long_data)
.call(chart);
...
How can I get the tooltip to show the label value? Or how can I get i as an index parameter?
ok, not a clean solution, but works:
// Tooltip text is the label stored at the same position as the hovered value.
chart.tooltipContent(function(key, y, e, graph) {
  // Declared locally; the bare assignment created an implicit global, which
  // throws in strict mode.
  var labelIndex = arrayContains(resultYVal, e);
  return resultLabel[labelIndex];
});
/**
 * Searches `a` from the end for an element loosely equal (==) to `obj`.
 *
 * @param {Array} a - list to search.
 * @param {*} obj - value to look for.
 * @returns {number|boolean} index of the last matching element, or false
 *   when nothing matches.
 */
function arrayContains(a, obj) {
  for (var idx = a.length - 1; idx >= 0; idx -= 1) {
    if (a[idx] == obj) {
      return idx;
    }
  }
  return false;
}
You can access your label variable like this:
chart.tooltipContent(function(graph) {
console.log(graph); //examine the graph object in the console for more info
return graph.point.label;
});
Newer NVD3 versions use tooltip.contentGenerator. Also, I don't understand why shev72 is iterating over the whole series: NVD3 gives us the index into the series directly, e.g. if our data looks like this:
data = [{"key": "Group 0", "values": [{"y": 1.65166973680992, "x": 0.693722035658137, "z": "SRR1974855"}, {"y": 1.39376073765462, "x": -0.54475764264808, "z": "SRR1974854"}]
then you can get your z values like this:
// Tooltip text is the custom "z" field of the hovered point, located through
// the series and point indices supplied in the callback argument.
chart.tooltip.contentGenerator(function (d) {
  var ptIdx = d.pointIndex;
  var serIdx = d.seriesIndex;
  // Declared locally; the bare assignment created an implicit global, which
  // throws in strict mode.
  var z = d.series[serIdx].values[ptIdx].z;
  return z;
});
For anyone here having issues trying to show a custom tooltip with InteractiveGuideline enabled, you must use the following:
chart.interactiveLayer.tooltip.contentGenerator(function (obj) { ... });
As a bonus, here's the code for the default table layout with an added custom value 'z':
// Renders the guideline tooltip as an HTML table: a header carrying obj.value
// and one row per series entry with colour swatch, key, custom "z" string
// and value.
chart.interactiveLayer.tooltip.contentGenerator(function (obj) {
  var header = '<thead><tr><td colspan="4"><strong class="x-value">' + obj.value + '</strong></td></tr></thead>';

  // One <tr> per series entry; entry.data.z is the custom value in the data.
  var rows = obj.series.map(function (entry) {
    return '<tr><td class="legend-color-guide"><div style="background-color: ' + entry.color + ';"></div></td>' +
      '<td class="key">' + entry.key + '</td>' +
      '<td class="string">' + entry.data.z + '</td>' +
      '<td class="value">' + entry.value + '</td></tr>';
  });

  return '<table>' + header + '<tbody>' + rows.join('') + '</tbody></table>';
});
I found the answer from davedriesmans quite useful, however note that in the code
chart.tooltipContent(function(key, y, e, graph)) is not for a scatterPlot.
This looks like the function for a pie chart. In that case you can use the e.pointIndex directly to allow you to index into the series by graph.series.values[e.pointIndex].
However, I built on the function davedriesmans suggested for a scatterplot as follows.
/**
 * Finds the point of graph.series.values whose coordinates match (x1, y1).
 *
 * @param {Object} graph - object exposing the points as graph.series.values.
 * @param {*} x1 - x coordinate to match.
 * @param {*} y1 - y coordinate to match.
 * @returns {Object|null} the last matching point, or null when none matches.
 */
function getGraphtPt(graph, x1, y1) {
  var points = graph.series.values;
  for (var i = points.length - 1; i >= 0; i--) {
    // Logical && (the original used bitwise &) so y is only compared once x
    // matched. Loose == is kept: presumably the caller may pass formatted
    // strings for numeric coordinates - confirm against the caller.
    if (points[i].x == x1 && points[i].y == y1) {
      return points[i];
    }
  }
  return null;
}
the main chart call to insert the tooltip is just
// Tooltip heading: the series key followed by the hovered point's custom
// string, or "unknown" when no point matches the coordinates.
chart.tooltipContent(function (key, x, y, graph) {
  // Declared locally; the bare assignments created implicit globals, which
  // throw in strict mode.
  var s = "unknown";
  var pt = getGraphtPt(graph, x, y);
  if (pt != null) {
    //below key1 is a custom string I added to each point, this could be 'x='+pt.x, or any custom string
    // I found adding a custom string field to each pt to be quite handy for tooltips
    s = pt.key1;
  }
  return '<h3>' + key + s + '</h3>';
});

Resources