dc.js - min value from group per day - dc.js

I have the following data structure and would like to display a lineChart with the minimum value of 'amount' (group) by Day (of date) (dimension).
var data = [
{date: "2014-01-01", amount: 10},
{date: "2014-01-01", amount: 1},
{date: "2014-01-15", amount: 0},
{date: "2014-01-15", amount: 10 },
{date: "2014-02-20", amount: 100 },
{date: "2014-02-20", amount: 10 },
];
Where as I would normally be doing something along the following lines, I'm not sure how to find the min in the group.
var dateDim = facts.dimension(function (d) {return d3.time.day(d.date);});
var dateDimGroup = dateDim.group().reduceSum(function (d) { return d.amount; })
Is this possible? I can't seem to find any examples of this so any help would be really appreciated.
Thanks!

You're going to need to keep a custom grouping. The basic idea is that you maintain an array (ordered is best) of all the values in a group. In your add function, you the current value to the array and assign the first value to a 'min' property of the group. In your remove function you remove values, you remove the current value from the array and then assign the first value to the 'min' property (and check if the list is empty, in which case set it to undefined or something along those lines).
You functions will look something like this:
function add(accessor) {
var i;
var bisect = crossfilter.bisect.by(function(d) { return d; }).left;
return function (p, v) {
// Not sure if this is more efficient than sorting.
i = bisect(p.valueList, accessor(v), 0, p.valueList.length);
p.valueList.splice(i, 0, accessor(v));
p.min = p.valueList[0];
return p;
};
};
function remove(accessor) {
var i;
var bisect = crossfilter.bisect.by(function(d) { return d; }).left;
return function (p, v) {
i = bisect(p.valueList, accessor(v), 0, p.valueList.length);
// Value already exists or something has gone terribly wrong.
p.valueList.splice(i, 1);
p.min = p.valueList[0];
return p;
};
};
function initial() {
return function () {
return {
valueList: [],
min: undefined
};
};
}
'accessor' is a function to get at the value you want the minimum of, like in reduceSum. Call each of these functions to return the function you use in the corresponding place in the .group(add, remove, initial) function.
This code is pretty much ripped straight out of reductio, so you could also use reductio and do:
var dateDim = facts.dimension(function (d) {return d3.time.day(d.date);});
var dateDimGroup = reductio().min(function (d) { return d.amount; })(dateDim.group());
Your dateDimGroup will then have a 'min' property that should give you the minimum amount for every date.

Related

Reduce number of datapoints using crossfilter

Let's say I have a 100 years worth of monthly data, total of 1200 data points, see bottom.
To plot a tiny overview line chart (e.g. just 100 data points) I have to do it manually by grouping. For instance, group the data by year, then get the average of 12 months value, iterate through every group, then finally reduced the data points to 100.
Instead of this approach, is there a convenient way using crossfilter or any other library?
[
{ date: 1900-01, value: 72000000000},
{ date: 1900-02, value: 58000000000},
{ date: 1900-03, value: 2900000000},
{ date: 1900-04, value: 31000000000},
{ date: 1900-05, value: 33000000000},
...
{ date: 1999-11, value: 30000000000},
{ date: 1999-12, value: 10000000000},
]
It's going to be the same algorithm no matter which library you use, just different ways of specifying it. In this case d3.nest is probably the easiest way to do this, but if you want quick filtering, the crossfilter way isn't too bad.
The difference between using d3.nest and crossfilter is that we're not constructing an array of values, just a single value. So we'll maintain both sum and count.
We'll also need to specify what happens when a row is removed from a bin.
var parse = d3.timeParse("%Y-%m");
data.forEach(function(d) {
// it's best to convert fields before passing to crossfilter
// because crossfilter will look at them many times
d.date = parse(d.key);
});
var cf = crossfilter(data);
var yearDim = cf.dimension(d => d3.timeYear(d.date));
var yearAvgGroup = yearDim.group().reduce(
function(p, v) { // add
p.sum += v.value;
++p.count;
p.avg = p.sum/p.count;
return p;
},
function(p, v) { // remove
p.sum -= v.value;
--p.count;
p.avg = p.count ? p.sum/p.count : 0;
return p;
},
function() { // init
return {sum: 0, count: 0, avg: 0};
}
);
Now yearAvgGroup.all() will return an array of key/value pairs, where the key is the year, and the value contains sum, count, and avg.
Crossfilter doesn't make this problem particularly convenient to solve, but reductio has a helper function for this:
var yearAvgGroup = yearDim.group();
reductio().avg(d => d.value);
Note: it doesn't matter unless you have ton of data, but it's more efficient to only compute sum and count in the group, and compute the average when it's needed.
If you're using dc.js, you can use valueAccessor for this:
// remove avg lines from the above, and
chart.dimension(yearDim)
.group(yearAvgGroup)
.valueAccessor(kv => kv.value.sum / kv.value.count);
Assuming your question is only concerned with producing the data, you can use d3-nest, without crossfilter, to average each year:
Parsing the date value, you can then format the date as a year to create a key. This groups values by key, then we rollup those values with a function to calculate the mean for a given year:
var parse = d3.timeParse("%Y-%m"); // takes: "1900-01"
var format = d3.timeFormat("%Y"); // gives: "1900"
var means = d3.nest()
.key(function(d) { return format(parse(d.date)); })
.rollup(function(values) { return d3.mean(values, function(d) {return d.value; }) })
.entries(data);
Which gives us the following structure:
[
{
"key": "1900",
"value": 39380000000
},
{
"key": "1999",
"value": 20000000000
}
]
var data = [
{ date: "1900-01", value: 72000000000},
{ date: "1900-02", value: 58000000000},
{ date: "1900-03", value: 2900000000},
{ date: "1900-04", value: 31000000000},
{ date: "1900-05", value: 33000000000},
{ date: "1999-11", value: 30000000000},
{ date: "1999-12", value: 10000000000},
];
var parse = d3.timeParse("%Y-%m");
var format = d3.timeFormat("%Y");
var means = d3.nest()
.key(function(d) { return format(parse(d.date)); })
.rollup(function(values) { return d3.mean(values, function(d) {return d.value; }) })
.entries(data);
console.log(means);
<script src="https://cdnjs.cloudflare.com/ajax/libs/d3/5.7.0/d3.min.js"></script>

Display multiple bar on barChart from a custom reducer

I have a group with custom reducer calculating various total and average values. The goal is to show them all on the same barChart. But I can only get the first bar to show. Here is the JSFiddler
https://jsfiddle.net/71k0guxe/15/
Is it possible to show all the value on the barChart?
Thanks in advance!
Data
ID,SurveySent,ResponseReceived
1,Yes,No
2,No,No
3,Yes,Yes
4,No,No
5,Yes,Yes
6,No,No
7,Yes,No
8,No,No
9,Yes,No
10,No,No
Code
var chart = dc.barChart("#test");
//d3.csv("morley.csv", function(error, experiments) {
var experiments = d3.csvParse(d3.select('pre#data').text());
var ndx = crossfilter(experiments),
dimStat = ndx.dimension(function(d) {return "Statistics";}),
groupStat = dimStat.group().reduce(reduceAdd, reduceRemove, reduceInitial);
function reduceAdd(p, v) {
++p.count;
if (v.SurveySent === "Yes") p.sent++;
if (v.ResponseReceived === "Yes") p.received++;
return p;
}
function reduceRemove(p, v) {
--p.count;
if (v.SurveySent === "Yes") p.sent--;
if (v.ResponseReceived === "Yes") p.received--;
return p;
}
function reduceInitial() {
return {count: 0, sent: 0, received: 0};
}
chart
.width(400)
.height(400)
.xUnits(dc.units.ordinal)
.label(function(d) { return d.data.value })
.elasticY(true)
.x(d3.scaleOrdinal().domain(["Total", "Sent", "Received"]))
.brushOn(false)
.yAxisLabel("This is the Y Axis!")
.dimension(dimStat)
.group(groupStat)
.valueAccessor(function (d) {
//Is it possible to return count sent and received all from here?
return d.value.count;
})
.on('renderlet', function(chart) {
chart.selectAll('rect').on("click", function(d) {
console.log("click!", d);
});
});
chart.render();
Just got some idea from the FAQ section of dc.js/wiki/FAQ
Fake Groups
"dc.js uses a very limited part of the crossfilter API - in fact, it really only uses dimension.filter() and group.all()."
I don't care about filtering, so i just need to mark up my own group.all. Basically transpose it from one row to multiple row. Works my purpose.
/* solution */
var groupStatTranposed = group_transpose(groupStat);
function group_transpose(source_group, f) {
return {
all:function () {
return [
{key: "Total", value: source_group.all()[0].value.count},
{key: "Sent", value: source_group.all()[0].value.sent},
{key: "Received", value: source_group.all()[0].value.received}
];
}
};
}
//use groupStatTranposed in the chart.
/** solution */

Crossfilter isn't applying filter to multi line-chart. What am I missing?

I am new to using crossfilter, dc.js, and d3.js. I am struggling to get the filters to apply to my composite line chart. I've gone through several tutorials, but apparently am missing something as the charts don't change or look different at all if I remove the dimension with the filter applied.
Here is an example of my data:
var data = array(
{
price:{value: 38}
shipment:{start_date: "2017-12-06", end_date: "2018-01-15"}
side:"sell"
},
{
price:{value: 44}
shipment:{start_date: "2017-10-08", end_date: "2018-01-15"}
side:"sell"
},
{
price:{value: 38}
shipment:{start_date: "2017-11-15", end_date: "2018-01-15"}
side:"buy"
},
{
price:{value: 38}
shipment:{start_date: "2017-10-25", end_date: "2018-01-15"}
side:"buy"
}
);
And here is where I declare my dimensions:
` var crossFilteredData = crossfilter(data);
// Dimension by start_date
var dateDimension = crossFilteredData.dimension(function(d) {
var date = Date.parse(d.shipment.start_date);
return date;
});
// Dimension by side
var sideDimension = crossFilteredData.dimension(function(d) {
console.log(d.side);
return d.side;
});
sideDimension.filter("buy");
sideDimension.top(Infinity);`
After declaring my dimensions and applying a filter to the sideDimension, I am building my group and calculating a date's max price and min price for each day:
var performanceByDateGroup = dateDimension.group().reduce(
function (p, v) {
++p.count;
p.sum += v.price.value;
// Calculate Min
if (p.minPrice > v.price.value) {
p.minPrice = v.price.value;
}
// Calculate Max
if (p.maxPrice < v.price.value) {
p.maxPrice = v.price.value;
}
return p;
},
function (p, v) {
--p.count;
p.sum -= v.price.value;
return p;
},
function () {
return {
count: 0,
sum: 0,
minPrice: 1000,
maxPrice: 0
};
}
);
Lastly, I put the dimension and groups into the composite line chart:
priceChart
.width(960)
.height(400)
.margins({top: 10, right: 10, bottom: 40, left: 10})
.transitionDuration(500)
.elasticY(true)
.renderHorizontalGridLines(true)
.yAxisLabel('Price')
.shareTitle(false)
.x(d3.time.scale().domain([Date.parse("2017-11-01"), Date.parse("2018-03-31")]))
.xAxisLabel('Shipment Start Date')
.legend(dc.legend().x(40).y(0).itemHeight(16).gap(4))
.compose([
dc.lineChart(priceChart)
.dimension(dateDimension)
.group(performanceByDateGroup, 'Min Price')
.colors('red')
.renderTitle(true)
.title(function(d) {
return 'Min: $' + d.value.minPrice.toFixed(2);
})
.valueAccessor(function (d) {
return d.value.minPrice;
}),
dc.lineChart(priceChart)
.dimension(dateDimension)
.group(performanceByDateGroup, 'Max Price')
.colors('green')
.renderTitle(true)
.title(function(d) {
return 'Max: $' + d.value.maxPrice.toFixed(2);
})
.valueAccessor(function (d) {
return d.value.maxPrice;
})
])
.brushOn(false);
dc.renderAll();
The chart shows all the plotted points, as if the entire sideDimension variable is not being recognized at all. If I remove the sideDimension variable and filter, the chart looks the exact same.
I greatly appreciate any help or suggestions you can offer.
It's difficult, but not impossible to calculate min and max values using a crossfilter reduction.
When crossfilter is evaluating a group, it will first add all the records and then remove the records that don't match the filters. This is so that the result is consistent whether or not the filters existed when the dimension was created. (For example, you want zeros for values that exist but are filtered out.)
In this case, you are not doing anything with minPrice and maxPrice inside of your reduceRemove function:
function (p, v) {
--p.count;
p.sum -= v.price.value;
return p;
},
So as we observe, the records are added but never removed.
However, the situation is worse than this, because min and max are more complicated aggregations than sums and averages. Think about it: you can remember the min and max, but when those are removed, what value do you fall back on?
reductio has handy functions for doing min and max, or if you want to do it yourself, this example shows how.

Crossfilter and DC.js: reduce to unique number

In the example below, I am trying to sum by unique occurence of Respond_Id. eg. in this case, it should be in total 3, "Respond_Id" being 258,261 and 345.
This is my data:
{"Respond_Id":258,"Gender":"Female","Age":"18-21","Answer":424},
{"Respond_Id":258,"Gender":"Female","Age":"18-21","Answer":428},
{"Respond_Id":261,"Gender":"Male","Age":"22-26", "Answer":427},
{"Respond_Id":261,"Gender":"Male","Age":"22-26", "Answer":432},
{"Respond_Id":345,"Gender":"Female","Age":"27-30","Answer":424},
{"Respond_Id":345,"Gender":"Female","Age":"27-30","Answer":425},
{"Respond_Id":345,"Gender":"Female","Age":"27-30","Answer":433},
I know I should use group reduce for this, so I tried (adapted from an example):
var ntotal = answerDim.group().reduce(
function(p, d) {
if(d.Respond_Id in p.Respond_Ids){
p.Respond_Ids[d.Respond_Id]++;
}
else {
p.Respond_Ids[d.Respond_Id] = 1;
p.RespondCount++;
}
return p;
},
function (p, d) {
p.Respond_Ids[d.Respond_Id]--;
if(p.Respond_Ids[d.Respond_Id] === 0){
delete p.Respond_Ids[d.Respond_Id];
p.RespondCount--;
}
return p;
},
function () {
return {
RespondCount: 0,
Respond_Ids: {}
};
}
);
Then:
numberDisplay
.group(ntotal)
.valueAccessor(function(d){ return d.value.RespondCount; });
dc.renderAll();
But seems not working. Does someone know how to make it work ? Thank you
Based on your JSFiddle, your setup is like this:
var RespondDim = ndx.dimension(function (d) { return d.Respond_Id;});
var ntotal = RespondDim.group().reduce(
function(p, d) {
if(d.Respond_Id in p.Respond_Ids){
p.Respond_Ids[d.Respond_Id]++;
}
else {
p.Respond_Ids[d.Respond_Id] = 1;
p.RespondCount++;
}
return p;
},
function (p, d) {
p.Respond_Ids[d.Respond_Id]--;
if(p.Respond_Ids[d.Respond_Id] === 0){
delete p.Respond_Ids[d.Respond_Id];
p.RespondCount--;
}
return p;
},
function () {
return {
RespondCount: 0,
Respond_Ids: {}
};
});
What is important to note here is that your group keys, by default, are the same as your dimension keys. So you will have one group per respondent ID. This isn't what you want.
You could switch to using dimension.groupAll, which is designed for this use case, but unfortunately the dimension.groupAll.reduce signature is slightly different. The easiest fix for you is going to be to just define your dimension to have a single value:
var RespondDim = ndx.dimension(function (d) { return true;});
Now you'll see that ntotal.all() will look like this:
{key: true, value: {RespondCount: 3, Respond_Ids: {258: 2, 261: 2, 345: 3}}}
Working fiddle: https://jsfiddle.net/v0rdoyrt/2/

dc.numberDisplay showing count for single group, not count of number of groups

I have data that looks like this:
var records = [
{id: '1', cat: 'A'},
{id: '2', cat: 'A'},
{id: '3', cat: 'B'},
{id: '4', cat: 'B'},
{id: '5', cat: 'B'},
{id: '6', cat: 'C'}
];
I want to create a dc.numberDisplay that displays the count of the number of unique categories, 3 in the example data above (A, B, & C).
This is what I'm currently doing:
var ndx = crossfilter(data); // init crossfilter
// create dimension based on category
var categoryDimension = ndx.dimension(
function (d) {
return d.category;
}
);
// Group by category
var categoryGroup = categoryDimension.group();
var categoryCount = dc.numberDisplay('#category-count'); // An empty span
categoryCount
.group(categoryGroup)
.valueAccessor(
function (d) { return d.value; }
);
The problem is that the numberDisplay displays 2 instead of 3. When debugging, I found that when the valueAccessor is called, d is the count of the number of elements of category A instead of the count of the number of categories.
How can I solve this problem?
UPDATE: Thanks to Nathan's solution, here is a working code snippet (ES2016 style)
const categoryDimension = claims.dimension(
(d) => {
return d.cat;
}
);
const categoryGroup = categoryDimension.groupAll().reduce(
(p, v) => { // add element
const cat = v.cat;
const count = p.categories.get(cat) || 0;
p.categories.set(cat, count + 1);
return p;
},
(p, v) => { // remove element
const cat = v.cat;
const count = p.categories.get(cat);
if (count === 1) {
p.categories.delete(cat);
} else {
p.categories.set(cat, count - 1);
}
return p;
},
() => { // init
return {
categories: new Map()
};
});
categoryCount
.group(categoryGroup)
.valueAccessor(
(d) => {
return d.categories.size;
}
);
You will need to use groupAll() since the number-display only looks at the top group. Then provide custom reduce functions to track unique categories. Finally, when DC.js pulls the value from the top group (there is only one) - just return the number of categories (which is the number of keys in the p object).
var categoryGroup = categoryDimension.groupAll().reduce(
function (p, v) { //add
if(p[v.cat]) {
p[v.cat]++;
} else {
p[v.cat] = 1;
}
return p;
},
function (p, v) { //remove
p[v.cat]--;
if(p[v.cat] === 0) {
delete p[v.cat];
}
return p;
},
function () { //init
//initial p - only one since using groupAll
return {};
}
);
console.debug("groups", categoryGroup.value());
dc.numberDisplay('#category-count')
.group(categoryGroup)
.valueAccessor(
function (d) { return Object.keys(d).length; }
);

Resources