Create groups based on countries in dc.js - dc.js

I have a CSV file, and one of its fields is country, which contains the country name for each row.
How can I create a pie chart of continents (continent is not a field in my CSV)? I need to group the countries into continents in the code.

You will need a mapping from countries to continents, something like:
// Lookup table: keys are country names, values are the continent each
// country belongs to. Only a few sample entries are listed here.
var continent = {
USA: 'North America',
France: 'Europe',
Belgium: 'Europe',
India: 'Asia',
China: 'Asia',
Kenya: 'Africa'
// ...
};
Then you can define your dimension to key on continents instead of countries by using the map:
// Index the records, keying each one by continent rather than by country.
var cf = crossfilter(data);
var continentDimension = cf.dimension(d => {
  // Look the country up in the `continent` map. Crossfilter keys must be
  // naturally ordered, so fall back to a label instead of returning
  // undefined for a country that is missing from the map.
  return continent[d.country] || 'Unknown';
});
// One group (one pie slice) per continent; the default reduction counts records.
var continentGroup = continentDimension.group();
And the rest is as usual; take a look at the pie chart example for some hints on configuring the chart.

Related

Is there a way to filter a dimension based on the value of another field?

I'm building a data dashboard for a project and I want to be able to compare data from two distinct groups in the same dataset.
My dataset looks like this:
Number,Name,Gender,Race,Height,Publisher,Alignment,Weight,Superpower,Strength,Costume Colour
1,A-Bomb,Male,Metahuman,203,Marvel Comics,Good,441,Superhuman Strength,10,None
2,Abin Sur,Male,Alien,185,DC Comics,Good,90,Cosmic Power,40,Green
3,Abomination,Male,Metahuman,203,Marvel Comics,Bad,441,Superhuman Strength,10,None
4,Abraxas,Male,Cosmic Entity,1000,Marvel Comics,Bad,1000,Reality Warping,40,Green
5,Absorbing Man,Male,Metahuman,193,Marvel Comics,Bad,122,Matter Duplication,5,None
6,Adam Strange,Male,Human,185,DC Comics,Good,88,None,0,Red
I want to create two separate selectMenus which list the character names, but with each of the two menus filtered on Publisher name.
So one drop down will have all the characters associated with Marvel Comics, and the other will have all the characters associated with DC Comics.
Once this is set up, the idea is that the dashboard can then show a set of graphs which work as a comparison between the two characters that have been selected - so I don't want the entire dataset to be split out, I just want the selection to be filtered by Publisher.
I've been through dozens of different Stack Overflow threads about similar stuff but still struggling. I've created the dimensions for character name and character publisher but I'm getting really lost trying to use one to filter the other.
This is what I've got so far (ignore the costume color stuff, that's for something further down the line) - the data in 'heroes-information.csv' is in the same format I shared above.
// Bring in data from csv files
Promise.all([d3.csv("../data/heroes-information.csv"), d3.csv("../data/costume-colors.csv")])
  .then(function(data) {
    // Promise.all resolves to [hero rows, costume color rows], so the numeric
    // clean-up has to walk the hero rows (data[0]) rather than `data` itself;
    // iterating `data` would only set properties on the two arrays.
    data[0].forEach(function(d) {
      d.Height = +d.Height;
      d.Weight = +d.Weight;
      d.Strength = +d.Strength;
    });
    // Bring in Heroes data
    var ndx = crossfilter(data[0]);
    // Bring in costume color data
    var ndxcol = crossfilter(data[1]);
    // Create colorScale to dynamically color pie chart slices
    var colorScale = d3.scaleOrdinal()
      .domain(data[1].map(row => row.Name))
      .range(data[1].map(row => row.RGB));
    // Define chart type
    var dccomicsSelector = dc.selectMenu('#dccomics-selector');
    // Define chart dimensions: one keyed by character name, one by publisher
    var character = ndx.dimension(dc.pluck('Name'));
    var characterPublisher = ndx.dimension(dc.pluck('Publisher'));
    var dccomicsCharacters = character.group();
    var dccomicsPublisher = characterPublisher.group();
    // The select menu currently lists every character name, unfiltered by publisher
    dccomicsSelector
      .dimension(character)
      .group(dccomicsCharacters);
    dc.renderAll();
  });
I'm probably missing something really obvious but I'm fairly new to DC.js and Crossfilter so a bit lost in the weeds with this one, any help would be much appreciated!

How to use Reductio's Exception Aggregation function on multiple columns of data?

I'm working on a data visualisation project, and I'm using the dc.js library. After converting the data into a flat data structure (for cross filter), the data looks like this:
[{"date":"2015-01-01","region":1,"cancer":10,"diabetes":5,"aqi_index":66,"pm2_5":20,"pm10":35},{"date":"2015-01-01","region":2,"cancer":30,"diabetes":25,"aqi_index":66,"pm2_5":20,"pm10":35}]
I have a line chart to display the air pollution data and a stacked area chart to display the medical records.
This is where I face a problem. As you can see, my table contains two rows for each date, one for each region. The medical data differs according to region; however, the air pollution data is the same across both regions for the same date. I use the following code to obtain the plot of the air pollutant readings against time:
// Index the flat records and key them by their date.
var ndx = crossfilter(data);
var dateDim = ndx.dimension(d => d.date);
// One summed series per pollutant: every row that shares a date is added in.
var aqi = dateDim.group().reduceSum(d => d.aqi_index);
var pm2_5 = dateDim.group().reduceSum(d => d.pm2_5);
var pm10 = dateDim.group().reduceSum(d => d.pm10);
My chart for the air pollution data becomes inaccurate as I display twice the amount of pollutants for each date. How can I display the unique value of each pollutant reading (AQI Index, PM 2.5, PM 10) for each date using Reductio's Exception Aggregation function? Will doing so affect my chart for medical data?
I think this should work:
var ndx = crossfilter(data);
var dateDim = ndx.dimension(function (row) { return row["date"]; });
var dateGroup = dateDim.group();
var reducer = reductio();
// value() lets several named aggregations live on the same group.
// "cancer": add up the "cancer" property of every record for a date.
reducer.value("cancer").sum("cancer");
// "aqi_index": exception aggregation keyed on "date", so the reading is
// only summed once per date instead of once per row (no double-counting).
reducer.value("aqi_index").exception("date").exceptionSum("aqi_index");
// Install the configured reducers on the date group.
reducer(dateGroup);
Add as many value aggregations as you want to aggregate all of your measures on the group. Let me know if you get an error.
Example JSFiddle (see the console for results): https://jsfiddle.net/esjewett/5onebhsd/1/

Set binsize for a barchart in dc.js

I have a dataset of patients where each patient has an age.
I have a barchart which displays the patient age which is generated with the following code:
/* Age bar chart: number of patients per age ('leeftijd') */
var ageDim = cf1.dimension(dc.pluck('leeftijd')); // dimension keyed by the age field
var ageGroup = ageDim.group().reduceCount(); // one bin per distinct age, counting records
var ageChart = dc.barChart('#ageBarChart'); // link chart to DOM
ageChart
  .dimension(ageDim)
  .colors('#542788')
  .group(ageGroup)
  .x(d3.scale.linear().domain([15,75]))
  .gap(1) // gap between bars
  .xAxisLabel("Leeftijd in jaren")
  // Label text repaired: the "ë" had been mis-encoded as "ƫ".
  .yAxisLabel("Aantal Patiënten");
ageChart.render();
My question is as follows: how can I change the bin size so that, instead of showing how many people there are for each individual age (e.g. 43), the chart displays ages in groups of 5? For example, it should show the number of people aged 40-45.
This is what the group's value function is for (sort of the "map" part of map-reduce).
You can specify that each age should fall into the low end of the range like this:
// Bucket every age into the bin that starts at the nearest lower multiple
// of 5, then count the records that land in each bin.
var ageGroup = ageDim
  .group(function (age) {
    var binIndex = Math.floor(age / 5);
    return binIndex * 5;
  })
  .reduceCount();
Now you'll get bins at 0, 5, 10, etc.

dc.js stacked line chart with more than 1 dimension

My dataset is an array of JSON objects like this:
// Sample records: one object per company, with a numeric value and a
// day/month/year date string for each of two rounds (middle rows elided).
var data = [ { company: "A", date_round_1: "21/05/2002", round_1: 5, date_round_2: "21/05/2004", round_2: 20 },
...
{ company: "Z", date_round_1: "16/01/2004", round_1: 10, date_round_2: "20/12/2006", round_2: 45 }]
and I wish to display both 'round_1' and 'round_2' time series as stacked line charts.
The base line would look like this :
var fundsChart = dc.lineChart("#fundsChart");
var ndx = crossfilter(data);
var all = ndx.groupAll();
// The sample data stores dates as "dd/mm/yyyy" strings, but d3.time.year
// needs a real Date, so strings are parsed before flooring to the year.
var parseRoundDate = d3.time.format("%d/%m/%Y").parse;
// Dimension keyed by the year of the first round.
var date_1 = ndx.dimension(function(d){
  // Values that are already Date objects are passed through untouched.
  var roundDate = d.date_round_1 instanceof Date
    ? d.date_round_1
    : parseRoundDate(d.date_round_1);
  return d3.time.year(roundDate);
});
fundsChart
  .renderArea(true)
  .renderHorizontalGridLines(true)
  .width(400)
  .height(360)
  .dimension(date_1)
  // Total round_1 value per year (the unary plus coerces strings to numbers).
  .group(date_1.group().reduceSum(function(d) { return +d.round_1 }))
  .x(d3.time.scale().domain([new Date(2000, 0, 1), new Date(2015, 0, 1)]));
I have tried using the stack method to add the series but the problem resides in the fact that only a single dimension can be passed as argument of the lineChart.
Can you think of a workaround to display both series while still using a dc chart?
Are you going to be filtering on this chart? If not, just create a different group on a date_2 dimension and use that in the stack. Should work.
If you are going to be filtering, I think you'll have to change your data model a bit. You'll want to switch to have 1 record per round, so in this case you'll have 2 records for every 1 record you have now. There should be 1 date property (the date for that round), an amount property (the contents of round_x in the current structure), and a 'round' property (which would be '1', or '2', for example).
Then you need to create a date dimension and multiple groups on that dimension. The group will have a reduceSum function that looks something like:
// Sums `amount` over round-1 records only; records from any other round
// contribute 0 to this group.
var round1Group = dateDim.group().reduceSum(function (record) {
  if (record.round === '1') {
    return record.amount;
  }
  return 0;
});
So, what happens here is that we have a group that will only aggregate values from round 1. You'll create similar groups for round 2, etc. Then stack these groups in the dc.js chart.
Hopefully that helps!

Apply Filter from one Crossfilter dataset to another Crossfilter

I have two datasets that have similar columns/dimensions but are grouped differently by row and contain different measures.
Ex:
Dataset 1
Year Category SubCategory Value01 Value02
2000 Cars Sport 10 11
2000 Cars Family 15 16
2000 Boats Sport 20 21
2000 Boats Family 25 26
...
Dataset 2
Year Category ValueA ValueB
2000 Cars 100 101
2000 Boats 200 201
...
Dataset 1 has its own crossfilter object, Dataset 2 has a separate crossfilter object. I have multiple dc.js charts, some tied to the dataset 1, some to dataset 2.
When a dc.js chart filters dataset 1 on a column/dimension that also exists in dataset 2, I want to apply that same filter to dataset 2. How can this be achieved?
I don't think there is any automatic way to do this in crossfilter or dc.js. But if you're willing to roll your own dimension wrapper, you could supply that instead of the original dimension objects and have that forward to all the underlying dimensions.
EDIT: based on @Aravind's fiddle below, here is a "dimension mirror" that works, at least for this simple example:
/**
 * Builds a stand-in "dimension" whose filter methods fan out to several
 * underlying dimensions.
 *
 * Every argument is treated as a dimension. The returned object exposes
 * filter, filterExact, filterRange and filterFunction; each one calls the
 * same-named method on every supplied dimension, in argument order, passing
 * along its single argument. The forwarding methods themselves return nothing.
 */
function mirror_dimension(...dims) {
  // Make a one-argument forwarder for the named dimension method.
  const forwardTo = (method) => (value) => {
    for (const dim of dims) {
      dim[method](value);
    }
  };

  return {
    filter: forwardTo('filter'),
    filterExact: forwardTo('filterExact'),
    filterRange: forwardTo('filterRange'),
    filterFunction: forwardTo('filterFunction')
  };
}
It's a bit messy using this. For each dimension you want to mirror from crossfilter A to crossfilter B, you'll need to create a mirror dimension on crossfilter B, and vice versa:
// Primary dimensions on crossfilter A
subject_DA = CFA.dimension(d => d.Subject);
name_DA = CFA.dimension(d => d.Name);
// Extra dimensions on crossfilter A that receive the filters applied on crossfilter B
mirror_subject_DA = CFA.dimension(d => d.Subject);
mirror_name_DA = CFA.dimension(d => d.Name);
// Primary dimensions on crossfilter B
subject_DB = CFB.dimension(d => d.Subject);
name_DB = CFB.dimension(d => d.Name);
// Extra dimensions on crossfilter B that receive the filters applied on crossfilter A
mirror_subject_DB = CFB.dimension(d => d.Subject);
mirror_name_DB = CFB.dimension(d => d.Name);
Now you tie them together when passing them off to the charts:
// Subject chart on crossfilter A: its filters go to subject_DA and are
// mirrored onto crossfilter B through mirror_subject_DB
subjectAChart
.dimension(mirror_dimension(subject_DA, mirror_subject_DB))
// ...
// Subject chart on crossfilter B: its filters go to subject_DB and are
// mirrored onto crossfilter A through mirror_subject_DA
subjectBChart
.dimension(mirror_dimension(subject_DB, mirror_subject_DA))
// ...
// Name chart on crossfilter A, mirrored onto crossfilter B
nameAChart
.dimension(mirror_dimension(name_DA, mirror_name_DB))
// ...
// Name chart on crossfilter B, mirrored onto crossfilter A
nameBChart
.dimension(mirror_dimension(name_DB, mirror_name_DA))
// ...
Since all the charts are implicitly on the same chart group, the redraw events will automatically get propagated between them when they are filtered. And each filter action on one crossfilter will get applied to the mirror dimension on the other crossfilter.
Maybe not something I'd recommend doing, but as usual, it can be made to work.
Here's the fiddle: https://jsfiddle.net/gordonwoodhull/7dwn4y87/8/
@Gordon's suggestion is a good one.
I usually approach this differently, by combining the 2 tables into a single table (add ValueA and ValueB to each row of Dataset 1) and then using custom groupings to only aggregate ValueA and ValueB once for each unique Year/Category combination. Each group would need to keep a map of keys it has seen before and the count for each of those keys, only aggregating the value of ValueA or ValueB if it is a new combination of keys. This does result in complicated grouping logic, but allows you to avoid needing to coordinate between 2 Crossfilter objects.
Personally, I just find complex custom groupings easier to test and maintain than coordination logic, but that's not the case for everyone.

Resources