How can I filter rows by value and then count and plot pie chart and histogram? - dc.js

My data look like this:
{
"raw_data": [
{
"agebracket": "",
"currentstatus": "Hospitalized",
"dateannounced": "05/06/2020",
"gender": "",
....
},
{
"agebracket": "",
"currentstatus": "Recovered",
"dateannounced": "05/06/2020",
"gender": "",
.........
},
{
"agebracket": "",
"currentstatus": "Hospitalized",
"dateannounced": "05/06/2020",
"gender": "",
.......
},
I am able to plot dc graph for the whole dataset. But now I want to filter it by "currentstatus" --> "Recovered", "Hospitalized", "Deceased".
Right now it looks like this:
https://blockbuilder.org/ninjakx/3699d4c0efb0ac1d81636cf0e05eda2d
I am trying to integrate it with https://blockbuilder.org/ninjakx/fbbae54c3f4d8b2df8f9b981d46857b4.
When I will click on confirmed box then pie and histogram will show results related to hospitalized. In that above (dashboard) Those three graphs didn't have to cross filter, So I was able to write the logic. But I am confused about this one. How should I go about filtering it by "currentstatus" ("Hospitalized, "Recovered" etc)
I want this to take a "currentstatus" variable and return results related to it.
var group = dim.group(function(d) {
return binwidth * Math.floor(d/binwidth); });
So that I can do :
barChart
.height(300)
.width(500) //give it a width
.dimension(dim)
.group(group, currentstatus) //<------------------ Here
.......
I am thinking about creating three arrays for Hospitalized, Recovered, and Deceased Resp. But I think there might be some shorter solution than going this lengthy way.
EDIT:
I tried that too but I don't know how to count.
The result should be(calculated using pandas) :
gender currentstatus
F Deceased 31
Hospitalized 4225
Recovered 33
M Deceased 60
Hospitalized 7570
Recovered 50
But I get this:
0:
key: "M"
value: {Hospitalized: 7549, Deceased: 51, Recovered: 13}
__proto__: Object
1: {key: "NA", value: {…}}
2:
key: "F"
value: {Hospitalized: 4200, Deceased: 25, Recovered: 7}
__proto__: Object
length: 3
__proto__: Array(0)
How do I count? I know this below logic is wrong:
var group1 = pieTypeDimension.group().reduce(
function(p, v) { // add
p[v.currentstatus] = (p[v.currentstatus] || 0) + 1;
return p;
},
function(p, v) { // remove
p[v.currentstatus] -= 1;
return p;
},
function() { // initial
return {};
});
log("group1:::", group1.top(Infinity));
Tried this too:
var group1 = pieTypeDimension.group().reduce(
function(p, v) { // add
++p.count;
log("count:::", p.count);
p[v.currentstatus] = (p[v.currentstatus] || 0) + p.count;
return p;
},
function(p, v) { // remove
--p.count;
p[v.currentstatus] -= p.count;
return p;
},
function(p, v) { // initial
return {count:0};
});
and get this:
0:
key: "M"
value:
count: 7613
Hospitalized: 28769566
Deceased: 173237
Recovered: 39888
__proto__: Object
__proto__: Object
1:
key: "NA"
value: {count: 3, Hospitalized: 6}
__proto__: Object
2:
key: "F"
value: {count: 4232, Hospitalized: 8903341, Deceased: 43001, Recovered: 10686}
__proto__: Object
length: 3
__proto__: Array(0)
Edit:
I didn't take account of age bracket thing. Considering that Now it matches with the dc.js solution. So that logic is correct.
gender currentstatus
F Deceased 25
Hospitalized 4200
Recovered 7
M Deceased 51
Hospitalized 7549
Recovered 13
Name: currentstatus, dtype: int64

I will answer in a way that works with filtering, because that's the primary use case for dc.js.
I'd suggest going with the idiomatic crossfilter reduction for stacked charts, just without actually stacking anything.
From the FAQ:
var group = dimension.group().reduce(
function(p, v) { // add
p[v.type] = (p[v.type] || 0) + v.value;
return p;
},
function(p, v) { // remove
p[v.type] -= v.value;
return p;
},
function() { // initial
return {};
});
where type in your case is currentstatus.
This will give you a group where the values are objects keyed on status.
If you have every status for every X value, then each value object will have all status as keys; if not, some will be undefined.
Use valueAccessor to pull the field that you want for your chart, defaulting to 0 if undefined:
chart.valueAccessor(kv => kv.value[currentstatus] || 0)

Related

Reduce number of datapoints using crossfilter

Let's say I have a 100 years worth of monthly data, total of 1200 data points, see bottom.
To plot a tiny overview line chart (e.g. just 100 data points) I have to do it manually by grouping. For instance, group the data by year, then get the average of 12 months value, iterate through every group, then finally reduced the data points to 100.
Instead of this approach, is there a convenient way using crossfilter or any other library?
[
{ date: 1900-01, value: 72000000000},
{ date: 1900-02, value: 58000000000},
{ date: 1900-03, value: 2900000000},
{ date: 1900-04, value: 31000000000},
{ date: 1900-05, value: 33000000000},
...
{ date: 1999-11, value: 30000000000},
{ date: 1999-12, value: 10000000000},
]
It's going to be the same algorithm no matter which library you use, just different ways of specifying it. In this case d3.nest is probably the easiest way to do this, but if you want quick filtering, the crossfilter way isn't too bad.
The difference between using d3.nest and crossfilter is that we're not constructing an array of values, just a single value. So we'll maintain both sum and count.
We'll also need to specify what happens when a row is removed from a bin.
var parse = d3.timeParse("%Y-%m");
data.forEach(function(d) {
// it's best to convert fields before passing to crossfilter
// because crossfilter will look at them many times
d.date = parse(d.key);
});
var cf = crossfilter(data);
var yearDim = cf.dimension(d => d3.timeYear(d.date));
var yearAvgGroup = yearDim.group().reduce(
function(p, v) { // add
p.sum += v.value;
++p.count;
p.avg = p.sum/p.count;
return p;
},
function(p, v) { // remove
p.sum -= v.value;
--p.count;
p.avg = p.count ? p.sum/p.count : 0;
return p;
},
function() { // init
return {sum: 0, count: 0, avg: 0};
}
);
Now yearAvgGroup.all() will return an array of key/value pairs, where the key is the year, and the value contains sum, count, and avg.
Crossfilter doesn't make this problem particularly convenient to solve, but reductio has a helper function for this:
var yearAvgGroup = yearDim.group();
reductio().avg(d => d.value);
Note: it doesn't matter unless you have ton of data, but it's more efficient to only compute sum and count in the group, and compute the average when it's needed.
If you're using dc.js, you can use valueAccessor for this:
// remove avg lines from the above, and
chart.dimension(yearDim)
.group(yearAvgGroup)
.valueAccessor(kv => kv.value.sum / kv.value.count);
Assuming your question is only concerned with producing the data, you can use d3-nest, without crossfilter, to average each year:
Parsing the date value, you can then format the date as a year to create a key. This groups values by key, then we rollup those values with a function to calculate the mean for a given year:
var parse = d3.timeParse("%Y-%m"); // takes: "1900-01"
var format = d3.timeFormat("%Y"); // gives: "1900"
var means = d3.nest()
.key(function(d) { return format(parse(d.date)); })
.rollup(function(values) { return d3.mean(values, function(d) {return d.value; }) })
.entries(data);
Which gives us the following structure:
[
{
"key": "1900",
"value": 39380000000
},
{
"key": "1999",
"value": 20000000000
}
]
var data = [
{ date: "1900-01", value: 72000000000},
{ date: "1900-02", value: 58000000000},
{ date: "1900-03", value: 2900000000},
{ date: "1900-04", value: 31000000000},
{ date: "1900-05", value: 33000000000},
{ date: "1999-11", value: 30000000000},
{ date: "1999-12", value: 10000000000},
];
var parse = d3.timeParse("%Y-%m");
var format = d3.timeFormat("%Y");
var means = d3.nest()
.key(function(d) { return format(parse(d.date)); })
.rollup(function(values) { return d3.mean(values, function(d) {return d.value; }) })
.entries(data);
console.log(means);
<script src="https://cdnjs.cloudflare.com/ajax/libs/d3/5.7.0/d3.min.js"></script>

Showing sum of multiple values with amCharts

I would like to show sum of multiple values as one chart output with amCharts. I am using dataLoader with JSON to get the data. I know I have to create a function for but I couldn't understand how to get the data from the dataLoader to calculate
{
"balloonText": "[[title]] of [[valueAxis]]:[[value]]",
"lineThickness": 3,
"id": "sumValue",
"title": "sum Value",
"valueField": (function() {
var sumValues = "calculation";
return sumValues
}
this attempt is probably not correct but this is how I started
{
"balloonText": "[[title]] of [[valueAxis]]:[[value]]",
"lineThickness": 3,
"id": "LoadigTime",
"title": "Loadig Time",
"valueField": (function() {
var sumValues = (HomePageLoad + LoginToParametersLoad + ParametersLoad + AlarmsLoad + SwitchSideLoad + LoginToAdminLoad + AdminLoad) / 7;
return sumValues
})
}
valueField cannot be a function, only a string reference to a field in your data.
If the chart is meant to be displaying the sum of all of those fields in your data as a chart, simply add logic to your postProcess callback to create a new dataset containing your sums, e.g.
postProcess: function(data) {
var newData = [];
data.forEach(function(dataItem) {
var item = {
YOUR_CATEGORY_FIELD: dataItem.YOUR_CATEGORY_FIELD, //replace with your category field name
sum: 0
};
//loop through your item's keys and sum everything up, filtering out
//your category property
item.sum = Object.keys(dataItem).reduce(function(sum, key) {
if (key !== "YOUR_CATEGORY_FIELD") {
sum += dataItem[key]
}
return sum;
}, 0);
newData.push(item);
});
return newData;
},
// ...
graphs: [{
valueField: "sum",
// other props here
}]

Get specific object field with condition and make opertion on it

I have objects like this:
{
buildings: {
"1": {
"l": 0 ,
"r": 0 ,
"s": 0 ,
"type": "GoldMine" ,
"x": 2 ,
"y": 15
} ,
"10": {
"l": 0 ,
"r": 6 ,
"s": 2 ,
"type": "MagicMine" ,
"x": 26 ,
"y": 22
}
} ,
[...]
}
I want to get objects with buildings of type "GoldMine".
I tried something with map:
r.table("Characters").map(function(row) {
return row("planet")("buildings")
})
With keys() I can iterate it:
r.db("Unnyworld").table("Characters").map(function(row) {
return row("planet")("buildings").keys().map(function(key) {
return "need to get only buildings with type == GoldMine";
})
}).limit(2)
But it returns all buildings. I want to get only buildings with type == GoldMine and change field x.
Something like this may work:
r.table('Characters')
.concatMap(function(doc) {
return doc("planet")("buildings").keys().map(function(k) {
return {id: doc('id'), key: k, type: doc("planet")("buildings")(k)('type'), x: doc("planet")("buildings")(k)('x')}
})
})
.filter(function(building) {
return building('type').eq('GoldMine')
})
.forEach(function(doc) {
return r.table('Characters').get(doc('id'))
.update({
planet: {buildings: r.object(doc('key'), {x: 1111111})}
})
})
Basically create a flat array from building by using concatMap then filter it. With result data, we can iterator over it and update to value that we want.

crossfilter to do word frequency

I would like a crossfilter group that gives the word frequency and average rating for each word in a series of surveys so that I can make an awesome interactive word-bubble-frequency chart.
My data looks like:
[{feedback: "This is a horrible service", rating:2},
{feedback: "I love everything about everything", rating: 10},
{feedback: "love the user interface, good service", rating:6},
{feedback: "", rating: 7} ]
I would like something like:
[ {key: love, count:2, ave: 8}, {key: horrible, count:1, rating:2 }, {key: service,
count: 2, rating: 4 } ,.... ]
So far I have:
function to break up string into tokens, returns object with frequency for each word
var wordcnt = function(bah ){
var hist = {}, words = bah.split(/[\s*\.*\,\;\+?\#\|:\-\/\\\[\]\(\)\{\}$%&0-9*]/)
for( var i in words)
if(words[i].length >1 )
hist[words[i]] ? hist[words[i]]+=1 : hist[words[i]]=1;
return hist;
};
Loading data into d3 and crossfilter
d3.csv("test.csv", function( error, data) {
data.forEach(function(d) {
d.rating= +d.rating;
d.wordCount= wordcnt(d.feedback.toLowerCase());
});
var ndx = crossfilter(data);
var all = ndx.groupAll();
var frequencyDimension=ndx.dimension(function(d){return d.wordCount; });
And one butchered group reduce function!
var frequencyGroup= frequencyDimension.group().reduce(
function (p, v) {
for (var key in v.wordCount) {
if (v.frequency.hasOwnProperty(key)) {
p.frequency[key]+= v[key];
p.frequency[key].count++;
p.frequency[key].sum+= v.rating ;
}
else{
p.frequency[key]=v[key];
p.frequency[key].count=1;
p.frequency[key].sum = v.rating;
}
}
p.frequency[key].ave = p.frequency[key].sum/p.frequency[key].count ;
return p;
},
function (p, v) {
for (var key in v.wordCount) {
if (v.frequency.hasOwnProperty(key)) {
p.frequency[key]-= v[key];
p.frequency[key].count--;
p.frequency[key].sum-= v.rating ;
}
//don't need an else statement because can't remove key if it doesn't exist
}
p.frequency[key].ave = p.frequency[key].sum/p.frequency[key].count ;
return p;
},
function (p,v) {
return { frequency: {} } ;
}
)

Crossfilter query

Is it possible to filter a crossfilter dataset which has an array as the value?
For example, say I have the following dataset:
var data = [
{
bookname: "the joy of clojure",
authors: ["Michael Fogus", "Chris Houser"],
tags: ["clojure", "lisp"]
},
{
bookname: "Eloquent Ruby",
authors: ["Russ Olsen"],
tags: ["ruby"]
},
{
bookname: "Design Patterns in Ruby",
authors: ["Russ Olsen"],
tags: ["design patterns", "ruby"]
}
];
Is there an easy way to access the books which are tagged by an particular tag? And also the books which have a particular author? The way I understand how to use crossfilter so far has me doing something like this:
var filtered_data = crossfilter(data);
var tags = filtered_data.dimension(function(d) {return d.tags});
var tag = tags.group();
And then when I access the grouping (like so):
tag.all()
I get this:
[{key: ["clojure", "lisp"], value: 1},
{key: ["design patterns", "ruby"], value: 1},
{key: ["ruby"], value: 1}]
When I would rather have this:
[{key: "ruby", value: 2},
{key: "clojure", value: 1},
{key: "lisp", value: 1},
{key: "design patterns", value: 1}]
I've added comments to the code below. Big picture: use reduce function.
var data = ...
var filtered_data = crossfilter(data);
var tags = filtered_data.dimension(function(d) {return d.tags});
tags.groupAll().reduce(reduceAdd, reduceRemove, reduceInitial).value()
Notice how I've used groupAll() instead of group() b/c we want our reduce functions (defined below) to operate on one group rather than 3 groups.
Now the reduce functions should look like this:
/*
v is the row in the dataset
p is {} for the first execution (passed from reduceInitial).
For every subsequent execution it is the value returned from reduceAdd of the prev row
*/
function reduceAdd(p, v) {
v.tags.forEach (function(val, idx) {
p[val] = (p[val] || 0) + 1; //increment counts
});
return p;
}
function reduceRemove(p, v) {
//omitted. not useful for demonstration
}
function reduceInitial() {
/* this is how our reduce function is seeded. similar to how inject or fold
works in functional languages. this map will contain the final counts
by the time we are done reducing our entire data set.*/
return {};
}
I've never used "crossfilter" (I'm assuming this is a JS library). Here are some pure JS methods though.
This...
data.filter(function(d) {
return d.authors.indexOf("Michael Fogus") !== -1;
})
returns this:
[{bookname:"the joy of clojure", authors:["Michael Fogus", "Chris Houser"], tags:["clojure", "lisp"]}]
This...
var res = {};
data.forEach(function(d) {
d.tags.forEach(function(tag) {
res.hasOwnProperty(tag) ? res[tag]++ : res[tag] = 1
});
})
returns this:
({clojure:1, lisp:1, ruby:2, 'design patterns':1})
To either of these, you can apply d3.entries to get your {key:"ruby", value: 2} format.

Resources