crossfilter to do word frequency - d3.js

I would like a crossfilter group that gives the word frequency and average rating for each word in a series of surveys so that I can make an awesome interactive word-bubble-frequency chart.
My data looks like:
[{feedback: "This is a horrible service", rating:2},
{feedback: "I love everything about everything", rating: 10},
{feedback: "love the user interface, good service", rating:6},
{feedback: "", rating: 7} ]
I would like something like:
[ {key: love, count:2, ave: 8}, {key: horrible, count:1, rating:2 }, {key: service,
count: 2, rating: 4 } ,.... ]
So far I have:
function to break up string into tokens, returns object with frequency for each word
var wordcnt = function(bah ){
var hist = {}, words = bah.split(/[\s*\.*\,\;\+?\#\|:\-\/\\\[\]\(\)\{\}$%&0-9*]/)
for( var i in words)
if(words[i].length >1 )
hist[words[i]] ? hist[words[i]]+=1 : hist[words[i]]=1;
return hist;
};
Loading data into d3 and crossfilter
d3.csv("test.csv", function( error, data) {
data.forEach(function(d) {
d.rating= +d.rating;
d.wordCount= wordcnt(d.feedback.toLowerCase());
});
var ndx = crossfilter(data);
var all = ndx.groupAll();
var frequencyDimension=ndx.dimension(function(d){return d.wordCount; });
And one butchered group reduce function!
var frequencyGroup= frequencyDimension.group().reduce(
function (p, v) {
for (var key in v.wordCount) {
if (v.frequency.hasOwnProperty(key)) {
p.frequency[key]+= v[key];
p.frequency[key].count++;
p.frequency[key].sum+= v.rating ;
}
else{
p.frequency[key]=v[key];
p.frequency[key].count=1;
p.frequency[key].sum = v.rating;
}
}
p.frequency[key].ave = p.frequency[key].sum/p.frequency[key].count ;
return p;
},
function (p, v) {
for (var key in v.wordCount) {
if (v.frequency.hasOwnProperty(key)) {
p.frequency[key]-= v[key];
p.frequency[key].count--;
p.frequency[key].sum-= v.rating ;
}
//don't need an else statement because can't remove key if it doesn't exist
}
p.frequency[key].ave = p.frequency[key].sum/p.frequency[key].count ;
return p;
},
function (p,v) {
return { frequency: {} } ;
}
)

Related

How can I filter rows by value and then count and plot pie chart and histogram?

My data look like this:
{
"raw_data": [
{
"agebracket": "",
"currentstatus": "Hospitalized",
"dateannounced": "05/06/2020",
"gender": "",
....
},
{
"agebracket": "",
"currentstatus": "Recovered",
"dateannounced": "05/06/2020",
"gender": "",
.........
},
{
"agebracket": "",
"currentstatus": "Hospitalized",
"dateannounced": "05/06/2020",
"gender": "",
.......
},
I am able to plot dc graph for the whole dataset. But now I want to filter it by "currentstatus" --> "Recovered", "Hospitalized", "Deceased".
Right now it looks like this:
https://blockbuilder.org/ninjakx/3699d4c0efb0ac1d81636cf0e05eda2d
I am trying to integrate it with https://blockbuilder.org/ninjakx/fbbae54c3f4d8b2df8f9b981d46857b4.
When I will click on confirmed box then pie and histogram will show results related to hospitalized. In that above (dashboard) Those three graphs didn't have to cross filter, So I was able to write the logic. But I am confused about this one. How should I go about filtering it by "currentstatus" ("Hospitalized, "Recovered" etc)
I want this to take a "currentstatus" variable and return results related to it.
var group = dim.group(function(d) {
return binwidth * Math.floor(d/binwidth); });
So that I can do :
barChart
.height(300)
.width(500) //give it a width
.dimension(dim)
.group(group, currentstatus) //<------------------ Here
.......
I am thinking about creating three arrays for Hospitalized, Recovered, and Deceased Resp. But I think there might be some shorter solution than going this lengthy way.
EDIT:
I tried that too but I don't know how to count.
The result should be(calculated using pandas) :
gender currentstatus
F Deceased 31
Hospitalized 4225
Recovered 33
M Deceased 60
Hospitalized 7570
Recovered 50
But I get this:
0:
key: "M"
value: {Hospitalized: 7549, Deceased: 51, Recovered: 13}
__proto__: Object
1: {key: "NA", value: {…}}
2:
key: "F"
value: {Hospitalized: 4200, Deceased: 25, Recovered: 7}
__proto__: Object
length: 3
__proto__: Array(0)
How do I count? I know this below logic is wrong:
var group1 = pieTypeDimension.group().reduce(
function(p, v) { // add
p[v.currentstatus] = (p[v.currentstatus] || 0) + 1;
return p;
},
function(p, v) { // remove
p[v.currentstatus] -= 1;
return p;
},
function() { // initial
return {};
});
log("group1:::", group1.top(Infinity));
Tried this too:
var group1 = pieTypeDimension.group().reduce(
function(p, v) { // add
++p.count;
log("count:::", p.count);
p[v.currentstatus] = (p[v.currentstatus] || 0) + p.count;
return p;
},
function(p, v) { // remove
--p.count;
p[v.currentstatus] -= p.count;
return p;
},
function(p, v) { // initial
return {count:0};
});
and get this:
0:
key: "M"
value:
count: 7613
Hospitalized: 28769566
Deceased: 173237
Recovered: 39888
__proto__: Object
__proto__: Object
1:
key: "NA"
value: {count: 3, Hospitalized: 6}
__proto__: Object
2:
key: "F"
value: {count: 4232, Hospitalized: 8903341, Deceased: 43001, Recovered: 10686}
__proto__: Object
length: 3
__proto__: Array(0)
Edit:
I didn't take account of age bracket thing. Considering that Now it matches with the dc.js solution. So that logic is correct.
gender currentstatus
F Deceased 25
Hospitalized 4200
Recovered 7
M Deceased 51
Hospitalized 7549
Recovered 13
Name: currentstatus, dtype: int64
I will answer in a way that works with filtering, because that's the primary use case for dc.js.
I'd suggest going with the idiomatic crossfilter reduction for stacked charts, just without actually stacking anything.
From the FAQ:
var group = dimension.group().reduce(
function(p, v) { // add
p[v.type] = (p[v.type] || 0) + v.value;
return p;
},
function(p, v) { // remove
p[v.type] -= v.value;
return p;
},
function() { // initial
return {};
});
where type in your case is currentstatus.
This will give you a group where the values are objects keyed on status.
If you have every status for every X value, then each value object will have all status as keys; if not, some will be undefined.
Use valueAccessor to pull the field that you want for your chart, defaulting to 0 if undefined:
chart.valueAccessor(kv => kv.value[currentstatus] || 0)

Crossfilter and DC.js: reduce to unique number

In the example below, I am trying to sum by unique occurence of Respond_Id. eg. in this case, it should be in total 3, "Respond_Id" being 258,261 and 345.
This is my data:
{"Respond_Id":258,"Gender":"Female","Age":"18-21","Answer":424},
{"Respond_Id":258,"Gender":"Female","Age":"18-21","Answer":428},
{"Respond_Id":261,"Gender":"Male","Age":"22-26", "Answer":427},
{"Respond_Id":261,"Gender":"Male","Age":"22-26", "Answer":432},
{"Respond_Id":345,"Gender":"Female","Age":"27-30","Answer":424},
{"Respond_Id":345,"Gender":"Female","Age":"27-30","Answer":425},
{"Respond_Id":345,"Gender":"Female","Age":"27-30","Answer":433},
I know I should use group reduce for this, so I tried (adapted from an example):
var ntotal = answerDim.group().reduce(
function(p, d) {
if(d.Respond_Id in p.Respond_Ids){
p.Respond_Ids[d.Respond_Id]++;
}
else {
p.Respond_Ids[d.Respond_Id] = 1;
p.RespondCount++;
}
return p;
},
function (p, d) {
p.Respond_Ids[d.Respond_Id]--;
if(p.Respond_Ids[d.Respond_Id] === 0){
delete p.Respond_Ids[d.Respond_Id];
p.RespondCount--;
}
return p;
},
function () {
return {
RespondCount: 0,
Respond_Ids: {}
};
}
);
Then:
numberDisplay
.group(ntotal)
.valueAccessor(function(d){ return d.value.RespondCount; });
dc.renderAll();
But seems not working. Does someone know how to make it work ? Thank you
Based on your JSFiddle, your setup is like this:
var RespondDim = ndx.dimension(function (d) { return d.Respond_Id;});
var ntotal = RespondDim.group().reduce(
function(p, d) {
if(d.Respond_Id in p.Respond_Ids){
p.Respond_Ids[d.Respond_Id]++;
}
else {
p.Respond_Ids[d.Respond_Id] = 1;
p.RespondCount++;
}
return p;
},
function (p, d) {
p.Respond_Ids[d.Respond_Id]--;
if(p.Respond_Ids[d.Respond_Id] === 0){
delete p.Respond_Ids[d.Respond_Id];
p.RespondCount--;
}
return p;
},
function () {
return {
RespondCount: 0,
Respond_Ids: {}
};
});
What is important to note here is that your group keys, by default, are the same as your dimension keys. So you will have one group per respondent ID. This isn't what you want.
You could switch to using dimension.groupAll, which is designed for this use case, but unfortunately the dimension.groupAll.reduce signature is slightly different. The easiest fix for you is going to be to just define your dimension to have a single value:
var RespondDim = ndx.dimension(function (d) { return true;});
Now you'll see that ntotal.all() will look like this:
{key: true, value: {RespondCount: 3, Respond_Ids: {258: 2, 261: 2, 345: 3}}}
Working fiddle: https://jsfiddle.net/v0rdoyrt/2/

dc.numberDisplay showing count for single group, not count of number of groups

I have data that looks like this:
var records = [
{id: '1', cat: 'A'},
{id: '2', cat: 'A'},
{id: '3', cat: 'B'},
{id: '4', cat: 'B'},
{id: '5', cat: 'B'},
{id: '6', cat: 'C'}
];
I want to create a dc.numberDisplay that displays the count of the number of unique categories, 3 in the example data above (A, B, & C).
This is what I'm currently doing:
var ndx = crossfilter(data); // init crossfilter
// create dimension based on category
var categoryDimension = ndx.dimension(
function (d) {
return d.category;
}
);
// Group by category
var categoryGroup = categoryDimension.group();
var categoryCount = dc.numberDisplay('#category-count'); // An empty span
categoryCount
.group(categoryGroup)
.valueAccessor(
function (d) { return d.value; }
);
The problem is that the numberDisplay displays 2 instead of 3. When debugging, I found that when the valueAccessor is called, d is the count of the number of elements of category A instead of the count of the number of categories.
How can I solve this problem?
UPDATE: Thanks to Nathan's solution, here is a working code snippet (ES2016 style)
const categoryDimension = claims.dimension(
(d) => {
return d.cat;
}
);
const categoryGroup = categoryDimension.groupAll().reduce(
(p, v) => { // add element
const cat = v.cat;
const count = p.categories.get(cat) || 0;
p.categories.set(cat, count + 1);
return p;
},
(p, v) => { // remove element
const cat = v.cat;
const count = p.categories.get(cat);
if (count === 1) {
p.categories.delete(cat);
} else {
p.categories.set(cat, count - 1);
}
return p;
},
() => { // init
return {
categories: new Map()
};
});
categoryCount
.group(categoryGroup)
.valueAccessor(
(d) => {
return d.categories.size;
}
);
You will need to use groupAll() since the number-display only looks at the top group. Then provide custom reduce functions to track unique categories. Finally, when DC.js pulls the value from the top group (there is only one) - just return the number of categories (which is the number of keys in the p object).
var categoryGroup = categoryDimension.groupAll().reduce(
function (p, v) { //add
if(p[v.cat]) {
p[v.cat]++;
} else {
p[v.cat] = 1;
}
return p;
},
function (p, v) { //remove
p[v.cat]--;
if(p[v.cat] === 0) {
delete p[v.cat];
}
return p;
},
function () { //init
//initial p - only one since using groupAll
return {};
}
);
console.debug("groups", categoryGroup.value());
dc.numberDisplay('#category-count')
.group(categoryGroup)
.valueAccessor(
function (d) { return Object.keys(d).length; }
);

How to map column to complex object in SlickGrid

var data = [{"Id":40072,"Id2":40071,"SmDetails":{"Id1":40071,"Id2":40072}}]
I want to display SmDetails.Id1 in a column. How is this possible? I tried:
var columns = [{name:'Personnel',field:SmDetails.id1,id:'detailId'}];
Please help me
Please help me
**My latest code**
var data = [{"Id":40072,"Id2":40071,"allocationDetails":{"Id1":40071,"allocationDetails":{"accommodationId":4007}}}]
var grid;
var columns = [ {name:"Personnel",field:"allocationDetails",fieldIdx:'accommodationId', id:"accommodationId"}];
var options = {
enableCellNavigation: true,
enableColumnReorder: false,
dataItemColumnValueExtractor:
function getValue(item, column) {
var values = item[column.field];
if (column.fieldIdx !== undefined) {
return values && values[column.fieldIdx];
} else {
return values;
}}};
var gridData=$scope.Vo;//This return as json format
grid = new Slick.Grid("#testGrid",gridData, columns);
This is the code tried recently.
You'll need to provide a custom value extractor to tell the grid how to read your object.
var options = {
enableCellNavigation: true,
enableColumnReorder: false,
dataItemColumnValueExtractor:
// Get the item column value using a custom 'fieldIdx' column param
function getValue(item, column) {
var values = item[column.field];
if (column.fieldIdx !== undefined) {
return values && values[column.fieldIdx];
} else {
return values;
}
}
};
The column definitions would look like:
{
id: "field1",
name: "Id1",
field: "SmDetails",
fieldIdx: 'Id1'
}, {
id: "field2",
name: "Id2",
field: "SmDetails",
fieldIdx: 'Id2'
} //... etc
Check out this fiddle for a working example.
try this to convert your data into object of single length values ...
newData = {};
for(key in data[0]){
parentKey = key;
if(typeof(data[0][key]) == "object"){
childData = data[0][key];
for(key in childData){
childKey = key;
newKey = parentKey+childKey;
newData[newKey] = childData[childKey];
}
} else {
newData[key] = data[0][key];
}
}
This will convert your data object like this
newData = {Id: 40072, Id2: 40071, SmDetailsId1: 40071, SmDetailsId2: 40072};
Now use this newData to map your data items in grid
I find this works well for nested properties, eg:
var columns = [
{ id: "someId", name: "Col Name", field: "myRowData.myObj.myProp", width: 40}
..
];
var options {
...
dataItemColumnValueExtractor: function getItemColumnValue(item, column) {
var val = undefined;
try {
val = eval("item." + column.field);
} catch(e) {
// ignore
}
return val;
}
};

Crossfilter query

Is it possible to filter a crossfilter dataset which has an array as the value?
For example, say I have the following dataset:
var data = [
{
bookname: "the joy of clojure",
authors: ["Michael Fogus", "Chris Houser"],
tags: ["clojure", "lisp"]
},
{
bookname: "Eloquent Ruby",
authors: ["Russ Olsen"],
tags: ["ruby"]
},
{
bookname: "Design Patterns in Ruby",
authors: ["Russ Olsen"],
tags: ["design patterns", "ruby"]
}
];
Is there an easy way to access the books which are tagged by an particular tag? And also the books which have a particular author? The way I understand how to use crossfilter so far has me doing something like this:
var filtered_data = crossfilter(data);
var tags = filtered_data.dimension(function(d) {return d.tags});
var tag = tags.group();
And then when I access the grouping (like so):
tag.all()
I get this:
[{key: ["clojure", "lisp"], value: 1},
{key: ["design patterns", "ruby"], value: 1},
{key: ["ruby"], value: 1}]
When I would rather have this:
[{key: "ruby", value: 2},
{key: "clojure", value: 1},
{key: "lisp", value: 1},
{key: "design patterns", value: 1}]
I've added comments to the code below. Big picture: use reduce function.
var data = ...
var filtered_data = crossfilter(data);
var tags = filtered_data.dimension(function(d) {return d.tags});
tags.groupAll().reduce(reduceAdd, reduceRemove, reduceInitial).value()
Notice how I've used groupAll() instead of group() b/c we want our reduce functions (defined below) to operate on one group rather than 3 groups.
Now the reduce functions should look like this:
/*
v is the row in the dataset
p is {} for the first execution (passed from reduceInitial).
For every subsequent execution it is the value returned from reduceAdd of the prev row
*/
function reduceAdd(p, v) {
v.tags.forEach (function(val, idx) {
p[val] = (p[val] || 0) + 1; //increment counts
});
return p;
}
function reduceRemove(p, v) {
//omitted. not useful for demonstration
}
function reduceInitial() {
/* this is how our reduce function is seeded. similar to how inject or fold
works in functional languages. this map will contain the final counts
by the time we are done reducing our entire data set.*/
return {};
}
I've never used "crossfilter" (I'm assuming this is a JS library). Here are some pure JS methods though.
This...
data.filter(function(d) {
return d.authors.indexOf("Michael Fogus") !== -1;
})
returns this:
[{bookname:"the joy of clojure", authors:["Michael Fogus", "Chris Houser"], tags:["clojure", "lisp"]}]
This...
var res = {};
data.forEach(function(d) {
d.tags.forEach(function(tag) {
res.hasOwnProperty(tag) ? res[tag]++ : res[tag] = 1
});
})
returns this:
({clojure:1, lisp:1, ruby:2, 'design patterns':1})
To either of these, you can apply d3.entries to get your {key:"ruby", value: 2} format.

Resources