Does PhantomJS share memory between multiple requests? - d3.js

I'm using PhantomJS + d3 to render a map of US Zipcodes as a backend process. The rendering and zip code counting takes long enough that putting the html and d3 js in the browser would require a minute to load and caused other issues, so we moved it to the backend.
If I send one request via curl to the server that PhantomJS starts up, no problem. If I space out multiple map requests by about 15 seconds, also no problem. However, if I launch a couple of curl requests very quickly, the rendered images wind up looking the same (i.e. the same image is written to multiple files). Here is the phantom script:
// PhantomJS script: starts a web server on port 9494. Each request body is
// parsed as JSON; its `inFile` names the HTML page to open and its `outFile`
// is the path handed to page.render().
var port,
server,
service,
page,
url,
svgDrawer;
// NOTE(review): `svgDrawer` is declared but never used in the script as shown.
// NOTE(review): `fs` is not in the `var` list above, so this assignment creates an implicit global.
fs = require('fs');
port = 9494;
server = require('webserver').create();
// NOTE(review): a single page instance is created here, outside the request
// handler, and is therefore shared by every request handled below.
page = require('webpage').create();
service = server.listen(port, function (request, response) {
var drawerPayload = null;
// Reject bodies that are not valid JSON with a 417 and stop.
try{
drawerPayload=JSON.parse(request.post);
} catch(e){
response.statusCode = 417;
response.write("Error : Invalid Input JSON");
response.close();
return;
}
// `url` is the script-level variable declared above, so it is overwritten by each request.
url = 'file:///' + fs.absolute('./'+drawerPayload.inFile);
page.open(url, function (status) {
if(status=="success"){
// NOTE(review): no argument is passed after the function, so `data` is
// undefined inside it; the function body also references `page` and
// `drawerPayload`, which are defined outside of it.
page.evaluate(function(data){
// Fire a click on <body> so chartBuilder runs with `data` as the event data.
$("body").on( "click", data, chartBuilder );
$("body").click();
// Poll every 250 ms, for at most 15 s, until svg.chart carries the "done" class.
var maxtimeOutMillis = 15000,
start = new Date().getTime(),
condition = false,
interval = setInterval(function() {
if ( (new Date().getTime() - start < maxtimeOutMillis) && !condition ) {
condition = $("svg.chart").hasClass("done"); //< defensive code
} else {
if(!condition) {
// Timed out without seeing the class: stop polling, nothing is rendered.
clearInterval(interval)
} else {
page.render(drawerPayload.outFile);
clearInterval(interval); //< Stop this interval
}
}
}, 250); //< repeat check every 250ms
});
// This runs as soon as page.evaluate() returns; the interval started above
// has not fired yet at this point.
response.statusCode = 200;
} else {
response.statusCode = 404;
response.write("Not Found"+url);
}
// The response is closed here for both branches, before the polling interval can complete.
response.close();
return;
});
// NOTE(review): the handler is reassigned on every request and closes over
// that request's `response`.
page.onError = function (msg, trace) {
console.log(msg);
trace.forEach(function(item) {
console.log(' ', item.file, ':', item.line);
})
response.statusCode = 417;
response.write("Error : "+msg);
response.close();
return;
}
});
and the html+d3 looks like this:
<!DOCTYPE html>
<meta charset="utf-8">
<style>
.zip {
stroke: none;
}
.chart {
fill: white;
width: 1000px;
height: 500px;
}
</style>
<body>
<div id="chart-container">
<svg class="chart"></svg>
</div>
</body>
<script src="./jquery-min.js"></script>
<script src="./d3.min.js"></script>
<script type="text/javascript" src="http://d3js.org/topojson.v1.min.js"></script>
<script>
/**
 * Click handler that draws the zip-code choropleth into `svg.chart`.
 *
 * @param {Object} e - jQuery event; `e.data` must provide `max` (largest
 *   count, used for the colour scale) and `counts` (map of zip -> count).
 *
 * When drawing has finished, the root `svg.chart` element receives the class
 * "done" so that an outside observer can poll for completion.
 */
function chartBuilder(e){
  var zip_data = e.data;
  $.getJSON("zips_us_topo.json", function(us){
    console.log("rendering...\n");
    var width = 1000;
    var height = 500;
    var projection = d3.geo.albersUsa()
      .scale(width)
      .translate([width / 2, height / 2]);
    var path = d3.geo.path().projection(projection);
    // Log scale; counts are shifted by one so that zero counts map to the domain minimum.
    var color = d3.scale.log().domain([1,zip_data.max+1]).range(["#cccccc","#f63337"]);
    // Keep a handle on the root <svg>. The original stored the result of the
    // whole chain (the appended <path> selection) in `svg`, so "done" was
    // added to every path instead of to svg.chart.
    var chart = d3.select("svg.chart")
      .attr("width", width)
      .attr("height", height)
      .style({margin: "10px 100px"});
    chart.append("g")
      .attr("class", "counties")
      .selectAll("path")
      .data(topojson.feature(us, us.objects.zip_codes_for_the_usa).features)
      .enter()
      .append("path")
      .attr("class", "zip")
      .style({fill: function(d){
        return color(zip_data.counts[d.properties.zip] ? zip_data.counts[d.properties.zip]+1 : 1);
      }})
      .attr("d", path);
    // Completion marker on the element that is actually polled.
    chart.classed("done", true);
  });
}
</script>
If the requests are all curled at the same time, it looks like it's writing one image to all of the output files. Does PhantomJS create a new page for each request, or is it loading the same request each time?

You have only a single page instance for all requests that come. This might create some race conditions when new requests come in and hijack the current page.open() request. There are basically two ways to solve this depending on your preferred scenario.
Multiple "tabs"
The simple fix would be to create a new page instance for every request and they will be essentially different tabs in the same browser. So if cookies or localStorage is an issue, this is not for you.
Move page = require('webpage').create(); inside the server.listen callback and don't forget to close() the page instance after use.
Only one request at a time
Since this is a not so short running process, you can start a page.open() when it is not currently running and put all incoming requests into a queue as long as page.open() hasn't finished. After it is finished, save the response, go through the request queue and respond with the same response to all of them.
This is of course much nicer on memory consumption than the first solution if there really are many concurrent requests.
There are other problems with your code though. You use setInterval() inside page.evaluate(), which breaks off of the control flow. response.statusCode = 200; will be most likely set before the page is rendered.
That page.render() inside of page.evaluate() is the other problem. page.evaluate() is the sandboxed page context. It doesn't have access to variables defined outside of it and that includes page and require. (Solution for this isolated problem)
Those two problems can be solved with a single blow by waiting outside of the page context for a render condition inside of it. I suggest using waitFor from the examples:
if(status=="success"){
  // Trigger the chart build inside the page. Arguments for the sandboxed
  // function must be passed explicitly after it; the original passed none,
  // which left `data` undefined.
  // NOTE(review): assumes the chart data (counts/max) lives on the request
  // payload itself; pass the right sub-object if it is nested. TODO confirm.
  page.evaluate(function(data){
    $("body").on( "click", data, chartBuilder );
    $("body").click();
  }, drawerPayload);
  // Poll from the PhantomJS side for the "done" marker. This replaces the
  // second polling loop that used to run inside the page.
  waitFor(function check(){
    return page.evaluate(function(){
      return $("svg.chart").hasClass("done");
    });
  }, function onReady(){
    page.render(drawerPayload.outFile);
    response.statusCode = 200;
    response.close();
  }, 15000); // explicit timeout: the stock waitfor.js example defaults to 3000 ms
  // NOTE(review): the stock waitFor() calls phantom.exit(1) on timeout, which
  // would stop the whole server; adapt it to close the response with an
  // error status instead if that is the helper in use.
} else {
  response.statusCode = 404;
  response.write("Not Found"+url);
  response.close();
}
Note that response.close(); is used twice, because one of the if branches is asynchronous and the other is not.

Related

Wait for SVG to load (Ajax) before running a function

I've really searched for days, but couldn't find a solution:
I have an external SVG (800k) which i need to load completely before calling the following function. Someone helped me with the code to load the SVG but I cant figure out how to detect when the SVG is completely loaded. (I am not using jQuery). I need the function initialSvgSettings() to run after the SVG has finished loading completely. Where I have it right now, it just runs when the SVG starts loading but not when it has completed.
This is the code I am using to load the SVG:
// LOAD SVG
var svgOverlay = viewer.svgOverlay();
var path = "images/elementosV6.svg";
/**
 * Requests `path` over XHR as an XML document and, once the request has
 * completed with status 200, passes the document's root element to
 * `callback` and then calls initialSvgSettings().
 * Exceptions thrown while handling the response are logged, not rethrown.
 */
function loadSVG(path, callback) {
  var request = new XMLHttpRequest();
  var handleStateChange = function(e) {
    try {
      var finished = request.readyState == 4 && request.status == 200;
      if (!finished) {
        return;
      }
      callback(request.responseXML.documentElement);
      initialSvgSettings();
    } catch (err) {
      console.log(err);
    }
  };
  request.onreadystatechange = handleStateChange;
  request.open("GET", path, true);
  request.overrideMimeType("text/xml");
  request.responseType = "document";
  request.send();
}
loadSVG(path, function(data) {
var g = data.getElementById("elements");
svgOverlay.node().appendChild(g);
}); // FIN LOAD SVG
The answer nearest to your question would be to point out that instead of using XMLHttpRequest.onreadystatechange, you could use the .onload event handler, which is called only after the resource has been completely loaded.
But you can also simplify your code by loading the image into an offscreen <object>. Attach your callback to .onload, and with .contentDocument you can access the DOM of the SVG.
// Load the SVG through an <object> element instead of XHR.
var svgContainer = document.createElement('object');
svgContainer.type = 'image/svg+xml';
// The handler is attached before `data` is assigned below.
svgContainer.onload = function () {
// contentDocument is the loaded SVG's DOM; move its "elements" node into the overlay.
var g = svgContainer.contentDocument.getElementById("elements");
svgOverlay.node().appendChild(g);
}
svgContainer.data = 'images/elementosV6.svg';
// NOTE(review): the element is never appended to the document in this snippet; confirm the target browsers load it without that.

How can I catch and process the data from the XHR responses using casperjs?

The data on the webpage is displayed dynamically and it seems that checking for every change in the html and extracting the data is a very daunting task and also needs me to use very unreliable XPaths. So I would want to be able to extract the data from the XHR packets.
I hope to be able to extract information from XHR packets as well as generate 'XHR' packets to be sent to the server.
The extracting information part is more important for me because the sending of information can be handled easily by automatically triggering html elements using casperjs.
I'm attaching a screenshot of what I mean.
The text in the response tab is the data I need to process afterwards. (This XHR response has been received from the server.)
This is not easily possible, because the resource.received event handler only provides meta data like url, headers or status, but not the actual data. The underlying phantomjs event handler acts the same way.
Stateless AJAX Request
If the ajax call is stateless, you may repeat the request
casper.on("resource.received", function(resource){
  // Identify the request of interest (here: any URL containing ".json") and
  // react only once the stage is "end"; otherwise this would run twice.
  var isJsonResource = resource.url.indexOf(".json") != -1;
  if (!isJsonResource || resource.stage != "end") {
    return;
  }
  var data = casper.evaluate(function(url){
    // synchronous GET request, repeated from inside the page context
    return __utils__.sendAJAX(url, "GET");
  }, resource.url);
  // do something with data, you might need to JSON.parse(data)
});
casper.start(url); // your script
You may want to add the event listener to resource.requested instead. That way you don't need to wait for the call to complete.
You can also do this right inside of the control flow like this (source: A: CasperJS waitForResource: how to get the resource i've waited for):
// Same idea as the event-listener variant, but expressed as a step in the
// CasperJS control flow.
casper.start(url);
var res, resData;
casper.waitForResource(function check(resource){
// Remember the resource being tested so the `then` step can read its URL.
res = resource;
return resource.url.indexOf(".json") != -1;
}, function then(){
// Repeat the request from inside the page and keep the body in resData.
resData = casper.evaluate(function(url){
// synchronous GET request
return __utils__.sendAJAX(url, "GET");
}, res.url);
// do something with the data here or in a later step
});
casper.run();
Stateful AJAX Request
If it is not stateless, you would need to replace the implementation of XMLHttpRequest. You will need to inject your own implementation of the onreadystatechange handler, collect the information in the page window object and later collect it in another evaluate call.
You may want to look at the XHR faker in sinon.js or use the following complete proxy for XMLHttpRequest (I modeled it after method 3 from How can I create a XMLHttpRequest wrapper/proxy?):
/**
 * Replaces window.XMLHttpRequest with a proxy that forwards every method and
 * property to a real XHR instance (kept in `this.xhr`) and, when a request
 * whose URL contains "whatever" completes with a text body, stores that body
 * in window.myAwesomeResponse.
 *
 * Meant to be run inside the page context (e.g. via evaluate), so it is
 * self-contained and written in ES5.
 *
 * Differences from a naive proxy:
 *  - the page's onreadystatechange handler is invoked with the proxy as
 *    `this` and receives the original event arguments;
 *  - assigning a non-function (e.g. null) as handler does not throw later;
 *  - reading onreadystatechange returns the page's own handler;
 *  - `response`, `responseURL`, `timeout` and `ontimeout` are proxied too.
 */
function replaceXHR(){
  (function(window, debug){
    // Formats an arguments object for the debug log.
    function args(a){
      var s = "";
      for(var i = 0; i < a.length; i++) {
        s += "\t\n[" + i + "] => " + a[i];
      }
      return s;
    }
    // responseText throws when responseType is not "" or "text"; treat that as "no text body".
    function readText(xhr){
      try {
        return xhr.responseText;
      } catch (err) {
        return null;
      }
    }
    var _XMLHttpRequest = window.XMLHttpRequest;
    window.XMLHttpRequest = function() {
      this.xhr = new _XMLHttpRequest();
    };
    // proxy ALL methods
    var methods = [
      "open",
      "abort",
      "setRequestHeader",
      "send",
      "addEventListener",
      "removeEventListener",
      "getResponseHeader",
      "getAllResponseHeaders",
      "dispatchEvent",
      "overrideMimeType"
    ];
    methods.forEach(function(method){
      window.XMLHttpRequest.prototype[method] = function() {
        if (debug) console.log("ARGUMENTS", method, args(arguments));
        if (method == "open") {
          // remember the URL so the response can be filtered on it later
          this._url = arguments[1];
        }
        return this.xhr[method].apply(this.xhr, arguments);
      };
    });
    // proxy change event handler
    Object.defineProperty(window.XMLHttpRequest.prototype, "onreadystatechange", {
      get: function(){
        // hand back what the page assigned, not the internal wrapper
        return ("_onreadystatechange" in this) ? this._onreadystatechange : this.xhr.onreadystatechange;
      },
      set: function(onreadystatechange){
        var that = this.xhr;
        var realThis = this;
        realThis._onreadystatechange = onreadystatechange;
        that.onreadystatechange = function(){
          // request is fully loaded
          if (that.readyState == 4) {
            var text = readText(that);
            if (debug) console.log("RESPONSE RECEIVED:", typeof text == "string" ? text.length : "none");
            // there is a response and filter execution based on url
            if (text && typeof realThis._url == "string" && realThis._url.indexOf("whatever") != -1) {
              window.myAwesomeResponse = text;
            }
          }
          if (typeof onreadystatechange == "function") {
            return onreadystatechange.apply(realThis, arguments);
          }
        };
      }
    });
    // proxy the remaining properties and constants straight through
    var otherscalars = [
      "onabort",
      "onerror",
      "onload",
      "onloadstart",
      "onloadend",
      "onprogress",
      "ontimeout",
      "readyState",
      "response",
      "responseText",
      "responseType",
      "responseURL",
      "responseXML",
      "status",
      "statusText",
      "timeout",
      "upload",
      "withCredentials",
      "DONE",
      "UNSENT",
      "HEADERS_RECEIVED",
      "LOADING",
      "OPENED"
    ];
    otherscalars.forEach(function(scalar){
      Object.defineProperty(window.XMLHttpRequest.prototype, scalar, {
        get: function(){
          return this.xhr[scalar];
        },
        set: function(obj){
          this.xhr[scalar] = obj;
        }
      });
    });
  })(window, false);
}
If you want to capture the AJAX calls from the very beginning, you need to add this to one of the first event handlers
casper.on("page.initialized", function(resource){
this.evaluate(replaceXHR);
});
or evaluate(replaceXHR) when you need it.
The control flow would look like this:
function replaceXHR(){ /* from above*/ }
casper.start(yourUrl, function(){
  this.evaluate(replaceXHR);
});
/**
 * Reads window.myAwesomeResponse from the page context.
 * Uses the `casper` instance explicitly rather than `this`, so it works both
 * as a waitFor() test function and when called as a plain function.
 */
function getAwesomeResponse(){
  return casper.evaluate(function(){
    return window.myAwesomeResponse;
  });
}
// stops waiting if window.myAwesomeResponse is something that evaluates to true
casper.waitFor(getAwesomeResponse, function then(){
  var data = JSON.parse(getAwesomeResponse());
  // Do something with data
});
casper.run();
As described above, I create a proxy for XMLHttpRequest so that every time it is used on the page, I can do something with it. The page that you scrape uses the xhr.onreadystatechange callback to receive data. The proxying is done by defining a specific setter function which writes the received data to window.myAwesomeResponse in the page context. The only thing you need to do is retrieving this text.
JSONP Request
Writing a proxy for JSONP is even easier if you know the prefix (the function that is called with the loaded JSON, e.g. insert({"data":["Some", "JSON", "here"],"id":"asdasda"})). You can overwrite insert in the page context
after the page is loaded
casper.start(url).then(function(){
  // Wrap the page's JSONP callback so the payload is also kept on window.
  this.evaluate(function(){
    var oldInsert = insert;
    insert = function(json){
      window.myAwesomeResponse = json;
      return oldInsert.apply(window, arguments);
    };
  });
}).waitFor(getAwesomeResponse, function then(){
  // Call the helper with this casper instance as `this`; a bare call would lose it.
  var raw = getAwesomeResponse.call(this);
  // A JSONP callback normally receives an already-parsed object; only parse
  // when the page really handed over a string.
  var data = (typeof raw === "string") ? JSON.parse(raw) : raw;
  // Do something with data
}).run();
or before the request is received (if the function is registered just before the request is invoked)
// Register the listener only; run() must not be called before start().
casper.on("resource.requested", function(resource){
  // filter on the correct call
  if (resource.url.indexOf(".jsonp") != -1) {
    this.evaluate(function(){
      // Wrap only once, even if several matching requests are issued.
      if (window.__insertWrapped) {
        return;
      }
      window.__insertWrapped = true;
      var oldInsert = insert;
      insert = function(json){
        window.myAwesomeResponse = json;
        return oldInsert.apply(window, arguments);
      };
    });
  }
});
casper.start(url).waitFor(getAwesomeResponse, function then(){
  // Call the helper with this casper instance as `this`; a bare call would lose it.
  var raw = getAwesomeResponse.call(this);
  // A JSONP callback normally receives an already-parsed object; only parse
  // when the page really handed over a string.
  var data = (typeof raw === "string") ? JSON.parse(raw) : raw;
  // Do something with data
}).run();
I may be late into the party, but the answer may help someone like me who would fall into this problem later in future.
I had to start with PhantomJS, then moved to CasperJS but finally settled with SlimerJS. Slimer is based on Phantom, is compatible with Casper, and can send you back the response body using the same onResponseReceived method, in "response.body" part.
Reference: https://docs.slimerjs.org/current/api/webpage.html#webpage-onresourcereceived
#Artjom's answer's doesn't work for me in the recent Chrome and CasperJS versions.
Based on #Artjom's answer and based on gilly3's answer on how to replace XMLHttpRequest, I have composed a new solution that should work in most/all versions of the different browsers. Works for me.
SlimerJS cannot work on newer version of FireFox, therefore no good for me.
Here is the generic code to add a listener for the load of an XHR (not dependent on CasperJS):
// Patches XMLHttpRequest.prototype.open and .send so that XHROnStateChange is
// called with the XHR instance whenever one reaches readyState 4.
var addXHRListener = function (XHROnStateChange) {
// Runs with the XHR as `this`; forwards it once the request has completed.
var XHROnLoad = function () {
if (this.readyState == 4) {
XHROnStateChange(this)
}
}
var open_original = XMLHttpRequest.prototype.open;
XMLHttpRequest.prototype.open = function (method, url, async, unk1, unk2) {
// Record the URL on the instance so listeners can tell requests apart.
this.requestUrl = url
open_original.apply(this, arguments);
};
var xhrSend = XMLHttpRequest.prototype.send;
XMLHttpRequest.prototype.send = function () {
var xhr = this;
if (xhr.addEventListener) {
// Remove first so the listener is attached only once if send() is called again.
xhr.removeEventListener("readystatechange", XHROnLoad);
xhr.addEventListener("readystatechange", XHROnLoad, false);
} else {
// Fallback without addEventListener: take over onreadystatechange and call
// the previously assigned handler (kept in `handler`) before XHROnLoad.
function readyStateChange() {
if (handler) {
if (handler.handleEvent) {
handler.handleEvent.apply(xhr, arguments);
} else {
handler.apply(xhr, arguments);
}
}
XHROnLoad.apply(xhr, arguments);
setReadyStateChange();
}
// Re-installs readyStateChange 1 ms later if onreadystatechange has been
// replaced in the meantime, remembering the replacement in `handler`.
function setReadyStateChange() {
setTimeout(function () {
if (xhr.onreadystatechange != readyStateChange) {
handler = xhr.onreadystatechange;
xhr.onreadystatechange = readyStateChange;
}
}, 1);
}
// Declared after the functions that use it; `var` hoisting makes it visible to them.
var handler;
setReadyStateChange();
}
xhrSend.apply(xhr, arguments);
};
}
Here is CasperJS code to emit a custom event on load of XHR:
// On every page initialisation, install the XHR listener in the page and
// have it report completed requests through window.callPhantom.
casper.on("page.initialized", function (resource) {
// Passed into evaluate() as the XHROnStateChange argument of addXHRListener.
var emitXHRLoad = function (xhr) {
window.callPhantom({eventName: 'xhr.load', eventData: xhr})
}
this.evaluate(addXHRListener, emitXHRLoad);
});
// Re-emit whatever the page sent as a casper event named by data.eventName.
casper.on('remote.callback', function (data) {
casper.emit(data.eventName, data.eventData)
});
Here is a code to listen to "xhr.load" event and get the XHR response body:
casper.on('xhr.load', function (xhr) {
console.log('xhr load', xhr.requestUrl)
console.log('xhr load', xhr.responseText)
});
Additionally, you can also directly download the content and manipulate it later.
Here is the example of the script I am using to retrieve a JSON and save it locally :
// Downloads a JSON resource straight to disk; web security is switched off in
// the page settings passed to the casper instance.
var casperOptions = {
  pageSettings: {
    webSecurityEnabled: false
  }
};
var casper = require('casper').create(casperOptions);
var url = 'https://twitter.com/users/username_available?username=whatever';

// Step run after the blank page has been opened: save the resource as hop.json.
function saveJson() {
  this.download(url, "hop.json");
}

// Completion callback: report and quit.
function finish() {
  this.echo('Done.').exit();
}

casper.start('about:blank', saveJson);
casper.run(finish);

EmberJS 1.0.0 RC7 run code after images load

I'm trying to resize images after the view and images load on screen.
I have this TAG route, and after updating EmberJS to RC7 I saw that didInsertElement stopped working like before: it runs one time only, but I need it to run every time I enter the view.
So I tried to run the code on setupController like this:
/**
 * Route for a single tag: loads the posts for the tag and resizes the post
 * images so each fits inside the viewport next to the sidebar.
 */
App.TagRoute = Ember.Route.extend({
  /** Loads the posts for `params.tag`. */
  model: function(params) {
    return App.TKRPTumblr.findAll(params.tag);
  },
  /**
   * Puts the response on the controller. `tag` is either the resolved model
   * (an object) or a raw tag value that still has to be fetched.
   */
  setupController: function(controller, tag) {
    if(typeof tag === 'object'){
      controller.set('content', tag.response);
    }else{
      App.TKRPTumblr.findAll(tag)
        .then(function(data) {
          controller.set('content', data.response);
        });
    }
    this.resizeView();
  },
  /**
   * Scales every post image down (never up) so it fits the available
   * width/height while keeping its aspect ratio, sizes the ember view to the
   * summed image widths and fires the `_htmlReady` document event.
   * If no image is in the DOM yet, it retries on the next run loop.
   */
  resizeView: function(){
    var sidebarWidth = $('#sidebar').outerWidth();
    var maxWidth = $(window).width() - sidebarWidth;
    var maxHeight = $(window).height() - 125;
    var totalWidth = 0;
    if($('html .post').find('img').length === 0){
      // Images not rendered yet: try again on the next run loop.
      Ember.run.next(this, function() {
        this.resizeView();
      });
      return;
    }
    $('.post').each(function(){
      var img = $(this).find('img');
      // attr() returns strings; convert once so the arithmetic is numeric.
      var imgWidth = Number(img.attr('width'));
      var imgHeight = Number(img.attr('height'));
      // One scale factor that satisfies both limits. The original divided by
      // the running total (0 for the first image) for the width case, and its
      // height case rescaled from the original size, which could enlarge again.
      var scale = Math.min(1, maxWidth / imgWidth, maxHeight / imgHeight);
      if(scale < 1){
        img.width(imgWidth * scale).height(imgHeight * scale);
      }
      totalWidth += img.width();
    });
    if(isNaN(totalWidth)){
      totalWidth = $('.post img').width() + sidebarWidth;
    }else{
      totalWidth += sidebarWidth + 100;
    }
    $('body').children('.ember-view').innerWidth(totalWidth);
    var evt = document.createEvent('Event');
    evt.initEvent('_htmlReady', true, true);
    document.dispatchEvent(evt);
  }
});
Unfortunately it didn't worked as well :/ I can't figure out now how to do that :(( can anyone help me?
it run one time only, but I need it to run every time I enter the view.
If you rely on data returned by your model hook, then I guess you should rather use the routes afterModel hook, this is called every time you enter the route and the model is resolved.
App.TagRoute = Ember.Route.extend({
afterModel: function() {
this.resizeView();
}
...
});
But if you only care to invoke your function every time the route is entered no matter if the model is resolved or not, then using the routes activate hook is also an option:
App.TagRoute = Ember.Route.extend({
activate: function() {
this.resizeView();
}
...
});
Hope it helps.

Angular $http, $q: track progress

Is there a way to track progress of http requests with Angular $http and $q? I'm making $http calls from a list of urls and then using $q.all I'm returning result of all requests. I would like to track progress of each request (promise resolved) so that I can show some progress to the user. I'm thinking of emitting event when a promise gets resolved but I'm not sure where should that be.
var d = $q.defer();
var promises = [];
for(var i = 0; i < urls.length; i++){
var url = urls[i];
var p = $http.get(url, {responseType: "arraybuffer"});
promises.push(p);
}
$q.all(promises).then(function(result){
d.resolve(result);
}, function(rejection){
d.reject(rejection);
});
return d.promise;
EDIT:
OK, after a bit of fiddling, this is what I've come up with
var d = $q.defer();
var promises = [];
var completedCount = 0;
for(var i = 0; i < urls.length; i++){
var url = urls[i];
var p = $http.get(url, {responseType: "arraybuffer"}).then(function(respose){
completedCount = completedCount+1;
var progress = Math.round((completedCount/urls.length)*100);
$rootScope.$broadcast('download.completed', {progress: progress});
return respose;
}, function(error){
return error;
});
promises.push(p);
}
$q.all(promises).then(function(result){
d.resolve(result);
}, function(rejection){
d.reject(rejection);
});
return d.promise;
Not sure if it is the right way of doing it.
I see you have already edited your own code, but if you need a more general solution, keep reading.
I once made a progress solution based on all pending http request (showing a indicator that something is loading, kind of like youtube has on the top progress bar)
js:
app.controller("ProgressCtrl", function($http) {
this.loading = function() {
return !!$http.pendingRequests.length;
};
});
html:
<div id="fixedTopBar" ng-controller="ProgressCtrl as Progress">
<div id="loading" ng-if="Progress.loading()">
loading...
</div>
</div>
.
Hardcore
For my latest project it wasn't just enought with just request calls. I started to get into sockets, webworker, filesystem, filereader, dataChannel and any other asynchronous calls that use $q. So i start looking into how i could get all the pending promises (including $http). Turns out there wasn't any angular solution, so i kind of monkey patched the $q provider by decorating it.
// Decorates $q so that deferreds created through $q.defer() are counted in
// $q.pendingPromises until they settle.
app.config(function($provide) {
  $provide.decorator("$q", function($delegate) {
    // $delegate is the original $q service
    var originalDefer = $delegate.defer;

    // Runs when a counted promise settles, whether it succeeded or failed.
    function settle() {
      $delegate.pendingPromises--;
    }

    // Replacement for defer(): count up, then hand out a normal deferred.
    function countingDefer() {
      $delegate.pendingPromises++;
      var deferred = originalDefer();
      deferred.promise['finally'](settle);
      return deferred;
    }

    $delegate.pendingPromises = 0;
    $delegate.defer = countingDefer;
    return $delegate;
  });
});
app.controller("ProgressCtrl", function($q) {
this.loading = function() {
return !!$q.pendingPromises;
};
});
This may not perhaps fit everyone needs for production but it could be useful to developers to see if there is any unresolved issues that has been left behind and never gets called
Make a small general helper function:
/**
 * Like $q.all(promises), but also reports progress.
 *
 * @param {Array} promises - promises to wait for.
 * @param {Function} progress - called after each fulfilment with the
 *   fraction (0..1] of promises fulfilled so far.
 * @returns {Promise} the promise returned by $q.all(promises); it rejects as
 *   usual when any input promise rejects.
 */
function allWithProgress(promises, progress) {
  var total = promises.length;
  var now = 0;
  promises.forEach(function(p) {
    p.then(function() {
      now++;
      progress(now / total);
    }, function() {
      // Handled here only so this side chain does not surface as an
      // unhandled rejection; the caller still gets it through $q.all below.
    });
  });
  return $q.all(promises);
}
Then use it:
var promises = urls.map(function(url) {
return $http.get(url, {responseType: "arraybuffer"});
});
allWithProgress(promises, function(progress) {
progress = Math.round(progress * 100);
$rootScope.$broadcast('download.completed', {progress: progress});
}).catch(function(error) {
console.log(error);
});

Why is AJAX settimeout not working?

I'm going a little crazy trying to understand why this outputs the json data once and then not again as per timeout function. What am I doing wrong?
<script type="text/javascript">
var myTimer = 0;
/**
 * POSTs to ballJson.php, prints each value of the JSON reply on its own line
 * inside the #results element, and schedules itself again in 10 seconds.
 */
function ajax_get_json(){
  var target = document.getElementById("results");
  var request = new XMLHttpRequest();
  request.open("POST", "ballJson.php", true);
  request.setRequestHeader("Content-type", "application/json");
  request.onreadystatechange = function() {
    // Only a completed request with status 200 is rendered.
    if (request.readyState != 4 || request.status != 200) {
      return;
    }
    var payload = JSON.parse(request.responseText);
    target.innerHTML = "";
    for (var key in payload) {
      target.innerHTML += payload[key] + "<br />";
    }
  };
  request.send(null);
  target.innerHTML = "requesting...";
  // Handle of the pending timer for the next poll.
  myTimer = setTimeout('ajax_get_json()', 10000);
}
</script>
I think that you want to use the setInterval function as setTimeout only executes one time.
It is also much preferred to use setTimeout(function () { ajax_get_json(); }, 10000) or setTimeout(ajax_get_json, 10000) instead of using a string that is evaluated as the function callback.
Also note that the ajax requests may complete at any time, so the updates won't arrive exactly once every 10 seconds when using setInterval. You could also call setTimeout in the onreadystatechange callback to chain the calls.

Resources