everything was working flawlessly like 3 days ago when I finished writing some code and testing. Today when I wanted to do some more changes it stopped working.. Like without a reason. Code didn't change a bit. I'm getting this error:
Uncaught RangeError: Maximum call stack size exceeded
Debugger tells me that error has been thrown somewhere during this call:
return open.apply(this, arguments);
Because of that anything that should be loaded via AJAX on the site is not working.
This is the code for intercepting the AJAX calls:
var XHR = XMLHttpRequest.prototype;
var open = XHR.open;
var send = XHR.send;
var setRequestHeader = XHR.setRequestHeader;
XHR.open = function (method, url) {
this._url = url;
this._requestHeaders = {};
return open.apply(this, arguments);
};
XHR.setRequestHeader = function (header, value) {
this._requestHeaders[header] = value;
return setRequestHeader.apply(this, arguments);
};
XHR.send = function (postData) {
this.addEventListener('load', function () {
var myUrl = this._url ? this._url.toLowerCase() : this._url;
if (myUrl) {
if (this.responseType != 'blob' && this.responseText) {
try {
var allowedUrls = ['shoutbox.json', 'comments/latest.json'];
if (new RegExp(allowedUrls.join("|")).test(this._url)) {
var extensionID = 'exampleExtensionID';
chrome.runtime.sendMessage(extensionID, { interception: true });
}
} catch (err) {
}
}
}
});
return send.apply(this, arguments);
};
Has anyone idea why it might stopped working?
I had to put my injected.js (XHR) into self invoking function.
I am trying to download a file with CasperJS. If using browser, the download begins when user clicks a button and the response headers look like this:
I have tried these two methods with no luck:
1) https://stackoverflow.com/a/26334034
With this approach, the code block inside the if-statement is never executed. If I remove the condition, a bunch of resources are being saved, such as css files and so on. So the event listener is working, but for some reason not triggering when I use CasperJs click function to click the button that should start the download.
2) https://stackoverflow.com/a/30122021/692695
File.csv is saved but it's the web sites sourcecode, not the csv-file I get when I click the button on the website.
All of my code:
'use strict';
var utils = require('utils');
var casper = require('casper').create({
//verbose: true,
//logLevel: "debug",
clientScripts: ["node_modules/jquery/dist/jquery.min.js"]
});
function writeHtml(filename) {
var fs = require('fs');
var content = casper.getHTML();
fs.write(filename, content, 'w');
}
function getUrl() {
var url;
url = $('.tableofcontent_link:contains("Väestö työmarkkina-aseman, sukupuolen ja iän mukaan")').parent().attr('href');
return url;
}
casper.selectOptionByValue = function(selector, valueToMatch){
this.evaluate(function(selector, valueToMatch){
var select = document.querySelector(selector),
found = false;
Array.prototype.forEach.call(select.children, function(opt, i){
if (!found && opt.value.indexOf(valueToMatch) !== -1) {
select.selectedIndex = i;
found = true;
}
});
// dispatch change event in case there is some kind of validation
var evt = document.createEvent("UIEvents"); // or "HTMLEvents"
evt.initUIEvent("change", true, true);
select.dispatchEvent(evt);
}, selector, valueToMatch);
};
var link;
var url = 'http://pxnet2.stat.fi/PXWeb/pxweb/fi/StatFin/StatFin__tym__tyti/?table';
casper.start(url);
casper.userAgent('Mozilla/5.0 (Macintosh; Intel Mac OS X)');
casper.then(function () {
this.echo("Now at: " + this.getCurrentUrl());
link = 'http://pxnet2.stat.fi' + casper.evaluate(getUrl);
});
casper.then(function () {
this.open(link);
});
casper.then(function() {
this.echo("Now at: " + this.getCurrentUrl());
// Select all data for each item
casper.click('#ctl00_ContentPlaceHolderMain_VariableSelector1_VariableSelector1_VariableSelectorValueSelectRepeater_ctl01_VariableValueSelect_VariableValueSelect_SelectAllButton');
casper.click('#ctl00_ContentPlaceHolderMain_VariableSelector1_VariableSelector1_VariableSelectorValueSelectRepeater_ctl02_VariableValueSelect_VariableValueSelect_SelectAllButton');
casper.click('#ctl00_ContentPlaceHolderMain_VariableSelector1_VariableSelector1_VariableSelectorValueSelectRepeater_ctl03_VariableValueSelect_VariableValueSelect_SelectAllButton');
casper.click('#ctl00_ContentPlaceHolderMain_VariableSelector1_VariableSelector1_VariableSelectorValueSelectRepeater_ctl04_VariableValueSelect_VariableValueSelect_SelectAllButton');
casper.click('#ctl00_ContentPlaceHolderMain_VariableSelector1_VariableSelector1_VariableSelectorValueSelectRepeater_ctl05_VariableValueSelect_VariableValueSelect_SelectAllButton');
});
casper.then(function() {
// casper.selectOptionByValue('#ctl00_ContentPlaceHolderMain_VariableSelector1_VariableSelector1_OutputFormats_OutputFormats_OutputFormatDropDownList',
// 'FileTypeExcelX');
// Select the format of the file from the select option list at the bottom
casper.selectOptionByValue('#ctl00_ContentPlaceHolderMain_VariableSelector1_VariableSelector1_OutputFormats_OutputFormats_OutputFormatDropDownList',
'FileTypeCsvWithHeadingAndSemiColon');
});
casper.then(function () {
// just for debugging
writeHtml('page1.html');
});
casper.then(function() {
//casper.click('#ctl00_ContentPlaceHolderMain_VariableSelector1_VariableSelector1_ButtonViewTable');
});
casper.then(function() {
// FIRST ATTEMPT TO LOAD THE DATA TO a file called file.csv
var formData = casper.evaluate(function(){
return $('form#aspnetForm').serialize();
});
//this.echo("Params: " + formData);
var targetFile = 'file.csv';
casper.download(link, targetFile, 'POST', formData);
});
casper.then(function () {
// just for debugging
writeHtml('page2.html');
});
// SECCOND ATTEMPT TO LOAD THE DATA TO a file called stats.csv
casper.on('resource.received', function (resource) {
if ((resource.url.indexOf('tyti_001') !== -1) ) {
this.echo(resource.url);
var file;
file = "stats.csv";
try {
this.echo("Attempting to download file " + file);
var fs = require('fs');
casper.download(resource.url, fs.workingDirectory + '/' + file);
} catch (e) {
this.echo(e);
}
}
});
casper.run(function () {
this.echo('End').exit();
});
And my package.json:
{
"scripts": {
"test": "dotest"
},
"pre-commit": ["test"],
"dependencies": {
"jquery": "^3.3.1"
},
"devDependencies": {
"pre-commit": "^1.2.2"
}
}
Explanation of the code:
First visit this page: http://pxnet2.stat.fi/PXWeb/pxweb/fi/StatFin/StatFin__tym__tyti/statfin_tyti_pxt_001.px/?rxid=bd4d5dc1-358d-407e-ae47-13266b79bfd0
There, dynamically pick a specified link and move there.
Select all data by clicking the V-shapen icon (look at the attached screenshot) and then select the format of the file.
I've faced this issue earlier with all versions except phantomjs 2.0.0. I also tried the solutions you shared from SO a year ago and they didn't work as well.
I'm going to assume you're using a phantomjs version other than 2.0.0.
Here's the link to download it
https://bitbucket.org/ariya/phantomjs/downloads/
With it, you will have access to onFileDownload method which you can override and use like below
casper.page.onFileDownload = function(status){
console.log('onFileDownload(' + status + ')');
return "newfile.csv";
};
onFileDownload will be called whenever a file is downloaded as a result of clicking a button (ajax) or as a result of sequential get/post requests.
All you have to do is, trigger the click on the button/link that will initiate download.
Note : My solution is assuming that everything else (site is not blocking phantomjs and your request headers/cookies are as expected)
I have created a store that attempts to do an AJAX request and set this.restaurants to the returned data. If I hard code the data in as an array of json to the `this.restaurants everything works great. But my AJAX call is not being run in time. How can i fix this? The console log displays everything as expected
class RestaurantStore extends EventEmitter {
constructor() {
super()
this.restaurants = this.getRestaurants()
}
getRestaurants() {
const url = `/lib/data/restaurants.json`
var r = new XMLHttpRequest();
r.open("GET", url, true);
r.onreadystatechange = function () {
if (r.readyState != 4 || r.status != 200) return;
const json = r.responseText
console.log(json)
return json
// this.restaurants = json
};
r.send();
}
}
The data on the webpage is displayed dynamically and it seems that checking for every change in the html and extracting the data is a very daunting task and also needs me to use very unreliable XPaths. So I would want to be able to extract the data from the XHR packets.
I hope to be able to extract information from XHR packets as well as generate 'XHR' packets to be sent to the server.
The extracting information part is more important for me because the sending of information can be handled easily by automatically triggering html elements using casperjs.
I'm attaching a screenshot of what I mean.
The text in the response tab is the data I need to process afterwards. (This XHR response has been received from the server.)
This is not easily possible, because the resource.received event handler only provides meta data like url, headers or status, but not the actual data. The underlying phantomjs event handler acts the same way.
Stateless AJAX Request
If the ajax call is stateless, you may repeat the request
casper.on("resource.received", function(resource){
// somehow identify this request, here: if it contains ".json"
// it also also only does something when the stage is "end" otherwise this would be executed two times
if (resource.url.indexOf(".json") != -1 && resource.stage == "end") {
var data = casper.evaluate(function(url){
// synchronous GET request
return __utils__.sendAJAX(url, "GET");
}, resource.url);
// do something with data, you might need to JSON.parse(data)
}
});
casper.start(url); // your script
You may want to add the event listener to resource.requested. That way you don't need to way for the call to complete.
You can also do this right inside of the control flow like this (source: A: CasperJS waitForResource: how to get the resource i've waited for):
casper.start(url);
var res, resData;
casper.waitForResource(function check(resource){
res = resource;
return resource.url.indexOf(".json") != -1;
}, function then(){
resData = casper.evaluate(function(url){
// synchronous GET request
return __utils__.sendAJAX(url, "GET");
}, res.url);
// do something with the data here or in a later step
});
casper.run();
Stateful AJAX Request
If it is not stateless, you would need to replace the implementation of XMLHttpRequest. You will need to inject your own implementation of the onreadystatechange handler, collect the information in the page window object and later collect it in another evaluate call.
You may want to look at the XHR faker in sinon.js or use the following complete proxy for XMLHttpRequest (I modeled it after method 3 from How can I create a XMLHttpRequest wrapper/proxy?):
function replaceXHR(){
(function(window, debug){
function args(a){
var s = "";
for(var i = 0; i < a.length; i++) {
s += "\t\n[" + i + "] => " + a[i];
}
return s;
}
var _XMLHttpRequest = window.XMLHttpRequest;
window.XMLHttpRequest = function() {
this.xhr = new _XMLHttpRequest();
}
// proxy ALL methods/properties
var methods = [
"open",
"abort",
"setRequestHeader",
"send",
"addEventListener",
"removeEventListener",
"getResponseHeader",
"getAllResponseHeaders",
"dispatchEvent",
"overrideMimeType"
];
methods.forEach(function(method){
window.XMLHttpRequest.prototype[method] = function() {
if (debug) console.log("ARGUMENTS", method, args(arguments));
if (method == "open") {
this._url = arguments[1];
}
return this.xhr[method].apply(this.xhr, arguments);
}
});
// proxy change event handler
Object.defineProperty(window.XMLHttpRequest.prototype, "onreadystatechange", {
get: function(){
// this will probably never called
return this.xhr.onreadystatechange;
},
set: function(onreadystatechange){
var that = this.xhr;
var realThis = this;
that.onreadystatechange = function(){
// request is fully loaded
if (that.readyState == 4) {
if (debug) console.log("RESPONSE RECEIVED:", typeof that.responseText == "string" ? that.responseText.length : "none");
// there is a response and filter execution based on url
if (that.responseText && realThis._url.indexOf("whatever") != -1) {
window.myAwesomeResponse = that.responseText;
}
}
onreadystatechange.call(that);
};
}
});
var otherscalars = [
"onabort",
"onerror",
"onload",
"onloadstart",
"onloadend",
"onprogress",
"readyState",
"responseText",
"responseType",
"responseXML",
"status",
"statusText",
"upload",
"withCredentials",
"DONE",
"UNSENT",
"HEADERS_RECEIVED",
"LOADING",
"OPENED"
];
otherscalars.forEach(function(scalar){
Object.defineProperty(window.XMLHttpRequest.prototype, scalar, {
get: function(){
return this.xhr[scalar];
},
set: function(obj){
this.xhr[scalar] = obj;
}
});
});
})(window, false);
}
If you want to capture the AJAX calls from the very beginning, you need to add this to one of the first event handlers
casper.on("page.initialized", function(resource){
this.evaluate(replaceXHR);
});
or evaluate(replaceXHR) when you need it.
The control flow would look like this:
function replaceXHR(){ /* from above*/ }
casper.start(yourUrl, function(){
this.evaluate(replaceXHR);
});
function getAwesomeResponse(){
return this.evaluate(function(){
return window.myAwesomeResponse;
});
}
// stops waiting if window.myAwesomeResponse is something that evaluates to true
casper.waitFor(getAwesomeResponse, function then(){
var data = JSON.parse(getAwesomeResponse());
// Do something with data
});
casper.run();
As described above, I create a proxy for XMLHttpRequest so that every time it is used on the page, I can do something with it. The page that you scrape uses the xhr.onreadystatechange callback to receive data. The proxying is done by defining a specific setter function which writes the received data to window.myAwesomeResponse in the page context. The only thing you need to do is retrieving this text.
JSONP Request
Writing a proxy for JSONP is even easier, if you know the prefix (the function to call with the loaded JSON e.g. insert({"data":["Some", "JSON", "here"],"id":"asdasda")). You can overwrite insert in the page context
after the page is loaded
casper.start(url).then(function(){
this.evaluate(function(){
var oldInsert = insert;
insert = function(json){
window.myAwesomeResponse = json;
oldInsert.apply(window, arguments);
};
});
}).waitFor(getAwesomeResponse, function then(){
var data = JSON.parse(getAwesomeResponse());
// Do something with data
}).run();
or before the request is received (if the function is registered just before the request is invoked)
casper.on("resource.requested", function(resource){
// filter on the correct call
if (resource.url.indexOf(".jsonp") != -1) {
this.evaluate(function(){
var oldInsert = insert;
insert = function(json){
window.myAwesomeResponse = json;
oldInsert.apply(window, arguments);
};
});
}
}).run();
casper.start(url).waitFor(getAwesomeResponse, function then(){
var data = JSON.parse(getAwesomeResponse());
// Do something with data
}).run();
I may be late into the party, but the answer may help someone like me who would fall into this problem later in future.
I had to start with PhantomJS, then moved to CasperJS but finally settled with SlimerJS. Slimer is based on Phantom, is compatible with Casper, and can send you back the response body using the same onResponseReceived method, in "response.body" part.
Reference: https://docs.slimerjs.org/current/api/webpage.html#webpage-onresourcereceived
#Artjom's answer's doesn't work for me in the recent Chrome and CasperJS versions.
Based on #Artjom's answer and based on gilly3's answer on how to replace XMLHttpRequest, I have composed a new solution that should work in most/all versions of the different browsers. Works for me.
SlimerJS cannot work on newer version of FireFox, therefore no good for me.
Here is the the generic code to add a listner to load of XHR (not dependent on CasperJS):
var addXHRListener = function (XHROnStateChange) {
var XHROnLoad = function () {
if (this.readyState == 4) {
XHROnStateChange(this)
}
}
var open_original = XMLHttpRequest.prototype.open;
XMLHttpRequest.prototype.open = function (method, url, async, unk1, unk2) {
this.requestUrl = url
open_original.apply(this, arguments);
};
var xhrSend = XMLHttpRequest.prototype.send;
XMLHttpRequest.prototype.send = function () {
var xhr = this;
if (xhr.addEventListener) {
xhr.removeEventListener("readystatechange", XHROnLoad);
xhr.addEventListener("readystatechange", XHROnLoad, false);
} else {
function readyStateChange() {
if (handler) {
if (handler.handleEvent) {
handler.handleEvent.apply(xhr, arguments);
} else {
handler.apply(xhr, arguments);
}
}
XHROnLoad.apply(xhr, arguments);
setReadyStateChange();
}
function setReadyStateChange() {
setTimeout(function () {
if (xhr.onreadystatechange != readyStateChange) {
handler = xhr.onreadystatechange;
xhr.onreadystatechange = readyStateChange;
}
}, 1);
}
var handler;
setReadyStateChange();
}
xhrSend.apply(xhr, arguments);
};
}
Here is CasperJS code to emit a custom event on load of XHR:
casper.on("page.initialized", function (resource) {
var emitXHRLoad = function (xhr) {
window.callPhantom({eventName: 'xhr.load', eventData: xhr})
}
this.evaluate(addXHRListener, emitXHRLoad);
});
casper.on('remote.callback', function (data) {
casper.emit(data.eventName, data.eventData)
});
Here is a code to listen to "xhr.load" event and get the XHR response body:
casper.on('xhr.load', function (xhr) {
console.log('xhr load', xhr.requestUrl)
console.log('xhr load', xhr.responseText)
});
Additionally, you can also directly download the content and manipulate it later.
Here is the example of the script I am using to retrieve a JSON and save it locally :
var casper = require('casper').create({
pageSettings: {
webSecurityEnabled: false
}
});
var url = 'https://twitter.com/users/username_available?username=whatever';
casper.start('about:blank', function() {
this.download(url, "hop.json");
});
casper.run(function() {
this.echo('Done.').exit();
});
I have the following function in my controller:
$scope.model.listApplicantStatuses = function(){
var statusChoices = jobsService.getApplicantStatuses();
if(statusChoices !== null)
return statusChoices;
//if($scope.model.listApplicantStatuses.inProgress)
// return;
//$scope.model.listApplicantStatuses.inProgress = true;
jobsService.fetchApplicantStatuses().then(function(data){
jobsService.setApplicantStatuses(data.data);
return data.data;
},
function(data){
$scope.layout.showNotification('error', 10 * 1000, 'we are is experiencing technical difficulties Contact Support');
});
}
corresponding service code:
jobsServ.fetchApplicantStatuses = function(){
return $http.get(utils.getBaseUrl() + '/applications/status_choices', utils.getConfig());
}
jobsServ.getApplicantStatuses = function(){
return that.applicantStatusChoices;
},
jobsServ.setApplicantStatuses = function(choices){
that.applicantStatusChoices = choices;
},
Example DOM usage:
<select class="statusSelect" data-ng-model="model.editedApplicant[applicant.id].status" data-ng-show="layout.statusVisible(applicant.id) && !layout.statusLoader[applicant.id]" data-ng-options="key as val for (key, val) in model.listApplicantStatuses()" data-ng-change="model.updateStatus(applicant.id)"></select>
Now, the problem that I am having is that while chrome waits until the first function call completes, and then gives me the data that I get from the AJAX call, and returns undefined in the meanwhile, Firefox calls the function over and over again, creating a whole lot of uneeded XHR requests.
I commented out the code that was setting the inProgress $scope variable, a because it seems to much of a jQurish solution to me, and because that would force me to change my code in many places, and create another Boolean flag such as this per every request to the server.
I would expect the Firefox behaviour. Perhaps you should change the logic as:
// a variable to keep statuses, or a promise
$scope.applicantStatusList = $scope.model.listApplicantStatuses();
Change listApplicantStatuses() to return the promise:
$scope.model.listApplicantStatuses = function() {
var statusChoices = jobsService.getApplicantStatuses();
if(statusChoices !== null)
return statusChoices;
return jobsService.fetchApplicantStatuses().then(function(data){
jobsService.setApplicantStatuses(data.data);
return data.data;
},
function(data){
$scope.layout.showNotification(...);
});
};
And use the applicantStatusList in the <select>:
<select ... data-ng-options="key as val for (key, val) in applicantStatusList" ...></select>
I solved this by allocating a local variable named _root at the function that sends the xhr, setting _root.ingProgress to true once the request is send, and switching it to false once an answer is received.
Works like a charm.
code:
$scope.model.listApplicantStatuses = function(){
var statusChoices = jobsService.getApplicantStatuses();
if(statusChoices !== null)
return statusChoices;
var _root = this;
if(_root.inProgress)
return;
_root.inProgress = true;
jobsService.fetchApplicantStatuses().then(function(data){
jobsService.setApplicantStatuses(data.data);
_root.inProgress = false;
return data.data;
},
function(data){
_root.inProgress = false;
$scope.layout.showNotification('error', 10 * 1000, 'We are currently experiencing technical difficulties Contact Support');
});
}