How can I catch and process the data from the XHR responses using casperjs? - ajax

The data on the webpage is displayed dynamically and it seems that checking for every change in the html and extracting the data is a very daunting task and also needs me to use very unreliable XPaths. So I would want to be able to extract the data from the XHR packets.
I hope to be able to extract information from XHR packets as well as generate 'XHR' packets to be sent to the server.
The extracting information part is more important for me because the sending of information can be handled easily by automatically triggering html elements using casperjs.
I'm attaching a screenshot of what I mean.
The text in the response tab is the data I need to process afterwards. (This XHR response has been received from the server.)

This is not easily possible, because the resource.received event handler only provides meta data like url, headers or status, but not the actual data. The underlying phantomjs event handler acts the same way.
Stateless AJAX Request
If the ajax call is stateless, you may repeat the request
casper.on("resource.received", function(resource){
// somehow identify this request, here: if it contains ".json"
// it also also only does something when the stage is "end" otherwise this would be executed two times
if (resource.url.indexOf(".json") != -1 && resource.stage == "end") {
var data = casper.evaluate(function(url){
// synchronous GET request
return __utils__.sendAJAX(url, "GET");
}, resource.url);
// do something with data, you might need to JSON.parse(data)
}
});
casper.start(url); // your script
You may want to add the event listener to resource.requested. That way you don't need to way for the call to complete.
You can also do this right inside of the control flow like this (source: A: CasperJS waitForResource: how to get the resource i've waited for):
casper.start(url);
var res, resData;
casper.waitForResource(function check(resource){
res = resource;
return resource.url.indexOf(".json") != -1;
}, function then(){
resData = casper.evaluate(function(url){
// synchronous GET request
return __utils__.sendAJAX(url, "GET");
}, res.url);
// do something with the data here or in a later step
});
casper.run();
Stateful AJAX Request
If it is not stateless, you would need to replace the implementation of XMLHttpRequest. You will need to inject your own implementation of the onreadystatechange handler, collect the information in the page window object and later collect it in another evaluate call.
You may want to look at the XHR faker in sinon.js or use the following complete proxy for XMLHttpRequest (I modeled it after method 3 from How can I create a XMLHttpRequest wrapper/proxy?):
function replaceXHR(){
(function(window, debug){
function args(a){
var s = "";
for(var i = 0; i < a.length; i++) {
s += "\t\n[" + i + "] => " + a[i];
}
return s;
}
var _XMLHttpRequest = window.XMLHttpRequest;
window.XMLHttpRequest = function() {
this.xhr = new _XMLHttpRequest();
}
// proxy ALL methods/properties
var methods = [
"open",
"abort",
"setRequestHeader",
"send",
"addEventListener",
"removeEventListener",
"getResponseHeader",
"getAllResponseHeaders",
"dispatchEvent",
"overrideMimeType"
];
methods.forEach(function(method){
window.XMLHttpRequest.prototype[method] = function() {
if (debug) console.log("ARGUMENTS", method, args(arguments));
if (method == "open") {
this._url = arguments[1];
}
return this.xhr[method].apply(this.xhr, arguments);
}
});
// proxy change event handler
Object.defineProperty(window.XMLHttpRequest.prototype, "onreadystatechange", {
get: function(){
// this will probably never called
return this.xhr.onreadystatechange;
},
set: function(onreadystatechange){
var that = this.xhr;
var realThis = this;
that.onreadystatechange = function(){
// request is fully loaded
if (that.readyState == 4) {
if (debug) console.log("RESPONSE RECEIVED:", typeof that.responseText == "string" ? that.responseText.length : "none");
// there is a response and filter execution based on url
if (that.responseText && realThis._url.indexOf("whatever") != -1) {
window.myAwesomeResponse = that.responseText;
}
}
onreadystatechange.call(that);
};
}
});
var otherscalars = [
"onabort",
"onerror",
"onload",
"onloadstart",
"onloadend",
"onprogress",
"readyState",
"responseText",
"responseType",
"responseXML",
"status",
"statusText",
"upload",
"withCredentials",
"DONE",
"UNSENT",
"HEADERS_RECEIVED",
"LOADING",
"OPENED"
];
otherscalars.forEach(function(scalar){
Object.defineProperty(window.XMLHttpRequest.prototype, scalar, {
get: function(){
return this.xhr[scalar];
},
set: function(obj){
this.xhr[scalar] = obj;
}
});
});
})(window, false);
}
If you want to capture the AJAX calls from the very beginning, you need to add this to one of the first event handlers
casper.on("page.initialized", function(resource){
this.evaluate(replaceXHR);
});
or evaluate(replaceXHR) when you need it.
The control flow would look like this:
function replaceXHR(){ /* from above*/ }
casper.start(yourUrl, function(){
this.evaluate(replaceXHR);
});
function getAwesomeResponse(){
return this.evaluate(function(){
return window.myAwesomeResponse;
});
}
// stops waiting if window.myAwesomeResponse is something that evaluates to true
casper.waitFor(getAwesomeResponse, function then(){
var data = JSON.parse(getAwesomeResponse());
// Do something with data
});
casper.run();
As described above, I create a proxy for XMLHttpRequest so that every time it is used on the page, I can do something with it. The page that you scrape uses the xhr.onreadystatechange callback to receive data. The proxying is done by defining a specific setter function which writes the received data to window.myAwesomeResponse in the page context. The only thing you need to do is retrieving this text.
JSONP Request
Writing a proxy for JSONP is even easier, if you know the prefix (the function to call with the loaded JSON e.g. insert({"data":["Some", "JSON", "here"],"id":"asdasda")). You can overwrite insert in the page context
after the page is loaded
casper.start(url).then(function(){
this.evaluate(function(){
var oldInsert = insert;
insert = function(json){
window.myAwesomeResponse = json;
oldInsert.apply(window, arguments);
};
});
}).waitFor(getAwesomeResponse, function then(){
var data = JSON.parse(getAwesomeResponse());
// Do something with data
}).run();
or before the request is received (if the function is registered just before the request is invoked)
casper.on("resource.requested", function(resource){
// filter on the correct call
if (resource.url.indexOf(".jsonp") != -1) {
this.evaluate(function(){
var oldInsert = insert;
insert = function(json){
window.myAwesomeResponse = json;
oldInsert.apply(window, arguments);
};
});
}
}).run();
casper.start(url).waitFor(getAwesomeResponse, function then(){
var data = JSON.parse(getAwesomeResponse());
// Do something with data
}).run();

I may be late into the party, but the answer may help someone like me who would fall into this problem later in future.
I had to start with PhantomJS, then moved to CasperJS but finally settled with SlimerJS. Slimer is based on Phantom, is compatible with Casper, and can send you back the response body using the same onResponseReceived method, in "response.body" part.
Reference: https://docs.slimerjs.org/current/api/webpage.html#webpage-onresourcereceived

#Artjom's answer's doesn't work for me in the recent Chrome and CasperJS versions.
Based on #Artjom's answer and based on gilly3's answer on how to replace XMLHttpRequest, I have composed a new solution that should work in most/all versions of the different browsers. Works for me.
SlimerJS cannot work on newer version of FireFox, therefore no good for me.
Here is the the generic code to add a listner to load of XHR (not dependent on CasperJS):
var addXHRListener = function (XHROnStateChange) {
var XHROnLoad = function () {
if (this.readyState == 4) {
XHROnStateChange(this)
}
}
var open_original = XMLHttpRequest.prototype.open;
XMLHttpRequest.prototype.open = function (method, url, async, unk1, unk2) {
this.requestUrl = url
open_original.apply(this, arguments);
};
var xhrSend = XMLHttpRequest.prototype.send;
XMLHttpRequest.prototype.send = function () {
var xhr = this;
if (xhr.addEventListener) {
xhr.removeEventListener("readystatechange", XHROnLoad);
xhr.addEventListener("readystatechange", XHROnLoad, false);
} else {
function readyStateChange() {
if (handler) {
if (handler.handleEvent) {
handler.handleEvent.apply(xhr, arguments);
} else {
handler.apply(xhr, arguments);
}
}
XHROnLoad.apply(xhr, arguments);
setReadyStateChange();
}
function setReadyStateChange() {
setTimeout(function () {
if (xhr.onreadystatechange != readyStateChange) {
handler = xhr.onreadystatechange;
xhr.onreadystatechange = readyStateChange;
}
}, 1);
}
var handler;
setReadyStateChange();
}
xhrSend.apply(xhr, arguments);
};
}
Here is CasperJS code to emit a custom event on load of XHR:
casper.on("page.initialized", function (resource) {
var emitXHRLoad = function (xhr) {
window.callPhantom({eventName: 'xhr.load', eventData: xhr})
}
this.evaluate(addXHRListener, emitXHRLoad);
});
casper.on('remote.callback', function (data) {
casper.emit(data.eventName, data.eventData)
});
Here is a code to listen to "xhr.load" event and get the XHR response body:
casper.on('xhr.load', function (xhr) {
console.log('xhr load', xhr.requestUrl)
console.log('xhr load', xhr.responseText)
});

Additionally, you can also directly download the content and manipulate it later.
Here is the example of the script I am using to retrieve a JSON and save it locally :
var casper = require('casper').create({
pageSettings: {
webSecurityEnabled: false
}
});
var url = 'https://twitter.com/users/username_available?username=whatever';
casper.start('about:blank', function() {
this.download(url, "hop.json");
});
casper.run(function() {
this.echo('Done.').exit();
});

Related

Firefox sending multiple XHR requests when binding a controller function to the DOM

I have the following function in my controller:
$scope.model.listApplicantStatuses = function(){
var statusChoices = jobsService.getApplicantStatuses();
if(statusChoices !== null)
return statusChoices;
//if($scope.model.listApplicantStatuses.inProgress)
// return;
//$scope.model.listApplicantStatuses.inProgress = true;
jobsService.fetchApplicantStatuses().then(function(data){
jobsService.setApplicantStatuses(data.data);
return data.data;
},
function(data){
$scope.layout.showNotification('error', 10 * 1000, 'we are is experiencing technical difficulties Contact Support');
});
}
corresponding service code:
jobsServ.fetchApplicantStatuses = function(){
return $http.get(utils.getBaseUrl() + '/applications/status_choices', utils.getConfig());
}
jobsServ.getApplicantStatuses = function(){
return that.applicantStatusChoices;
},
jobsServ.setApplicantStatuses = function(choices){
that.applicantStatusChoices = choices;
},
Example DOM usage:
<select class="statusSelect" data-ng-model="model.editedApplicant[applicant.id].status" data-ng-show="layout.statusVisible(applicant.id) && !layout.statusLoader[applicant.id]" data-ng-options="key as val for (key, val) in model.listApplicantStatuses()" data-ng-change="model.updateStatus(applicant.id)"></select>
Now, the problem that I am having is that while chrome waits until the first function call completes, and then gives me the data that I get from the AJAX call, and returns undefined in the meanwhile, Firefox calls the function over and over again, creating a whole lot of uneeded XHR requests.
I commented out the code that was setting the inProgress $scope variable, a because it seems to much of a jQurish solution to me, and because that would force me to change my code in many places, and create another Boolean flag such as this per every request to the server.
I would expect the Firefox behaviour. Perhaps you should change the logic as:
// a variable to keep statuses, or a promise
$scope.applicantStatusList = $scope.model.listApplicantStatuses();
Change listApplicantStatuses() to return the promise:
$scope.model.listApplicantStatuses = function() {
var statusChoices = jobsService.getApplicantStatuses();
if(statusChoices !== null)
return statusChoices;
return jobsService.fetchApplicantStatuses().then(function(data){
jobsService.setApplicantStatuses(data.data);
return data.data;
},
function(data){
$scope.layout.showNotification(...);
});
};
And use the applicantStatusList in the <select>:
<select ... data-ng-options="key as val for (key, val) in applicantStatusList" ...></select>
I solved this by allocating a local variable named _root at the function that sends the xhr, setting _root.ingProgress to true once the request is send, and switching it to false once an answer is received.
Works like a charm.
code:
$scope.model.listApplicantStatuses = function(){
var statusChoices = jobsService.getApplicantStatuses();
if(statusChoices !== null)
return statusChoices;
var _root = this;
if(_root.inProgress)
return;
_root.inProgress = true;
jobsService.fetchApplicantStatuses().then(function(data){
jobsService.setApplicantStatuses(data.data);
_root.inProgress = false;
return data.data;
},
function(data){
_root.inProgress = false;
$scope.layout.showNotification('error', 10 * 1000, 'We are currently experiencing technical difficulties Contact Support');
});
}

AJAX - Return responseText

I've seen the myriad threads sprawled across the Internet about the following similar code in an AJAX request returning undefined:
AJAX.onreadystatechange = function() {
if(AJAX.readyState == 4) {
if(AJAX.status == 200) {
var response = AJAX.responseText;
return response;
}
else {
window.alert('Error: ' + AJAX.status);
return false;
}
}
};
I know that I'm supposed to "do something with" responseText like writing it to the HTML. The problem: I don't have that luxury. This bit of code is intended to be inside of a generic method for running fast AJAX requests that way all the code for making an AJAX request doesn't have to written out over and over again (~40×) with the chance of a minor problem here or there that breaks the application.
My method HAS to explicitly return responseText "or else." No writing to HTML. How would I do this? Also, I'd appreciate a lack of plugs for JQuery.
What I'm looking for:
function doAjax(param) {
// set up XmlHttpRequest
AJAX.onreadystatechange = function() {
if(AJAX.readyState == 4) {
if(AJAX.status == 200) {
var response = AJAX.responseText;
return response;
}
else {
window.alert('Error: ' + AJAX.status);
return false;
}
}
};
// send data
}
...
function doSpecificAjax() {
var param = array();
var result = doAjax(param);
// manipulate result
}
Doing a little research I came across this SOF post:
Ajax responseText comes back as undefined
Based on that post, it looks like you may want to implement your ajax method like this:
function doRequest(url, callback) {
var xmlhttp = ....; // create a new request here
xmlhttp.open("GET", url, true); // for async
xmlhttp.onreadystatechange=function() {
if (xmlhttp.readyState==4) {
if (xmlhttp.status == 200) {
// pass the response to the callback function
callback(null, xmlhttp.responseText);
} else {
// pass the error to the callback function
callback(xmlhttp.statusText);
}
}
}
xmlhttp.send(null);
}
Then, you can call that method like this...
doRequest('http://mysite.com/foo', function(err, response) { // pass an anonymous function
if (err) {
return "";
} else {
return response;
}
});
This should return the responseText accurately. Let me know if this doesn't give you back the correct results.

AJAX Ready State stuck on 1

Hi I can see this has been discussed but after perusing the issues/answers I still don't seem to be able to get even this simple AJAX call to bump out of ready state 1.
Here's the Javascript I have:
<script language="javascript" type="text/javascript">
var request;
function createRequest()
{
try
{
request = new XMLHttpRequest();
} catch (trymicrosoft) {
try {
request = new ActiveXObject("Msxml2.XMLHTTP");
} catch (othermicrosoft) {
try {
request = new ActiveXObject("Microsoft.XMLHTTP");
} catch (failed) {
request = false;
}
}
}
if (!request)
alert("Error initializing XMLHttpRequest!");
}
function loadClassesBySchool()
{
//get require web form pieces for this call
createRequest(); // function to get xmlhttp object
var schoolId = getDDLSelectionValue("ddlSchools");
var grade = getDDLSelectionValue("ddlGrades");
var url = "courses.php?grades=" + escape(grade) + "&schoolId=" + escape(schoolId);
//open server connection
request.open("GET", url, true);
//Setup callback function for server response
//+++read on overflow that some fixed the issue with an onload event this simply had
//+++the handle spitback 2 readystate = 1 alerts
request.onload = updateCourses();
request.onreadystatechanged = updateCourses();
//send the result
request.send();
}
function updateCourses()
{
alert('ready state changed' + request.readyState);
}
function getDDLSelectionValue(ddlID)
{
return document.getElementById(ddlID).options[document.getElementById(ddlID).selectedIndex].value;
}
</script>
The PHP is HERE just a simple print which if i navigate to in the browser (IE/Chrome) loads fine:
<?php
print "test";
?>
I'm quite new at this but seems like I can't get the most bare bones AJAX calls to work, any help as to how work past this would be greatly appreciated.
All I get out of my callback function 'updateCourses' is a 1...
Well after more digging I actually gave up and switched over to jQuery which should for all intents and purposes be doing the EXACT same thing except for the fact that jQuery works... I was just less comfortable with it but so be it.
Here's the jQuery to accomplish the same:
function loadCoursesBySchool(){
var grades = getDDLSelectionValue("ddlGrades");
var schoolId = getDDLSelectionValue("ddlSchools");
jQuery.ajax({
url: "courses.php?grades=" + grades + "&schoolId=" + schoolId,
success: function (data) {
courseDisplay(data);
}
});
}
function courseDisplay(response)
{
//check if anything was setn back!?
if(!response)
{
$("#ddlCourses").html("");
//do nothing?
}
else
{
//empty DLL
$("#ddlCourses").html("");
//add entries
$(response).appendTo("#ddlCourses");
}
}

How to write javascript in client side to receive and parse `chunked` response in time?

I'm using play framework, to generate chunked response. The code is:
class Test extends Controller {
public static void chunk() throws InterruptedException {
for (int i = 0; i < 10; i++) {
String data = repeat("" + i, 1000);
response.writeChunk(data);
Thread.sleep(1000);
}
}
}
When I use browser to visit http://localhost:9000/test/chunk, I can see the data displayed increased every second. But, when I write a javascript function to receive and handle the data, found it will block until all data received.
The code is:
$(function(){
$.ajax(
"/test/chunked",
{
"success": function(data, textStatus, xhr) {
alert(textStatus);
}
}
);
});
I can see a message box popped up after 10s, when all the data received.
How to get the stream and handle the data in time?
jQuery doesn't support that, but you can do that with plain XHR:
var xhr = new XMLHttpRequest()
xhr.open("GET", "/test/chunked", true)
xhr.onprogress = function () {
console.log("PROGRESS:", xhr.responseText)
}
xhr.send()
This works in all modern browsers, including IE 10. W3C specification here.
The downside here is that xhr.responseText contains an accumulated response. You can use substring on it, but a better idea is to use the responseType attribute and use slice on an ArrayBuffer.
Soon we should be able to use ReadableStream API (MDN docs here). The code below seems to be working with Chrome Version 62.0.3202.94:
fetch(url).then(function (response) {
let reader = response.body.getReader();
let decoder = new TextDecoder();
return readData();
function readData() {
return reader.read().then(function ({value, done}) {
let newData = decoder.decode(value, {stream: !done});
console.log(newData);
if (done) {
console.log('Stream complete');
return;
}
return readData();
});
}
});
the success event would fire when the complete data transmission is done and the connection closed with a 200 response code. I believe you should be able to implement a native onreadystatechanged event and see the data packets as they come.

How to cancel/abort jQuery AJAX request?

I've an AJAX request which will be made every 5 seconds. But the problem is before the AJAX request if the previous request is not completed I've to abort that request and make a new request.
My code is something like this, how to resolve this issue?
$(document).ready(
var fn = function(){
$.ajax({
url: 'ajax/progress.ftl',
success: function(data) {
//do something
}
});
};
var interval = setInterval(fn, 500);
);
The jquery ajax method returns a XMLHttpRequest object. You can use this object to cancel the request.
The XMLHttpRequest has a abort method, which cancels the request, but if the request has already been sent to the server then the server will process the request even if we abort the request but the client will not wait for/handle the response.
The xhr object also contains a readyState which contains the state of the request(UNSENT-0, OPENED-1, HEADERS_RECEIVED-2, LOADING-3 and DONE-4). we can use this to check whether the previous request was completed.
$(document).ready(
var xhr;
var fn = function(){
if(xhr && xhr.readyState != 4){
xhr.abort();
}
xhr = $.ajax({
url: 'ajax/progress.ftl',
success: function(data) {
//do something
}
});
};
var interval = setInterval(fn, 500);
);
When you make a request to a server, have it check to see if a progress is not null (or fetching that data) first. If it is fetching data, abort the previous request and initiate the new one.
var progress = null
function fn () {
if (progress) {
progress.abort();
}
progress = $.ajax('ajax/progress.ftl', {
success: function(data) {
//do something
progress = null;
}
});
}
I know this might be a little late but i experience similar issues where calling the abort method didnt really aborted the request. instead the browser was still waiting for a response that it never uses.
this code resolved that issue.
try {
xhr.onreadystatechange = null;
xhr.abort();
} catch (e) {}
Why should you abort the request?
If each request takes more than five seconds, what will happen?
You shouldn't abort the request if the parameter passing with the request is not changing.
eg:- the request is for retrieving the notification data.
In such situations, The nice approach is that set a new request only after completing the previous Ajax request.
$(document).ready(
var fn = function(){
$.ajax({
url: 'ajax/progress.ftl',
success: function(data) {
//do something
},
complete: function(){setTimeout(fn, 500);}
});
};
var interval = setTimeout(fn, 500);
);
jQuery:
Use this as a starting point - as inspiration.
I solved it like this:
(this is not a perfect solution, it just aborts the last instance and is WIP code)
var singleAjax = function singleAjax_constructor(url, params) {
// remember last jQuery's get request
if (this.lastInstance) {
this.lastInstance.abort(); // triggers .always() and .fail()
this.lastInstance = false;
}
// how to use Deferred : http://api.jquery.com/category/deferred-object/
var $def = new $.Deferred();
// pass the deferrer's request handlers into the get response handlers
this.lastInstance = $.get(url, params)
.fail($def.reject) // triggers .always() and .fail()
.success($def.resolve); // triggers .always() and .done()
// return the deferrer's "control object", the promise object
return $def.promise();
}
// initiate first call
singleAjax('/ajax.php', {a: 1, b: 2})
.always(function(a,b,c) {console && console.log(a,b,c);});
// second call kills first one
singleAjax('/ajax.php', {a: 1, b: 2})
.always(function(a,b,c) {console && console.log(a,b,c);});
// here you might use .always() .fail() .success() etc.
You can use jquery-validate.js . The following is the code snippet from jquery-validate.js.
// ajax mode: abort
// usage: $.ajax({ mode: "abort"[, port: "uniqueport"]});
// if mode:"abort" is used, the previous request on that port (port can be undefined) is aborted via XMLHttpRequest.abort()
var pendingRequests = {},
ajax;
// Use a prefilter if available (1.5+)
if ( $.ajaxPrefilter ) {
$.ajaxPrefilter(function( settings, _, xhr ) {
var port = settings.port;
if ( settings.mode === "abort" ) {
if ( pendingRequests[port] ) {
pendingRequests[port].abort();
}
pendingRequests[port] = xhr;
}
});
} else {
// Proxy ajax
ajax = $.ajax;
$.ajax = function( settings ) {
var mode = ( "mode" in settings ? settings : $.ajaxSettings ).mode,
port = ( "port" in settings ? settings : $.ajaxSettings ).port;
if ( mode === "abort" ) {
if ( pendingRequests[port] ) {
pendingRequests[port].abort();
}
pendingRequests[port] = ajax.apply(this, arguments);
return pendingRequests[port];
}
return ajax.apply(this, arguments);
};
}
So that you just only need to set the parameter mode to abort when you are making ajax request.
Ref:https://cdnjs.cloudflare.com/ajax/libs/jquery-validate/1.14.0/jquery.validate.js
Create a function to call your API. Within this function we define request callApiRequest = $.get(... - even though this is a definition of a variable, the request is called immediately, but now we have the request defined as a variable. Before the request is called, we check if our variable is defined typeof(callApiRequest) != 'undefined' and also if it is pending suggestCategoryRequest.state() == 'pending' - if both are true, we .abort() the request which will prevent the success callback from running.
// We need to wrap the call in a function
callApi = function () {
//check if request is defined, and status pending
if (typeof(callApiRequest) != 'undefined'
&& suggestCategoryRequest.state() == 'pending') {
//abort request
callApiRequest.abort()
}
//define and make request
callApiRequest = $.get("https://example.com", function (data) {
data = JSON.parse(data); //optional (for JSON data format)
//success callback
});
}
Your server/API might not support aborting the request (what if API executed some code already?), but the javascript callback will not fire. This is useful, when for example you are providing input suggestions to a user, such as hashtags input.
You can further extend this function by adding definition of error callback - what should happen if request was aborted.
Common use-case for this snippet would be a text input that fires on keypress event. You can use a timeout, to prevent sending (some of) requests that you will have to cancel .abort().
You should also check for readyState 0. Because when you use xhr.abort() this function set readyState to 0 in this object, and your if check will be always true - readyState !=4
$(document).ready(
var xhr;
var fn = function(){
if(xhr && xhr.readyState != 4 && xhr.readyState != 0){
xhr.abort();
}
xhr = $.ajax({
url: 'ajax/progress.ftl',
success: function(data) {
//do something
}
});
};
var interval = setInterval(fn, 500);
);

Resources