How to scrape data from a table with CasperJS and PhantomJS - XPath

I am trying to scrape some data for personal use. Here's my code for CasperJS:
// Modules and the page to scrape.
var fs = require('fs');
var url = 'http://24score.com/football/england/premier_league/2015-2016/regular_season/averages/';

// Page-level settings: images and plugins are not loaded, and a desktop
// Chrome user agent string is sent.
var pageOptions = {
    loadImages: false,
    loadPlugins: false,
    userAgent: 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_5) AppleWebKit/537.4 (KHTML, like Gecko) Chrome/22.0.1229.94 Safari/537.4'
};

// CasperJS instance with debug logging; jQuery and lodash are listed as
// client scripts.
var casper = require('casper').create({
    verbose: true,
    logLevel: 'debug', // accepted levels: debug, info, warning, error
    pageSettings: pageOptions,
    clientScripts: ["vendor/jquery-3.1.0.js", "vendor/lodash.js"]
});

// Filled in by the evaluate step and echoed when the run finishes.
var content = [];
/**
 * Collects the innerHTML of every element matched by the hard-coded XPath.
 * It is handed to casper.evaluate() by the script, and relies on `$`, `x`
 * and `_` being defined wherever it executes.
 *
 * @returns {Array} one innerHTML string per matched element
 */
function getContent() {
    var cells = $(x('//*[#id="total2.5"]/table/tbody[1]/tr[1]/td[1]'));
    var readHtml = function (cell) {
        return cell.innerHTML;
    };
    return _.map(cells, readHtml);
}
// Open the target page; the start step itself does nothing.
casper.start(url, function () {});

// Pass getContent to casper.evaluate() and keep its result for the report.
casper.then(function scrape() {
    content = this.evaluate(getContent);
});

// Print whatever was collected, then quit.
casper.run(function report() {
    this.echo(content);
    this.exit();
});
It seems that I have a problem with HTML element identification: I use XPath here, and nothing works. But when I choose some other element, it gives me some output.

Fixed:
// XPath selects attributes with "@" (here @id), not "#".
// __utils__ is CasperJS's client-side helper object, so this call belongs
// inside a function passed to casper.evaluate().
__utils__.getElementsByXPath('//*[@id="total2.5"]/table/tbody[1]/tr[1]/td[1]')

Related

Onprepare with jasmine-reporters causes failures while executing with IE

After some time trying to figure out why the beginning of my tests fails (only in IE; Chrome works just fine), I found out that it is caused by the onPrepare function when it comes to this part of the code:
// Custom reporter: when a spec is done, take a screenshot and write it to
// "<browserName>-<spec full name>.png" in the screenshots folder.
// Neither promise is returned from specDone, so nothing here makes the
// caller wait for the screenshot to be written.
var screenshotReporter = {
    specDone: function (result) {
        browser.getCapabilities().then(function (capabilities) {
            var browserName = capabilities.get('browserName');
            browser.takeScreenshot().then(function (pngBase64) {
                var target = './execution_results/reports/results/screenshots/' + browserName + '-' + result.fullName + '.png';
                var out = fs.createWriteStream(target);
                out.write(new Buffer.from(pngBase64, 'base64'));
                out.end();
            });
        });
    }
};
jasmine.getEnv().addReporter(screenshotReporter);
If I comment out this part, the tests run smoothly.
My login page is not Angular, so I turn off the sync for the login and turn on again, not sure if this could be related.
How can I force protractor to wait for this part to finish before continuing with the run?
I already tried wrapping this code in a promise (in the conf file) to make Protractor wait, but even then I get the Jasmine timeout 'TimeoutError: Wait timed out after 20000ms', so I believe I did it wrong.
The error I get is:
Failed: Unable to determine type from: E. Last 1 characters read: E
Build info: version: '3.141.59', revision: 'e82be7d358', time: '2018-11-14T08:25:53'
System info: host: 'xxxxx', ip: 'xx.xx.xx.xx', os.name: 'Windows 10', os.arch: 'amd64', os.version: '10.0', java.version: '10.0.2'
Driver info: driver.version: unknown
Full conf file:
var jasmineReporters = require('./lib/node_modules/jasmine-reporters');
var HTMLReport = require('./lib/node_modules/protractor-html-reporter-2');
var mkdirp = require('./lib/node_modules/mkdirp');
var fs = require('./lib/node_modules/fs-extra');
let date = require('./lib/node_modules/date-and-time');
var environmentToExecute = 'https://myportal'
exports.config = {
seleniumAddress: 'http://'+process.env.AUTOTEST_ADDRESS+'/wd/hub',
framework: 'jasmine2',
specs: ['all my specs'],
suites: {
//All my suites
},
allScriptsTimeout: 20000,
onPrepare: function () {
{
//Here I create the folders (removed to make it shorter)
}
jasmine.getEnv().addReporter(new jasmineReporters.JUnitXmlReporter({
consolidateAll: true,
savePath: './execution_results/reports/xml/',
filePrefix: 'xmlresults'
}));
jasmine.getEnv().addReporter({
specDone: function (result) {
browser.getCapabilities().then(function (caps)
{
var browserName = caps.get('browserName');
browser.takeScreenshot().then(function (png) {
var stream = fs.createWriteStream('./execution_results/reports/results/screenshots/' + browserName + '-' + result.fullName+ '.png');
stream.write(new Buffer.from(png, 'base64'));
stream.end();
});
});
}
});
},
//HTMLReport called once tests are finished
onComplete: function()
{
//I removed this to make it shorter, but basically it is the function
// that comverts the xml in html and build the report
},
jasmineNodeOpts: {
showColors: true, // Use colors in the command line report.
// If true, display spec names.
isVerbose: true,
defaultTimeoutInterval: 100000
},
params: {
//Other files like functions and so on...
},
login:{
//parameters to login
}
},
multiCapabilities:
[
{
'browserName': 'internet explorer',
'version': 11,
},
/*
//chrome, firefox...
*/
],
};//end of Conf.js
Thanks!
I also had issues with asynchronous actions in a Jasmine reporter recently and unfortunately could not figure out how to get them to await promise results properly before moving on. If anyone else has information on this I would greatly appreciate it also.
I did implement a workaround using a global variable and the afterEach hook, which is able to correctly await promises and may work for you.
I'm assuming that you only need the 'fullname' property of your result so you can try this.
Declare a global property in your onPrepare, and assign it a value from your reporter. Assign it the spec's fullName inside specStarted instead of specDone. Then you can create your screenshot inside your tests' afterEach hooks, which are able to correctly await promise results.
onPrepare: function () {
global.currentlyExecutingSpec = 'tbd';
jasmine.getEnv().addReporter({
specStarted: function (result) {
currentlyExecutingSpec = result.fullName
}
})
jasmine.getEnv().addReporter(new jasmineReporters.JUnitXmlReporter({
consolidateAll: true,
savePath: './execution_results/reports/xml/',
filePrefix: 'xmlresults'
}));
}
Inside your testFiles
// Runs after every spec: saves a screenshot named
// "<browserName>-<spec full name>.png", using the spec name recorded in the
// global `currentlyExecutingSpec`.
// The promise chain is returned so the hook's caller can wait for the
// screenshot before moving on. The posted version also ended the outer
// .then(...) with "};" instead of "});", which is a syntax error.
afterEach(function () {
    return browser.getCapabilities().then(function (caps) {
        var browserName = caps.get('browserName');
        return browser.takeScreenshot().then(function (png) {
            var stream = fs.createWriteStream('./execution_results/reports/results/screenshots/' + browserName + '-' + currentlyExecutingSpec + '.png');
            // Buffer.from is a factory function, not a constructor.
            stream.write(Buffer.from(png, 'base64'));
            stream.end();
        });
    });
});

casperjs unable to download CSV from APEX application

I'm trying to automate downloading a CSV file from an APEX application.
// Log in to the APEX application, wait five seconds, then request the CSV
// report from a fixed URL.
var casperOptions = {
    verbose: true,
    logLevel: "debug",
    viewportSize: { width: 1600, height: 400 }
};
var casper = require('casper').create(casperOptions);
var url = "https://example.com";

casper.start(url);

// Fill in the credentials without submitting the form (third argument false).
casper.then(function fillLogin() {
    var credentials = {'P101_USERNAME': 'user', 'P101_PASSWORD': 'password'};
    this.fill('#wwvFlowForm', credentials, false);
});

// Submit by clicking the login button.
casper.then(function submitLogin() {
    this.click('#P101_LOGIN');
});

casper.wait(5000);

casper.then(function fetchReport() {
    this.echo('downloading file');
    this.download('https://example/apex/f?p=1002:173:10072525691961:CSV','report.csv');
});

casper.run();
I am able to log in, but when I try to download the file I get the login page HTML.
I have tried using the getBase64 method with the same result. Does casper.download use a different session?
Screenshots before and after download shows that i am logged in.
The issue was that this APEX app includes an instance_id in the URL, so the working code is:
// Log in to the APEX application and download the report as CSV.
// The application's URLs carry an instance id, so the download URL is
// rebuilt from the URL the browser is on after login instead of being
// hard-coded.
var casper = require('casper').create({
    verbose: true,
    logLevel: "debug",
    viewportSize: { width: 1600, height: 400 }
});
var url = "https://example.com";

casper.start(url);

// Fill in the credentials without submitting the form (third argument false).
casper.then(function () {
    this.fill('#wwvFlowForm', {'P101_USERNAME': 'user', 'P101_PASSWORD': 'password'}, false);
});

casper.then(function () {
    this.click('#P101_LOGIN');
}).wait(5000).then(function () {
    this.echo('downloading file');
    // For "https://host/apex/f?p=1002:173:<instance_id>:...", splitting on ":"
    // puts the instance id at index 3 (index 0 is the scheme).
    var instance_id = this.getCurrentUrl().split(':')[3];
    var download_url = url + '/apex/f?p=1002:173:' + instance_id;
    // Use the variable; the posted code passed the literal string 'download_url'.
    this.download(download_url + ':CSV', 'report.csv');
});

casper.run();

How can I use NativeScript 3 to capture image and send to a remote server

I'm new to NativeScript and I'm trying to capture an image with the camera module (this works fine), and convert it to base64 (this is not working) and POST to server.
I've googled for days. Any help you can lend would be immensely appreciated.
I've tried this about 16 billion different ways and this is my current code:
// Takes a picture, tries to base64-encode it, and POSTs the result to the
// upload endpoint. Encoding errors are logged and the upload is attempted
// regardless.
viewModel.takePicture1 = function () {
    camera.requestPermissions();
    console.log(camera.isAvailable());

    // NOTE(review): these options are built but never passed to takePicture().
    var options = { width: 640, keepAspectRatio: true, saveToGallery: false };

    camera.takePicture().then(function (picture) {
        var imageData;
        try {
            imageData = picture.toBase64String("jpeg"); // fails here
            console.log(imageData);
        } catch (failure) {
            console.log("Error: " + failure);
        }

        var upload = http.request({
            url: "http://[server address]/lab/ns_exp/upload_test.php",
            method: "POST",
            headers: { "Content-Type": "application/base64" },
            content: imageData
        });
        upload.then(function () {
            console.log("Upload successful");
        }).catch(function (reason) {
            console.log("Unsuccessful upload", reason);
        });
    });
};
Oh, I do want to make clear that I'm not using angular (obviously), so please don't provide an answer that does so. : ) (Vuejs Holdout)
The key here is that base64 needs to know that the image is a JPEG, and what quality the image should be. The code should look like this:
// Excerpt only: the closing braces are omitted here; the complete function
// appears further down.
camera.takePicture(cameraOptions)
.then(imageAsset => {
// Turn the captured asset into an image source before encoding it.
imageSource.fromAsset(imageAsset).then(res => {
myImageSource = res;
// Encode as JPEG; the second argument is the quality.
var base64 = myImageSource.toBase64String("jpeg", 100);
Just in case someone finds this later and wonders about putting the image (UI) and/or the image (base64) into an observableArray, here is my complete function:
/**
 * Captures a photo and records it on the view model.
 *
 * Pushes `{ data: <base64 JPEG> }` onto `this.photoData` and
 * `{ src: <image asset> }` onto `this.photoUI`, then refreshes `listView`.
 *
 * Changes from the posted version: the inner `fromAsset` promise is returned
 * so a conversion failure reaches the `.catch` below instead of being an
 * unhandled rejection, and the image source is held in a local variable
 * instead of an implicit global.
 *
 * @returns {Promise} settles after the photo has been processed or the
 *     failure has been logged
 */
viewModel.takePhoto = function () {
    var self = this;
    camera.requestPermissions();
    var cameraOptions = { width: 640, keepAspectRatio: true, saveToGallery: false };

    return camera.takePicture(cameraOptions)
        .then(imageAsset => {
            return imageSource.fromAsset(imageAsset).then(res => {
                // Encode as JPEG at quality 100.
                var base64 = res.toBase64String("jpeg", 100);
                self.photoData.push({"data": base64});

                var image = new imageModule.Image();
                image.src = imageAsset;
                self.photoUI.push({"src": image.src});

                listView.refresh();
            });
        })
        .catch(function (err) {
            // Rejections are not guaranteed to be Error objects.
            console.log("Error -> " + (err && err.message ? err.message : err));
        });
};

CasperJS scraping assistance required

I am trying to go to this page and scrape from each link the 'Title' and 'Authors' for each thesis. So far I have this (my issues that I require assistance with are in the comments within code):
// Modules.
var utils = require('utils');

// Page settings: images and plugins are not loaded, and a desktop Chrome
// user agent string is sent.
var pageSettings = {
    loadImages: false,
    loadPlugins: false,
    userAgent: 'Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/29.0.1547.2 Safari/537.36'
};

// CasperJS instance that only logs errors; jQuery is listed as a client script.
var casper = require('casper').create({
    verbose: true,
    logLevel: 'error',
    pageSettings: pageSettings,
    clientScripts: ['lib/jquery.min.js']
});

// Crawl state shared by the functions below.
var i = 0;            // index of the next link to visit
var links = [];       // thesis page URLs
var thesis_data = []; // one {title, author} record per visited thesis
/**
 * Returns the href attribute of every element matched by the selector.
 * The script passes this function to casper.evaluate().
 *
 * NOTE(review): the selector is still an empty placeholder; confirm how
 * querySelectorAll treats an empty string before relying on this.
 *
 * @returns {Array} href attribute values, in document order
 */
function getThesisLinks () {
    var anchors = document.querySelectorAll('');//Not sure what should go in ('')
    var hrefs = [];
    for (var k = 0; k < anchors.length; k++) {
        hrefs.push(anchors[k].getAttribute('href'));
    }
    return hrefs;
}
/**
 * Processes the link at index `i`, then re-arms itself through run();
 * once every link has been handled it dumps `thesis_data` and exits.
 * Must be called with the casper instance as `this`.
 */
function loopThroughThesisLinks() {
    var finished = !(i < links.length);
    if (finished) {
        utils.dump(thesis_data);
        this.exit();
        return;
    }

    var current = links[i];
    this.echo('[LINK #' + i + '] ' + current);
    getThesisData.call(this, current);
    i++;
    // Recurse: run() calls this function again for the next link.
    this.run(loopThroughThesisLinks);
}
/**
 * Opens `link` with start() and appends a {title, author} record to
 * `thesis_data`. Must be called with the casper instance as `this`.
 *
 * @param {string} link URL of the thesis page
 */
function getThesisData(link) {
    this.start(link, function collect() {
        // Both selectors are still placeholders:
        // title of thesis - not sure what element to insert for this.fetchText
        // name of authors - not sure what element to insert for this.fetchText
        thesis_data.push({
            title: this.fetchText(''),
            author: this.fetchText('')
        });
    });
}
// Load the browse page, collect the thesis links, prefix each one with the
// site's handle URL, dump the list, and then start the per-link loop.
casper.start('http://ses.library.usyd.edu.au/handle/2123/345/browse?type=dateissued&sort_by=2&order=DESC&rpp=1495&etal=0&submit_browse=Update', function onListing() {
    links = this.evaluate(getThesisLinks);
    // Convert relative links to absolute URLs
    links.forEach(function (href, position) {
        links[position] = "http://ses.library.usyd.edu.au/handle/" + href;
    });
    utils.dump(links);
});

casper.run(loopThroughThesisLinks);
Any assistance would be appreciated.
This is a simple CSS selector for all links:
// Matches the <a> that is a direct child of the third <td> in each row of
// table.misctable.
var links = document.querySelectorAll(
'table.misctable > tbody > tr > td:nth-of-type(3) > a');
You can also use XPath like this:
// Alias for the casper module's selectXPath helper.
var x = require('casper').selectXPath; // goes to the beginning of the file
// Text of the second cell in the row whose first cell contains "Title:".
var title = this.fetchText(x('//table//tr/td[1][contains(text(),"Title:")]/../td[2]'));
I think you can figure out the authors-query. I probably would have done the crawling differently using casper.thenOpen in a loop, because this is rather hard to read with the additional start and run calls being in different functions.
With casper.thenOpen it would look like this:
var x = require('casper').selectXPath; // goes to the beginning of the file
/**
 * Schedules the link at index `i` for scraping, then queues itself as the
 * next step; once every link has been handled it dumps `thesis_data` and
 * exits. Must be called with the casper instance as `this`.
 */
function loopThroughThesisLinks() {
    var finished = !(i < links.length);
    if (finished) {
        utils.dump(thesis_data);
        this.exit();
        return;
    }

    var current = links[i];
    this.echo('[LINK #' + i + '] ' + current);
    getThesisData.call(this, current);
    i++;
    // Recurse: then() queues this function again for the next link.
    this.then(loopThroughThesisLinks);
}
/**
 * Queues a visit to `link` with thenOpen() and, once the page is open,
 * appends a {title, author} record to `thesis_data`. Each value is the text
 * of the second cell in the row whose first cell contains the label.
 * Must be called with the casper instance as `this`.
 *
 * @param {string} link URL of the thesis page
 */
function getThesisData(link) {
    this.thenOpen(link, function collect() {
        var titleText = this.fetchText(x('//table//tr/td[1][contains(text(),"Title:")]/../td[2]'));
        var authorText = this.fetchText(x('//table//tr/td[1][contains(text(),"Authors:")]/../td[2]'));
        // Add the title & author data to the thesis_data array
        thesis_data.push({ title: titleText, author: authorText });
    });
}

CasperJS with JSON.parse

When I run the following code, I get the following:
TypeError: 'undefined' is not a function (evaluating 'this.emit('error', error)')
I asked this earlier, but I'm rephrasing. It appears that the JSON object is undefined inside the casper function.
If I call JSON.parse() outside of it, then it's not undefined.
Thoughts on how to get this working?
// Load the site, then do all the work in the run() callback: print the
// current URL, parse the page content as JSON, echo the result and exit.
var casperOptions = {
    verbose: true,
    logLevel: 'debug',
};
var casper = require("casper").create(casperOptions);
var site = 'http://my.internalsite.com';

casper.start(site);

casper.run(function whenDone() {
    this.echo('URL: ' + this.getCurrentUrl());
    var parsed = JSON.parse(this.getPageContent());
    this.echo(parsed);
    this.exit();
});
This could possibly be due to this.exit() getting called before JSON.parse(this.getPageContent()).
You could try the following:
// Load the JSON document, parse it in a then() step, dump the parsed value,
// and let run() finish without a callback.
var casperOptions = {
    verbose: true,
    logLevel: 'debug',
};
var casper = require("casper").create(casperOptions);
var site = 'http://xkcd.com/info.0.json';

casper.start(site);

casper.then(function parsePage() {
    this.echo('URL: ' + this.getCurrentUrl());
    var parsed = JSON.parse(this.getPageContent());
    require('utils').dump(parsed);
});

casper.run();

Resources