I'm trying to download PDFs from a site. The links are generated by JS and I'm able to collect the URLs. How do I tell my headless browser to click on these links and download the file(s)?
var casper = require('casper').create();
var links;
var fs = require('fs');
/**
 * Collect the href of every download link currently in the document.
 *
 * The script passes this function to `evaluate`, so it only relies on the
 * page's own `document`.
 *
 * @returns {Array} href attribute values of all `.skb-dabslink a` anchors,
 *     in document order (empty when none match).
 */
function getLinks() {
  // Keep the node list in a local: assigning to an undeclared `links` here
  // would create an implicit global (and throws in strict mode).
  var anchors = document.querySelectorAll('.skb-dabslink a');
  return Array.prototype.map.call(anchors, function (anchor) {
    return anchor.getAttribute('href');
  });
}
// Open the page, wait for the script-generated links, collect them, log them
// and write them to links.txt (one URL per line).
casper.start('https://www.skybriefing.com/portal/dabs');

// Waiting for the links to display
casper.wait(5000);

casper.then(function () {
  // Fall back to an empty list so the later steps can always treat `links`
  // as an array, even when evaluate() returns nothing usable.
  links = this.evaluate(getLinks) || [];
});

casper.then(function () {
  // forEach instead of for...in: for...in walks property keys, not elements.
  links.forEach(function (href) {
    console.log(href);
  });
});

casper.then(function () {
  // fs.write takes string content; join explicitly instead of relying on the
  // array's implicit comma-separated string conversion.
  fs.write('links.txt', links.join('\n'), 'w');
});

casper.run();
Related
I am trying to download a file with CasperJS. If using browser, the download begins when user clicks a button and the response headers look like this:
I have tried these two methods with no luck:
1) https://stackoverflow.com/a/26334034
With this approach, the code block inside the if-statement is never executed. If I remove the condition, a bunch of resources are being saved, such as css files and so on. So the event listener is working, but for some reason not triggering when I use CasperJs click function to click the button that should start the download.
2) https://stackoverflow.com/a/30122021/692695
File.csv is saved, but it contains the website's source code, not the CSV file I get when I click the button on the website.
All of my code:
'use strict';
var utils = require('utils');
// Create the Casper instance that the rest of this script drives.
var casper = require('casper').create({
// Uncomment the next two options for verbose debug logging.
//verbose: true,
//logLevel: "debug",
// Local jQuery build listed as a client script; the page-context helpers in
// this script (e.g. getUrl) call `$`, so they depend on it being present.
clientScripts: ["node_modules/jquery/dist/jquery.min.js"]
});
/**
 * Dump the HTML of the page Casper currently has loaded into a file.
 * Used as a debugging aid between steps.
 *
 * @param {string} filename - Path of the file to write (opened in 'w' mode).
 */
function writeHtml(filename) {
  var fileSystem = require('fs');
  fileSystem.write(filename, casper.getHTML(), 'w');
}
/**
 * Find the table-of-contents entry with the wanted title and return the href
 * of its parent element. Relies on jQuery (`$`) being available where it runs.
 *
 * @returns {string|undefined} Whatever `.attr('href')` yields for the parent.
 */
function getUrl() {
  var $entry = $('.tableofcontent_link:contains("Väestö työmarkkina-aseman, sukupuolen ja iän mukaan")');
  return $entry.parent().attr('href');
}
/**
 * Select the first option of a <select> whose value contains `valueToMatch`,
 * then dispatch a bubbling "change" event on the <select>.
 *
 * @param {string} selector - CSS selector of the <select> element.
 * @param {string} valueToMatch - Substring to look for in each option value.
 * @returns {boolean} true when an option was selected; false when the element
 *     was not found (or has no options list) or no option value matched.
 */
casper.selectOptionByValue = function (selector, valueToMatch) {
  return this.evaluate(function (selector, valueToMatch) {
    var select = document.querySelector(selector);
    if (!select || !select.options) {
      // Nothing to select on; report it instead of throwing inside the page.
      return false;
    }
    // Walk select.options rather than select.children: selectedIndex indexes
    // the flat options list, which differs from children once the markup
    // contains <optgroup> elements.
    var found = false;
    for (var i = 0; i < select.options.length && !found; i++) {
      if (select.options[i].value.indexOf(valueToMatch) !== -1) {
        select.selectedIndex = i;
        found = true;
      }
    }
    // dispatch change event in case there is some kind of validation
    var evt = document.createEvent('HTMLEvents');
    evt.initEvent('change', true, true);
    select.dispatchEvent(evt);
    return found;
  }, selector, valueToMatch);
};
// Entry point of the scrape: start on the table listing, then resolve and
// open the link of the wanted table.
var url = 'http://pxnet2.stat.fi/PXWeb/pxweb/fi/StatFin/StatFin__tym__tyti/?table';
var link; // absolute URL of the table page, filled in by the first step

casper.start(url);
casper.userAgent('Mozilla/5.0 (Macintosh; Intel Mac OS X)');

casper.then(function resolveTableLink() {
  this.echo('Now at: ' + this.getCurrentUrl());
  // getUrl runs in the page and returns an href that is appended to the host.
  var href = this.evaluate(getUrl);
  link = 'http://pxnet2.stat.fi' + href;
});

casper.then(function openTablePage() {
  // Opened inside a step so that `link` is read after the step above set it.
  this.open(link);
});
// Step: click the "select all" button of value selectors ctl01 through ctl05.
casper.then(function () {
  this.echo('Now at: ' + this.getCurrentUrl());
  // Select all data for each item. The five button ids differ only in the
  // digit after "ctl0", so they are built from a shared prefix and suffix.
  var idPrefix = '#ctl00_ContentPlaceHolderMain_VariableSelector1_VariableSelector1_VariableSelectorValueSelectRepeater_ctl0';
  var idSuffix = '_VariableValueSelect_VariableValueSelect_SelectAllButton';
  for (var n = 1; n <= 5; n++) {
    this.click(idPrefix + n + idSuffix);
  }
});
// Step: choose the output format in the drop-down at the bottom of the page.
casper.then(function () {
  var formatDropDown = '#ctl00_ContentPlaceHolderMain_VariableSelector1_VariableSelector1_OutputFormats_OutputFormats_OutputFormatDropDownList';
  // Select the format of the file from the select option list at the bottom
  // (the Excel alternative would be 'FileTypeExcelX').
  this.selectOptionByValue(formatDropDown, 'FileTypeCsvWithHeadingAndSemiColon');
});

// Step: snapshot the page after the selections (just for debugging).
casper.then(function () {
  writeHtml('page1.html');
});

// Step: placeholder; the click on the "view table" button is disabled.
casper.then(function () {
  //casper.click('#ctl00_ContentPlaceHolderMain_VariableSelector1_VariableSelector1_ButtonViewTable');
});
// FIRST ATTEMPT TO LOAD THE DATA TO a file called file.csv:
// serialize the ASP.NET form in the page and POST it to the table URL.
casper.then(function () {
  var targetFile = 'file.csv';
  var formData = this.evaluate(function () {
    return $('form#aspnetForm').serialize();
  });
  //this.echo("Params: " + formData);
  this.download(link, targetFile, 'POST', formData);
});

// Step: snapshot the page after the download attempt (just for debugging).
casper.then(function () {
  writeHtml('page2.html');
});
// SECOND ATTEMPT TO LOAD THE DATA TO a file called stats.csv:
// watch received resources and download the one whose URL names the table.
casper.on('resource.received', function (resource) {
  // PhantomJS reports a response in stages ("start", then "end"). Handling
  // every stage would run the same download more than once, so react only to
  // the final one.
  if (resource.stage !== 'end') {
    return;
  }
  if (resource.url.indexOf('tyti_001') === -1) {
    return;
  }
  this.echo(resource.url);
  var file = 'stats.csv';
  try {
    this.echo('Attempting to download file ' + file);
    var fs = require('fs');
    casper.download(resource.url, fs.workingDirectory + '/' + file);
  } catch (e) {
    // Best effort: report the failure and let the remaining steps continue.
    this.echo(e);
  }
});
// Run the queued steps; when they are done, print a marker and quit.
casper.run(function () {
  this.echo('End');
  this.exit();
});
And my package.json:
{
"scripts": {
"test": "dotest"
},
"pre-commit": ["test"],
"dependencies": {
"jquery": "^3.3.1"
},
"devDependencies": {
"pre-commit": "^1.2.2"
}
}
Explanation of the code:
First visit this page: http://pxnet2.stat.fi/PXWeb/pxweb/fi/StatFin/StatFin__tym__tyti/statfin_tyti_pxt_001.px/?rxid=bd4d5dc1-358d-407e-ae47-13266b79bfd0
There, dynamically pick a specified link and move there.
Select all data by clicking the V-shaped icon (look at the attached screenshot) and then select the format of the file.
I've faced this issue earlier with all versions except PhantomJS 2.0.0. I also tried the solutions you shared from SO a year ago, and they didn't work either.
I'm going to assume you're using a phantomjs version other than 2.0.0.
Here's the link to download it
https://bitbucket.org/ariya/phantomjs/downloads/
With it, you will have access to onFileDownload method which you can override and use like below
// Download hook installed on the underlying page object.
// NOTE(review): per the surrounding answer it is invoked when a download is
// triggered, and the returned string is presumably used as the target file
// name; confirm against the PhantomJS build in use.
casper.page.onFileDownload = function (status) {
  var message = 'onFileDownload(' + status + ')';
  console.log(message);
  return 'newfile.csv';
};
onFileDownload will be called whenever a file is downloaded as a result of clicking a button (ajax) or as a result of sequential get/post requests.
All you have to do is, trigger the click on the button/link that will initiate download.
Note : My solution is assuming that everything else (site is not blocking phantomjs and your request headers/cookies are as expected)
I have a link button on the page:
<a id="quote" href="quote.html" target="_blank">Quote</a>
At first I click the link:
// Queue a step that clicks the element matching '#quote'.
casper.thenClick('#quote');
But I can't capture the pop-up window. So I get the url of the link and open it in current window:
var url = '';
/**
 * Read the href attribute of the quote link from the current document.
 *
 * @returns {string|null} The attribute value as returned by getAttribute.
 */
function getQuoteStartUrl() {
  return document.querySelector('a#quote').getAttribute('href');
}
// Queue a step that opens `url`, then echoes the URL and the page title.
// NOTE(review): `url` is passed as an argument when this step is registered,
// so the step opens whatever value `url` holds at this point in the script;
// confirm it has been assigned by then.
casper.thenOpen(url, function() {
this.echo(url);
this.echo(this.getTitle());
});
The url is correct but the page is empty. Then I try this:
// The URL is a literal here, so it already has its final value when the
// thenOpen step below is registered.
var url = 'http://quote.html';
// Queue a step that opens the URL, then echoes it and the page title.
casper.thenOpen(url, function() {
this.echo(url);
this.echo(this.getTitle());
});
It works.
Finally I understand why it doesn't work: the steps are bound before the function is called. So I tried this:
// Calls thenOpen() from inside a step, so `url` is read when the step runs
// rather than when the script is first evaluated.
casper.then(function() {
this.echo(url);
this.thenOpen(url);
// NOTE(review): thenOpen() presumably only queues the navigation, in which
// case the title echoed below still belongs to the current page; confirm.
this.echo(this.getTitle());
});
It works too.
Could you try out the following?
// Read the quote link's href in one step, open it in the next, and report
// on the opened page in a third.
var url = '';

casper.then(function () {
  url = this.getElementAttribute('a#quote', 'href');
});

// `casper.thenOpen(url, ...)` at this point would receive the still-empty
// string, because its argument is evaluated when the step is registered.
// Opening from inside a step reads `url` after the step above has set it.
casper.then(function () {
  this.open(url);
});

casper.then(function () {
  this.echo(url);
  this.echo(this.getTitle());
});
It looks as though you may not be setting the url before trying to load it.
My problem is that when I check one of the checkboxes and then search, the checkbox becomes unchecked. I also don't know what's wrong with my live search; it is not working.
please check this link to test.
http://jsfiddle.net/v921/KmVHf/4/
This is my JavaScript:
/**
 * Live-search filter: show the table rows whose text contains the search
 * term (case-insensitive) and hide the others.
 *
 * Rows are toggled in place. Re-inserting the table from an HTML snapshot and
 * removing non-matching rows replaces the row elements on every call, which
 * discards state such as checked checkboxes.
 *
 * @param {Element} element - The search input whose value is the term.
 */
function filter(element) {
  var needle = String($(element).val() || '').toLowerCase();
  $('.AvailableGroupLab tr').each(function () {
    var $row = $(this);
    // indexOf does a literal match; String#search would compile the user's
    // input as a regular expression and throw on input such as "(".
    var isMatch = $row.text().toLowerCase().indexOf(needle) !== -1;
    $row.toggle(isMatch);
  });
}
Try
/**
 * Live-search filter: show the rows whose second cell contains the search
 * term (case-insensitive) and hide the others, without rebuilding the table.
 *
 * @param {Element} element - The search input whose value is the term.
 */
function filter(element) {
  var needle = String($(element).val() || '').toLowerCase();
  $('.AvailableGroupLab tr').each(function () {
    var $row = $(this);
    var cellText = $row.children(':nth-child(2)').text().toLowerCase();
    // Literal substring match: building a RegExp from raw user input throws
    // on characters such as "(" and gives ".", "*" etc. special meaning.
    $row.toggle(cellText.indexOf(needle) !== -1);
  });
}
// Re-run the filter whenever a text input is typed into or changed.
$('input:text').on('keyup change', function handleSearchInput() {
  filter(this);
});
Demo: Fiddle
I've got two questions. First: how can I reduce this code?
// Navigation: each link id loads one fragment into #con with a fade-in.
// A single id-to-file table replaces five copy-pasted handlers (which each
// read an `href` they never used).
$.each({
  '#m': 'inc/main.php',
  '#b': 'inc/blog.php',
  '#p': 'inc/portfolio.php',
  '#l': 'inc/lebenslauf.php',
  '#k': 'inc/kontakt.php'
}, function (linkSelector, fragmentPath) {
  $(linkSelector).click(function () {
    $('#con').hide().load(fragmentPath).fadeIn('normal');
    // Returning false keeps the browser from following the link.
    return false;
  });
});
I'm using a lib called perfect scrollbar. It is included this way:
// Once the document is ready, attach the perfect-scrollbar plugin to
// #scrollbox with a wheel speed of 20 and wheel propagation turned off.
$(document).ready(function (jq) {
  var scrollOptions = { wheelSpeed: 20, wheelPropagation: false };
  jq('#scrollbox').perfectScrollbar(scrollOptions);
});
When main.php is loaded in with this script, the scrollbar is not there as it should be. That's because the document doesn't refresh as usual. What do I need to write to get it working when the content is loaded in?
Write a function & pass each selector & filepath to this function
// Bind the #m link to the shared loader.
$('#m').click(function () {
  // Pass the helper's `false` back to jQuery so the link's default
  // navigation is cancelled; without the return it would be ignored.
  return helperfunction($(this), 'inc/main.php');
});
/**
 * Load a fragment into #con: hide the container, start loading the file into
 * it, and fade it in.
 *
 * @param {Object} selector - The clicked link's jQuery object. Kept for
 *     interface compatibility; it is not needed to load the fragment.
 * @param {string} phpfilepath - Path of the file to load into #con.
 * @returns {boolean} Always false, so a click handler can return it to
 *     cancel the link's default action.
 */
function helperfunction(selector, phpfilepath) {
  $('#con').hide().load(phpfilepath).fadeIn('normal');
  return false;
}
I have a WordPress theme in which I have to create a page that has a movie playing in a loop.
It has 3 menu items which change the div text without reloading the page.
So far no problem.
Click here – put it in content box 2
But when I'm on a different page and I click, for example, the second link, it should go to the video page and change the text to the second one.
How can i do this?
Is there a way to do that with?
url/wordpressname/#1
Found a Solution: i found a solution here: http://www.deluxeblogtips.com/2010/05/how-to-ajaxify-wordpress-theme.html
which i changed to fit my needs:
// Swap the text shown in #text according to the URL hash (#1 to #4), so a
// link such as url/wordpressname/#2 coming from another page shows the
// second text.
jQuery(document).ready(function ($) {
  var $mainContent = $('#text');
  // Use the page's own protocol so the link selector below also matches when
  // the site is served over https.
  var siteUrl = top.location.protocol + '//' + top.location.host;
  // Hash value -> markup to display. The closing tags were a malformed "</>".
  var hashContent = {
    '1': '<p>Text1</p>',
    '2': '<p>Text2</p>',
    '3': '<p>Text3</p>',
    '4': '<p>Text4</p>'
  };

  // Attribute values that contain "/" or "." are quoted; unquoted they are
  // not valid selector syntax for newer jQuery versions.
  $(document).delegate(
    "a[href^='" + siteUrl + "']:not([href*='/wp-admin/']):not([href*='/wp-login.php']):not([href$='/feed/'])",
    'click',
    function () {
      // Intentionally a no-op: the ajax navigation is disabled.
      //location.hash = this.pathname;
      //return false;
    }
  );

  $('#searchform').submit(function (e) {
    location.hash = '?s=' + $('#s').val();
    e.preventDefault();
  });

  $(window).bind('hashchange', function () {
    var url = window.location.hash.substring(1);
    if (!url) {
      return;
    }
    // Own-property check so hashes like "constructor" are not treated as keys.
    if (Object.prototype.hasOwnProperty.call(hashContent, url)) {
      $mainContent.html(hashContent[url]);
    }
    // url = url + "#content";
    //$mainContent.animate({opacity: "0.1"}).html('<p>Please wait...</>').load(url, function() {
    //$mainContent.animate({opacity: "1"});
    //});
  });

  // Apply the hash that is already present when the page loads.
  $(window).trigger('hashchange');
});