CasperJS scraping assistance required - casperjs

I am trying to go to this page and scrape from each link the 'Title' and 'Authors' for each thesis. So far I have this (my issues that I require assistance with are in the comments within code):
var utils = require('utils');
var casper = require('casper').create({
verbose: true,
logLevel: 'error',
pageSettings: {
loadImages: false,
loadPlugins: false,
userAgent: 'Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/29.0.1547.2 Safari/537.36'
},
clientScripts: ['lib/jquery.min.js']
});
var i = 0;
var links = [];
var thesis_data = [];
function getThesisLinks () {
var links = document.querySelectorAll('');//Not sure what should go in ('')
return [].map.call(links, function(link) {
return link.getAttribute('href');
});
}
function loopThroughThesisLinks() {
// Recurses until all links are processed
if (i < links.length) {
this.echo('[LINK #' + i + '] ' + links[i]);
getThesisData.call(this, links[i]);
i++;
this.run(loopThroughThesisLinks);
} else {
utils.dump(thesis_data);
this.exit();
}
}
function getThesisData(link) {
this.start(link, function() {
// Get title of thesis - not sure what element to insert for this.fetchText
var title = this.fetchText('');
// Get name of authors - not sure what element to insert for this.fetchText
var author = this.fetchText('');
// Add the title & author data to the thesis_data array
var data = {
title: title,
author: author
};
thesis_data.push(data);
});
}
casper.start('http://ses.library.usyd.edu.au/handle/2123/345/browse?type=dateissued&sort_by=2&order=DESC&rpp=1495&etal=0&submit_browse=Update', function() {
links = this.evaluate(getThesisLinks);
// Convert relative links to absolute URLs
for (var i = 0; i < links.length; i++) {
links[i] = "http://ses.library.usyd.edu.au/handle/" + links[i];
}
utils.dump(links);
});
casper.run(loopThroughThesisLinks);
Any assistance would be appreciated.

This is a simple CSS selector for all links:
var links = document.querySelectorAll(
'table.misctable > tbody > tr > td:nth-of-type(3) > a');
You can also use XPath like this:
var x = require('casper').selectXPath; // goes to the beginning of the file
var title = this.fetchText(x('//table//tr/td[1][contains(text(),"Title:")]/../td[2]'));
I think you can figure out the authors-query. I probably would have done the crawling differently using casper.thenOpen in a loop, because this is rather hard to read with the additional start and run calls being in different functions.
With casper.thenOpen it would look like this:
var x = require('casper').selectXPath; // goes to the beginning of the file
function loopThroughThesisLinks() {
// Recurses until all links are processed
if (i < links.length) {
this.echo('[LINK #' + i + '] ' + links[i]);
getThesisData.call(this, links[i]);
i++;
this.then(loopThroughThesisLinks);
} else {
utils.dump(thesis_data);
this.exit();
}
}
function getThesisData(link) {
this.thenOpen(link, function() {
var title = this.fetchText(x('//table//tr/td[1][contains(text(),"Title:")]/../td[2]'));
var author = this.fetchText(x('//table//tr/td[1][contains(text(),"Authors:")]/../td[2]'));
// Add the title & author data to the thesis_data array
var data = {
title: title,
author: author
};
thesis_data.push(data);
});
}

Related

JSON and HTML troubles

i'm currently having some troubles with displaying the information from a JSON file to html. I'm currently using AJAX to get the data from the JSON file.
The main problem that i'm facing is with displaying all the data into one div.
function Test(){
request.open('GET','/json/anime.json');
request.onreadystatechange = function() {
if((request.readyState===4) && (request.status===200)) {
var json = JSON.parse(request.responseText);
for(var title in json.Title ) {
for(var ep in json.Episode) {
for(var img in json.Image) {
for(var link in json.Link) {
_title = json.Image[title];
episode = json.Image[ep];
image = json.Image[img];
_link = json.Image[link];
var div = document.createElement('div');
div.className = 'card card-inverse';
div.innerHTML = `<img class="card-img img-fluid img-responsive" src="${image}" data-toggle="modal">`;
document.getElementById('anime').appendChild(div);
}
}
}
}
}
}
request.send();
}
The JSON file looks like this...
{
Episode: [
...
],
Image: [
...
],
Link: [
...
],
Title: [
...
]
}
The way above is working if i'm only looping over one of the four arrays, however crashes chrome when trying to do the above task.
Any help would be appreciated.
Thanks
I decided to take a different approach moving away from ajax a little and moving more towards jquery. Below is what has so far worked out for me. Thanks to the people who commented above, really helped me think of ways to tackling it.
function DisplayCards() {
var i = 0;
$.getJSON('/json/anime.json', function(data) {
$.each(data, function(index) {
for(key in data[index]){
e = data.Image.length;
console.log(e);
if(i < e)
{
image = data.Image[key];
link = data.Link[key];
console.log(i += 1);
var div = document.createElement('div');
div.className = 'card card-inverse';
div.innerHTML = `<img class="card-img img-fluid img-responsive" src="${image}" data-toggle="modal">`;
var p = document.createElement('p');
p.innerHTML = 'id="wrapper" class="text"';
document.getElementById('anime').appendChild(div);
}
}
console.log(data);
});
});
}

How do I add html tags in jquery plugins?

I am doing the live search using the jquery plugins. When I tried to search that doesn't exist, it only shows the table. I would like to put some message "No result found" if it doesnt exist. The question is how can I add message "No result found"
Note: In my codes I add some validation, the user need input minimum of 3 characters
/**
**options to have following keys:
**searchText: this should hold the value of search text
**searchPlaceHolder: this should hold the value of search input box placeholder
**/
(function($)
{
$.fn.tableSearch = function(options)
{
if(!$(this).is('table'))
{
return;
}
var tableObj = $(this),
searchText = (options.searchText)?options.searchText:'Search: ',
searchPlaceHolder = (options.searchPlaceHolder)?options.searchPlaceHolder:'',
divObj = $('<div style="font-size:20px;">'+searchText+'</div><br /><br />'),
inputObj = $('<input style="min-width:25%;max-width:50%;margin-left:1%" type="text" placeholder="'+searchPlaceHolder+'" />'),
caseSensitive = (options.caseSensitive===true)?true:false,
searchFieldVal = '',
pattern = '';
inputObj.off('keyup').on('keyup', function(){
searchFieldVal = $(this).val();
if(searchFieldVal.length == 0)
{
tableObj.find('tbody tr').show();
}
else if(searchFieldVal.length >= 3)
{
pattern = (caseSensitive)?RegExp(searchFieldVal):RegExp(searchFieldVal, 'i');
tableObj.find('tbody tr').hide().each(function()
{
var currentRow = $(this);
currentRow.find('td').each(function()
{
var result = "No result";
$("tbody tr").append(result);
if(pattern.test($(this).html()))
{
currentRow.show();
return false;
}
});
});
}
});
tableObj.before(divObj.append(inputObj));
return tableObj;
}
}(jQuery));
Here into JQ plugin(Posted at your question), the handler for empty result is exist. See piece of code from it.
else if(searchFieldVal.length >= 3)
{
pattern = (caseSensitive)?RegExp(searchFieldVal):RegExp(searchFieldVal, 'i');
tableObj.find('tbody tr').hide().each(function()
{
var currentRow = $(this);
currentRow.find('td').each(function()
{
var result = "No result";
$("tbody tr").append(result);
if(pattern.test($(this).html()))
{
currentRow.show();
return false;
}
});
});
}
Paraphrase you mistaken at your end. Re check it.

WMSLayer.getSource() Is not a function?

I just want to try showing information about the wms layer about the layers details using openlayers 3.14
map.on('singleclick', function(evt) {
var url = layers.getSource().getGetFeatureInfoUrl(
evt.coordinate, viewResolution, viewProjection,
{'INFO_FORMAT': 'text/javascript',
'propertyName': 'formal_en'});
if (url) {
var parser = new ol.format.GeoJSON();
$.ajax({
url: url,
dataType: 'jsonp',
jsonpCallback: 'parseResponse'
}).then(function(response) {
var result = parser.readFeatures(response);
if (result.length) {
var info = [];
for (var i = 0, ii = result.length; i < ii; ++i) {
info.push(result[i].get('formal_en'));
}
container.innerHTML = info.join(', ');
} else {
container.innerHTML = ' ';
}
});
}
});
layers is probably an array of ol.layer or it may not even be available in the function's scope.
Either way you need to get a single layer to use .getGetFeatureInfoUrl() against.
So either something like:
layers[0].getSource().getGetFeature....
or if layers isn't available you can get it from the map object with .getLayers()
map.getLayers().forEach(function(layer) {
// optionally check that the layer is the one you want.
if (layer.getProperties().ref === 'myLayer'){
layer.getSource().getGetFeature....
}
}

context: contextMenu.SelectionContext() not working for selected text in input fields

My little project - extension for Firefox that translate text. User select text on page, make right click and see translation right in the context-menu or in popup. In my contextMenu.Item I am using context: contextMenu.SelectionContext() for determin context (for example, user click on image or on selected text).
But this not working if text selected in input field, documentation page not mentioned it. What I need to do for handling selection context for input fields, not only for regular text on page? In this situation I see my context-menu, but in debug I see that selected text was not send to the code of my extension.
I tried this code - nothing.
My current code is:
const { getMostRecentBrowserWindow } = require('sdk/window/utils');
var uuid = require('sdk/util/uuid').uuid();
var uuidstr = uuid.number.substring(1, 37);
var notifications = require("sdk/notifications");
var contextMenu = require("sdk/context-menu");
var Request = require("sdk/request").Request;
var self = require('sdk/self');
var tabs = require('sdk/tabs');
var prefs = require('sdk/simple-prefs').prefs;
var cmitems = null;
var wasTranslatedSecondTime = false;
var inProgress = '...';
var translated = '';
var menuItem = contextMenu.Item({
data: uuidstr, // for 'binding' tooltop's 'id' + text
label: inProgress, // ...
image: self.data.url('ico.png'),
context: contextMenu.SelectionContext(),
contentScript: 'self.on("context", function() {' +
'var selectionText = window.getSelection().toString();' +
'self.postMessage({name:"context", data:selectionText});' +
'return true;' +
'});' +
'self.on("click", function() {' +
'var selectionText = window.getSelection().toString();' +
'self.postMessage({name:"click", data:"https://translate.yandex.ru?text=" + selectionText.replace("&", "%26")});' +
'})',
onMessage: function(message) {
if (message.name == 'context') {
menuItem.label = inProgress; // ...
if (cmitems != undefined) cmitems[0].tooltipText = '';
var input = message.data.replace('&', '%26');
translate('ru', input); // default direction - from EN to RU
} else { // if (message.name == 'click')
tabs.open(message.data);
}
}
});
function translate(lang, input) {
Request({ // key is not referral but API-key: https://api.yandex.com/translate/doc/dg/concepts/api-overview.xml
url: 'https://translate.yandex.net/api/v1.5/tr.json/translate?key=trnsl.1.1.20150627T071448Z.117dacaac1e63b79.6b1b4bb84635161fcd400dace9fb2220d6f344ef&lang=' +
lang + '&text=' + input,
onComplete: function (response) {
translated = response.json.text[0];
if (input == translated && wasTranslatedSecondTime == false) { // if input on Russian and we receive the same text -
translate('en', input); // translate again selected text into English
wasTranslatedSecondTime = true;
} else { // show results
if (prefs.popup) popup(translated);
menuItem.label = translated;
wasTranslatedSecondTime = false;
if (prefs.tooltip) tooltip(translated);
}
}
}).get();
}
function popup(text) {
if (text.length > 0)
notifications.notify({
title: 'translate.yandex.ru',
text: text,
time: 5000
})
}
function tooltip(translated) {
menuItem.data = uuidstr + translated;
cmitems = getMostRecentBrowserWindow().document.querySelectorAll(".addon-context-menu-item[value^='"+uuidstr+"']");
cmitems[0].tooltipText = cmitems[0].value.substring(36);
}
It appears that your question boils down to: How do I get the selected text, even when it is in an input field?
You are currently using var selectionText = window.getSelection().toString(); which is failing when the selected text is in an input field.
In one of my extensions, I use the following to obtain the selected text. It works even when the selected text is in an input field:
/**
* Fix an issue with Firefox that it does not return the text from a selection if
* the selected text is in an INPUT/textbox.
*/
function getSelectedText(win,doc) {
//Adapted from a post by jscher2000 at:
// http://forums.mozillazine.org/viewtopic.php?f=25&t=2268557
//Is supposed to solve the issue of Firefox not getting the text of a selection when
// it is in a textarea/input/textbox.
var ta;
if (win.getSelection && doc.activeElement){
if (doc.activeElement.nodeName == "TEXTAREA" ||
(doc.activeElement.nodeName == "INPUT" &&
doc.activeElement.getAttribute("type").toLowerCase() == "text")
){
ta = doc.activeElement;
return ta.value.substring(ta.selectionStart, ta.selectionEnd);
} else {
//As of Firefox 31.0 this appears to have changed, again.
//Try multiple methods to cover bases with different versions of Firefox.
let returnValue = "";
if (typeof win.getSelection === "function"){
returnValue = win.getSelection().toString();
if(typeof returnValue === "string" && returnValue.length >0) {
return returnValue
}
} //else
if (typeof doc.getSelection === "function"){
returnValue = doc.getSelection().toString();
if(typeof returnValue === "string" && returnValue.length >0) {
return returnValue
}
} //else
if (typeof win.content.getSelection === "function"){
returnValue = win.content.getSelection().toString();
if(typeof returnValue === "string" && returnValue.length >0) {
return returnValue
}
} //else
//It appears we did not find any selected text.
return "";
}
} else {
return doc.getSelection().toString();
}
}

What is the correct JSON structure for this while loop?

I'm a little new to JSON so trying to understand what is the best way to do this. I have two variables: postcode and energyrating that I want to put into JSON and then parse to a for loop.
I can get it to work with one variable but when I have two it doesn't work.
Here is my JSON:
header('Content-type: application/json');
$postcodeArray = array('postcodes' => array("E6 2JG","SE1 2AQ","DA1 1DZ"), 'energyrating' => array("A","B","C","D","E","F","G"));
die(json_encode($postcodeArray));
Here is my jQuery:
function addNew(postcodes) {
if(postcodes.length > 0) {
for(var i = 0; i < postcodes.length; i++) {
var address = postcodes[i];
var rating = energyrating[i];
geocoder.geocode( { 'address': address }, function(results, status) {
if (status == google.maps.GeocoderStatus.OK) {
map.setCenter(results[0].geometry.location);
var image = '../img/markers/' + rating + '.png';
var marker = new google.maps.Marker({
map: map,
position: results[0].geometry.location,
icon: image
});
} else {
alert("Geocode was not successful for the following reason: " + status);
}
});
}
} else {
alert("Sorry, no data was found.");
}
}
How do I get this to work with both variables?
What does not work exactly ?
Reading your code, I would say there is an error into your code. The energyrating argument is missing into your addNew function:
function addNew(postcodes, energyrating) {
Assuming that you call your function like this:
addNew(jsonData.postcodes, jsonData.energyrating);
Use json variables to store the data like that :
$postcodeArray = '{"postcodes":{"0":E6, "1":"2JG", "3":"SE1 2AQ","4":"DA1 1DZ"}, "energyrating":{"0":"A","1":"B","2":"C","3":"D","4":"E","5":"F","6":"G"}}';
In the place of
$postcodeArray = JSON.parse('postcodes' => array("E6 2JG","SE1 2AQ","DA1 1DZ"), 'energyrating' => array("A","B","C","D","E","F","G"));
And you can access the values.
Or try json_decode(postcodes) in the function addNew in first line.

Resources