Puppeteer, looping through links selected by xpath

Puppeteer, looping through links selected by xpath - xpath

I'm new to puppeteer (and not so great at javascript in general) and am trying to write some basic functionality to:
Get all the links from an XPath
Loop through and click those links
Screenshot and save the HTML of the page
Go back, screenshot and save the HTML of the records page to be save in the same directory of the others and start the process over
The error I get is:
Evaluation failed: DOMException: Failed to execute 'querySelector' on 'Document': '0' is not a valid selector
Here's the code I have:
I'm fairly confident all the code works, except for the issues i'm having getting the right things to click on with my XPath. The website I'm getting these from is:
https://hrlb.oregon.gov/bspa/licenseelookup/searchdir.asp?searchby=lastname&searchfor=a&stateselect=none&Submit=Search
code:
const records = await page.$x('//table[2]//tr[td[a]]//td[1]/a');
let int = 0;
for (let record in records) {
await Promise.all([
page.waitForNavigation(),
page.click(record)
]);
await Promise.all([makeDirectory('screenshots/item'+int), makeDirectory('screenshots/item'+int+'/base'), makeDirectory('screenshots/item'+int+'/record')]);
let recordPath = "screenshots/item"+int+"/record/record.html";
let basePath = "screenshots/item"+int+"/base/base.html";
page.screenshot({path: "screenshots/item"+int+"/record/record.png", fullPage: true});
let recordBody = await page.evaluate(() => document.body.innerHTML);
await saveHtml(recordPath, recordBody);
await Promise.all([
page.waitForNavigation(),
page.goBack()
]);
await page.screenshot({path: "screenshots/item"+int+"/base/base.png", fullPage: true});
let baseBody = await page.evaluate(() => document.body.innerHTML);
await saveHtml(basePath, baseBody);
int++;
console.log(record);
}
async function makeDirectory(path) {
mkdirp(path, function(err) {
if (err) throw err;
});
};
async function saveHtml(path, html) {
await fs.writeFile(path, html, (err) => {
if (err) throw err;
});
};
Note: I'm required to use XPath :(
Updated 6/25/18
This now gives me all the links from the xpath selector. Which i then iterate over and just use a page.goto, to go to the correct site.
const linksXPath = '//table[2]//tr[td[a]]//td[1]/a';
const links = await page.evaluate((selector) => {
let results = [];
let query = document.evaluate(selector,
document,
null, XPathResult.ORDERED_NODE_SNAPSHOT_TYPE, null);
for (let i=0, length=query.snapshotLength; i<length; ++i) {
results.push(query.snapshotItem(i).href);
}
return results;
}, linksXPath);

I think it is your selector that is the issue.
I believe your selector for the table should be:
"body > table > tbody > tr:nth-child(2) > td > table > tbody > tr:nth-child(1) > td > table.bodytext > tbody"
The easiest way to get the right selector for a page is to use the Chrome Dev Tools.
Inspect the page, and go to the "Elements" tab. From there, you should see all of the HTML elements. Right click on the one you want (I went for <tbody> so you can iterate through the <tr> elements.) and choose copy > copy selector.

My code is now doing what I need it to do, however I wish there was a simpler way of doing this. Also, you'll see as I iterate over the links, i'm using a page.goto function to go there. I still don't know of a way to use page.click. I would have to use the xpath to get all the td's and then click on them, but i was never able to get that to work. So here is the working product:
const puppeteer = require('puppeteer');
const fs = require('fs');
const mkdirp = require('mkdirp');
async function run() {
const pageToClick = 'body > table > tbody > tr:nth-child(3) > td > table > tbody > tr > td > form > table > tbody > tr:nth-child(3) > td > div > input[type="submit"]';
const select = 'body > table > tbody > tr:nth-child(3) > td > table > tbody > tr > td > form > table > tbody > tr:nth-child(1) > td:nth-child(2) > select';
const inputField = 'body > table > tbody > tr:nth-child(3) > td > table > tbody > tr > td > form > table > tbody > tr:nth-child(2) > td:nth-child(2) > input[type="text"]:nth-child(1)';
const linksXPath = '//table[2]//tr[td[a]]//td[1]/a';
const browser = await puppeteer.launch({
headless: true
});
const page = await browser.newPage();
await page.goto('https://hrlb.oregon.gov/bspa/licenseelookup/');
await page.select(select, 'lastname');
await page.focus(inputField);
await page.keyboard.type('a');
await Promise.all([
page.waitForNavigation(),
page.click(pageToClick)
]);
const links = await page.evaluate((selector) => {
let results = [];
let query = document.evaluate(selector,
document,
null, XPathResult.ORDERED_NODE_SNAPSHOT_TYPE, null);
for (let i=0, length=query.snapshotLength; i<length; ++i) {
results.push(query.snapshotItem(i).href);
}
return results;
}, linksXPath);
const basePic = await page.screenshot({fullPage: true});
let baseBody = await page.evaluate(() => document.body.innerHTML);
let int = 0;
for (i = 0; i < links.length; i++) {
await Promise.all([
page.waitForNavigation(),
page.goto(links[i])
]);
await Promise.all([makeDirectory('screenshots/item'+int), makeDirectory('screenshots/item'+int+'/base'), makeDirectory('screenshots/item'+int+'/record')]);
let recordPath = "screenshots/item"+int+"/record/record.html";
let basePath = "screenshots/item"+int+"/base/base.html";
let basePicPath = "screenshots/item"+int+"/base/base.png";
await page.screenshot({path: "screenshots/item"+int+"/record/record.png", fullPage: true});
let recordBody = await page.evaluate(() => document.body.innerHTML);
await saveFile(recordPath, recordBody);
await Promise.all([
page.waitForNavigation(),
page.goBack()
]);
await saveFile(basePath, baseBody);
await saveFile(basePicPath, basePic);
int++;
}
await page.close();
await browser.close();
}
async function makeDirectory(path) {
mkdirp(path, function(err) {
if (err) throw err;
});
};
async function saveFile(path, html) {
await fs.writeFile(path, html, (err) => {
if (err) throw err;
});
};
run();

Related

response from await browser.tabs.sendMessage is set in chrome, but not in firefox

I have successfully used await browser.tabs.sendMessage in chrome to get response from the listener, but the same code in firefox does not work. await browser.tabs.sendMessage return immediately and sets response to undefined. In content script inject.js, sendResponse should be called after 1000ms timeout.
I attached a minimalistic example. Any idea why await browser.tabs.sendMessage
returns what sendResponse set only in chrome, but not in firefox?
//inject.js
(async () => {
if (typeof browser === "undefined") {
var browser = chrome;
}
browser.runtime.onMessage.addListener((msg, sender, sendResponse) => {
console.log(msg);
setTimeout(function(){
let pageObject = {a:1};
sendResponse(pageObject);
},1000)
return true;
});
})();
//background.js
(async () => {
if (typeof browser === "undefined") {
var browser = chrome;
}
//**code for injecting content scripts on extension reload**
browser.runtime.onInstalled.addListener(async () => {
let manifest = browser.runtime.getManifest();
for (const cs of manifest.content_scripts) {
for (const tab of await browser.tabs.query({ url: cs.matches })) {
browser.scripting.executeScript({
target: { tabId: tab.id },
files: cs.js,
});
}
}
});
async function SendMessageToFront(message) {
let resolve;
const promise = new Promise(r => resolve = r);
browser.tabs.query({}, async function (tabs) {
for (let index = 0; index < tabs.length; index++) {
const tab = tabs[index];
if (tab.url) {
let url = new URL(tab.url)
if (url.hostname.includes("tragetdomain.com")) {
var startTime = performance.now()
let response = await browser.tabs.sendMessage(tab.id, { message: message });
var endTime = performance.now()
console.log(`Call to doSomething took ${endTime - startTime} milliseconds`) // this takes 0ms
console.log("got response");
console.log(response); // this is undefined
console.log(browser.runtime.lastError); // this is empty
resolve(response);
break;
}
}
}
});
return promise;
}
await SendMessageToFront();
})();

I guess for the tests in firefox you do the reload of the background script (F5 or the specific button in devtools)
Just as you have coded the background you have little hope of getting an answer because every time you reload the background you break the wire with all content scripts injected into the page(s).
Move the browser check inside the "SendMessageToFront" function. Move the "SendMessageToFront" function (async is not needed) to the main thread and run that function in the main thread.
/*async*/ function SendMessageToFront(message) {
if (typeof browser === "undefined")
var browser = chrome;
let resolve;
const promise = new Promise(r => resolve = r);
browser.tabs.query({}, async function(tabs) {
for (let index = 0; index < tabs.length; index++) {
const tab = tabs[index];
if (tab.url) {
let url = new URL(tab.url);
if (url.hostname.includes("tragetdomain.com")) {
var startTime = performance.now()
let response = await browser.tabs.sendMessage(tab.id, {'message': message});
var endTime = performance.now()
console.log(`Call to doSomething took ${endTime - startTime} milliseconds`) // this takes 0ms
console.log("got response");
console.log(response); // this is undefined
console.log(browser.runtime.lastError); // this is empty
resolve(response);
break
}
}
}
});
return promise
}
(async _ => {
await SendMessageToFront()
})();
in this way you will get an error message as soon as the background is ready which tells you that the content script on the other side does not exists or it's not ready yet, but now, when the content script will be ready, you should just re-launch the function from the background script devtools
(async _ => {
await SendMessageToFront()
})();
this time you will get the correct answer {a: 1}

Vonage browser to browser audio call return some error

I am using Laravel, and trying add browser to browser audio calling. I am using Vonage (Tookbox) API for this, but I am getting some error.
here is my code:
async function audioCall() {
var publisher;
var targetElement = 'publisher';
var pubOptions = {publishAudio:true, publishVideo:false};
publisher = OT.initPublisher(targetElement, pubOptions, function(error) {
if (error) {
alert("The client cannot publish.");
} else {
console.log('Publisher initialized.');
}
});
// Setting an audio source to a new MediaStreamTrack
const stream = await OT.getUserMedia({
videoSource: null
});
const [audioSource] = stream.getAudioTracks();
publisher.setAudioSource(audioSource).then(() => console.log('Audio source updated'));
// Cycling through microphone inputs
let audioInputs;
let currentIndex = 0;
OT.getDevices((err, devices) => {
audioInputs = devices.filter((device) => device.kind === 'audioInput');
// Find the right starting index for cycleMicrophone
audioInputs.forEach((device, idx) => {
if (device.label === publisher.getAudioSource().label) {
currentIndex = idx;
}
});
});
const cycleMicrophone = () => {
currentIndex += 1;
let deviceId = audioInputs[currentIndex % audioInputs.length].deviceId;
publisher.setAudioSource(deviceId);
};
}
This code return an error on console:
Uncaught SyntaxError: await is only valid in async functions and the top level bodies of modules

I believe the issue is that you have
device.kind === 'audioInput'
and I'm pretty sure device.kind comes out like 'audioinput' (all lowercase).
examples:
https://developer.mozilla.org/en-US/docs/Web/API/MediaDeviceInfo/kind
https://developer.mozilla.org/en-US/docs/Web/API/MediaDevices/enumerateDevices#examples
That would make audioInputs empty (try to console.log it to verify) and gives you the error because there is no device.
Try:
device.kind.toLowerCase() === 'audioinput'
Hope it works out.

How can I use multiple await in cypress?

I am trying to fetch multiple data from UI using cypress.
First of all, I thought my selector is incorrect, but I have tried it with the same selector as the below codebase, but it still not working
below is the function
async getData(model?: any) {
const listSelector='[ng-repeat="workflow in vm.workflows track by $index"]';
const dataFromUi = {};
const data = await cy.get(listSelector); // this gives data
const data1 = await cy.get(listSelector); // this doesn't
dataFromUi['Test1'] = data;
dataFromUi['Test2'] = data1;
debugger;
return dataFromUi;
}
I was calling this method from a spec file, below is the calling spec file
describe('app test', () => {
beforeEach(() => {
cy.login();
cy.navigateUsingMenu('dasboard', '');
});
it('',async ()=>{
const result = await getData();
result.Test1 // contains data
result.Test2 // contains undefined
})
})
In data I am getting contents, but data1 returns undefined.

I have found a solution and that is using different methods and different it blocks. The below codebase is working, but I want them in a single block.
The service methods
async getData1(model?: any) {
const listSelector='[ng-repeat="workflow in vm.workflows track by $index"]';
const data = await cy.get(ListSelector);
return data;
}
async getData2(model?: any) {
const listSelector='[ng-repeat="workflow in vm.workflows track by $index"]';
const data1 = await cy.get(ListSelector);
return data1;
}
The spec
describe('app test', () => {
beforeEach(() => {
cy.login();
cy.navigateUsingMenu('dashboard', '');
});
it('',async ()=>{
const result = await getData1();
})
it('',async ()=>{
const result = await getData2();
})
})

Promise all issue ( I get empty response )

How can I solve it? When I try to get a result from promise all, I get an empty array.
async function getMessage(arr) {
let response = [];
for (let count = 0; count < arr.length; count++) {
let protocol = await arr[count].link.split(':')[0];
if (protocol === 'http') {
await http.get(arr[count].link, async (res) => {
response.push(`${arr[count].link} - ${res.statusCode}\n`);
});
} else {
await https.get(arr[count].link, async (res) => {
response.push(`${arr[count].link} - ${res.statusCode}\n`);
});
}
}
return Promise.all(response)
.then((data) => {
//data === []
return data;
});
}

The await operator is used to wait for a Promise.
https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Operators/await
I assume you're using node's native http and https module. It's true they're async functions, but they cannot be used with await straight, since they're using callback not Promise.
Afaik, you can either manually "promisify" it with util.promisify, or use some 3rd party like isomorphic-fetch which already promisified it for you.
Example:
const sayHelloWithoutPromise = () => {
setTimeout(() => console.log('hello'), 0)
}
(async function() {
await sayHelloWithoutPromise()
console.log('world!')
})()
const sayHelloWithPromise = () => {
return new Promise(r =>
setTimeout(() => r(console.log('hello')), 0)
)
}
(async function() {
await sayHelloWithPromise()
console.log('world!')
})()

How to upload a file with puppeteer and dropzone?

I use puppeteer and I have a dropzone form.
I want to add a file from chrome headless to the dropzone form.
How can I do that?
Note:
The form contains some actions in some dropzone events (when added file).

Not sure if I understood the problem correctly, but try this:
const dropZoneInput = await page.$(inputID);
dropZoneInput.uploadFile(absolutePathToFile);

I have it working with puppeteer:
const fileInput = await page.$(
".{yourDropzoneClassName} input[type=file]"
);
await fileInput.uploadFile("path/to/file");

(async () => {
const browser = await puppeteer.launch({
headless: false,
ignoreDefaultArgs: true
});
const page = await browser.newPage();
await page.goto('https://react-dropzone.js.org/');
await page.waitForSelector('input[type=file]');
const fileInput = await page.$('#rsg-root > div > main > section > section:nth-child(3) > section > section:nth-child(1) > article > div:nth-child(2) > div.rsg--preview-60 > div > section > div > input[type=file]');
await fileInput.uploadFile("./test/playground.js");
///trigger event
await fileInput.evaluate(upload => upload.dispatchEvent(new Event('change', { bubbles: true })));
///
await page.close();
})();

Develop Reference

ruby bash windows laravel spring algorithm oracle macos go visual-studio

Puppeteer, looping through links selected by xpath - xpath

Related

response from await browser.tabs.sendMessage is set in chrome, but not in firefox

Vonage browser to browser audio call return some error

How can I use multiple await in cypress?

Promise all issue ( I get empty response )

How to upload a file with puppeteer and dropzone?

Categories

Resources