Ruby Script download photos - ruby

Goal: Download photos from iCloud Shared web album via script: https://www.icloud.com/sharedalbum/#B0Q5oqs3qGtDCal
I have following script from here: https://github.com/dsboulder/icloud-shared-album-download/blob/master/download_album.rb
(I have taken out the image resize)
Problem: Photos seem to download; however, they all look like this (script output further below):
#!/usr/bin/env ruby
# C:\Users\Win10IE11\Desktop\icloud-shared-album-download-master\download_album2.rb B0Q5oqs3qGtDCal
require 'selenium-webdriver'
require 'fileutils'
require 'yaml'
album_name = ARGV[0]
options = Selenium::WebDriver::Chrome::Options.new(args: ['headless'])
driver = Selenium::WebDriver.for(:chrome, options: options)
puts "Downloading album ID #{album_name}:"
dir = "C:/Users/Win10IE11/Downloads/#{album_name}"
movies_dir = "/home/pi/Videos"
FileUtils.mkdir_p(dir)
urls_seen = Set.new
files_seen = Set.new
driver.get("https://www.icloud.com/sharedalbum/##{album_name}")
puts " Navigated to index page: #{driver.current_url}"
sleep 2
driver.find_element(css: "[role=button]").click
sleep 5
c = 0
current_url = driver.current_url
seen_first = false
exit_early = false
until urls_seen.include?(current_url) or c >= 200 or exit_early do
retries = 0
begin
current_url = driver.current_url
puts " Navigated to: #{current_url}"
urls_seen.add(current_url)
i = driver.find_element(css: "img")
puts " Downloading image #{c}: #{i["src"]}"
u = URI.parse(i["src"])
ext = u.path.split(".").last.downcase
filename = "#{current_url.split(";").last}.#{ext}".downcase
path = "#{dir}/#{filename}"
if File.exist?(path)
if c == 0
seen_first = true
puts " Already seen first image, going backwards now"
elsif seen_first and c == 1
exit_early = true
puts " Already seen last image, we're probably done!"
else
puts " Skipping already downloaded file #{path}"
end
else
r = Net::HTTP.get_response(u)
puts " #{r.inspect}"
File.write(path, r.body)
puts " Wrote file of length #{r.body.length} to #{path}"
videos = driver.find_elements(css: ".play-button")
if videos.length > 0
puts " Found video!!!"
videos.first.click
video_src = driver.find_element(css: "video > source")["src"]
u = URI.parse(video_src)
ext = u.path.split(".").last.downcase
filename = "#{current_url.split("#").last.gsub(";", "_")}.#{ext}".downcase
path = "#{movies_dir}/#{filename}"
puts " Downloading from #{video_src} to #{path}"
driver.navigate.refresh
r = Net::HTTP.get_response(u)
File.write(path, r.body)
puts " Wrote #{r.body.length} bytes of video to #{path}"
end
end
c += 1
sleep 1
driver.find_element(css: "body").send_keys(seen_first ? :arrow_left : :arrow_right)
sleep 1
current_url = driver.current_url
rescue => e
puts "Error: #{e.inspect}"
retries += 1
if retries < 4
driver.quit rescue nil
puts "RETRY ##{retries}"
system "pkill -f chromedriver"
driver = Selenium::WebDriver.for(:chrome, options: options)
driver.get(current_url)
sleep 5
retry
end
end
end
puts " Finished #{c} photos in album #{album_name}!"
driver.quit
***Output:
Navigated to: https://www.icloud.com/sharedalbum/#B0Q5oqs3qGtDCal;64D46E01-D439-4FB3-9234-EEADFD92B4B8
Downloading image 22: https://cvws.icloud-content.com/S/AZmmX4aAk6O2XpXCavO3rA4XSNms/IMG_0023.JPG?o=AtHCwB51UajcHVvLEboQsSvM4hK5ZHb25DMLu5rjLgMs&v=1&z=https%3A%2F%2Fp26-content.icloud.com%3A443&x=1&a=BqocZLbrD6m1lXeHN6LXov32oNLDA-UfRgEAAAMxH0Y&e=1538045095&r=900d8d25-0a15-43e5-be59-2a4c9267cfaf-36&s=C3ee21ErkyHFKzq-JWjZkKXpah4
#<Net::HTTPOK 200 OK readbody=true>
Wrote file of length 1248141 to C:/Users/Win10IE11/Downloads/B0Q5oqs3qGtDCal/64d46e01-d439-4fb3-9234-eeadfd92b4b8.jpg

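I replaced the Net::HTTP download with an open-uri based one; documentation found here: https://cobwwweb.com/download-collection-of-images-from-url-using-ruby.html. The important difference is that the file is now opened in binary mode ('wb'): on Windows, File.write opens the file in text mode and translates line endings, which corrupts binary data such as JPEGs.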
Swapped old code:
File.write(path, r.body)
with:
File.open(dest, 'wb') { |f| f.write(u.read) }
Here is the fixed code:
#!/usr/bin/env ruby
# C:\Users\Win10IE11\Desktop\icloud-shared-album-download-master\dl5.rb B0Q5oqs3qGtDCal
require 'selenium-webdriver'
require 'fileutils'
require 'yaml'
require 'open-uri'
album_name = ARGV[0]
options = Selenium::WebDriver::Chrome::Options.new(args: ['headless'])
driver = Selenium::WebDriver.for(:chrome, options: options)
puts "Downloading album ID #{album_name}:"
dir = "C:/Users/Win10IE11/Downloads/#{album_name}"
movies_dir = dir
FileUtils.mkdir_p(dir)
urls_seen = Set.new
files_seen = Set.new
driver.get("https://www.icloud.com/sharedalbum/##{album_name}")
puts " Navigated to index page: #{driver.current_url}"
sleep 1
driver.find_element(css: "[role=button]").click
sleep 1
c = 0
current_url = driver.current_url
seen_first = true
exit_early = false
# Downloads +url+ and writes the raw bytes to +dest+.
#
# URI.open (from open-uri) is used instead of Kernel#open: since Ruby 3.0,
# Kernel#open no longer fetches URLs and would treat the URL as a local path.
# The destination is opened in binary mode ('wb') so image/video data is not
# altered by newline translation on Windows.
#
# @param url [String] address of the image or video to fetch
# @param dest [String] path of the local file to create or overwrite
# @return [nil]
def download_image(url, dest)
  URI.open(url) do |remote|
    File.open(dest, 'wb') { |local| local.write(remote.read) }
  end
  puts "Saved #{url} to #{dest}"
end
until urls_seen.include?(current_url) or c >= 200 or exit_early do
retries = 0
begin
current_url = driver.current_url
puts " Navigated to: #{current_url}"
urls_seen.add(current_url)
i = driver.find_element(css: "img")
# C:\Users\Win10IE11\Desktop\icloud-shared-album-download-master\dl5.rb B0Q5oqs3qGtDCal
puts " count #{c}"
videos = driver.find_elements(css: ".play-button")
if videos.length > 0
#puts " Found video!!!"
videos.first.click
i = driver.find_element(css: "video > source")
url = "#{i["src"]}"
local1 = "#{url.split('/').last}"
local = "#{c}_#{local1.split('?').first}"
download_image(url, "#{dir}/#{local}")
driver.navigate.refresh
sleep 1
else
#puts " not video!!!"
url = "#{i["src"]}"
local1 = "#{url.split('/').last}"
local = "#{c}_#{local1.split('?').first}"
download_image(url, "#{dir}/#{local}")
end
c += 1
sleep 0.1
driver.find_element(css: "body").send_keys(seen_first ? :arrow_left : :arrow_right)
sleep 0.1
current_url = driver.current_url
rescue => e
puts "Error: #{e.inspect}"
retries += 1
if retries < 4
driver.quit rescue nil
puts "RETRY ##{retries}"
system "pkill -f chromedriver"
driver = Selenium::WebDriver.for(:chrome, options: options)
driver.get(current_url)
sleep 1
retry
end
end
end
puts " Finished #{c} photos in album #{album_name}!"
driver.quit

Related

What condition do I check for to raise a custom error with Net::OpenTimeout execution expired error

Open-uri and Nokogiri are slow to scrape the site I want, hence the Net::OpenTimeout "execution expired" error. I attempted to code a custom error with rescue; however, I do not know what condition I can check for to raise that custom error.
I attempted a few if/else statements; however, I really just guessed at how to check whether I was going to get that error. I hard-coded a condition that failed and thus rescued the error. I am very new to Ruby and custom errors. In fact, this is my first.
class Scrape
Base = 'http://www.wine.com'
##menu = []
##pages = []
def self.index
index_url = Base + "/list/wine/7155?sortBy=savings&pricemax=90"
#below is where I need to check for the condition to
raise the error
if doc = Nokogiri::HTML(open(index_url))
container = doc.css('.prodList')
wines = container.css('.prodItem')
wines.each do |wine|
##menu << {
:link => wine.css('.prodItemInfo_link').attribute('href').value,
:name => wine.css('.prodItemInfo_name').text,
:rating => (wine.css('.averageRating_average').text.to_i) > 0 ?
(wine.css('.averageRating_average').text) : 'no rating',
:price => wine.css('.productPrice_price-saleWhole').text.strip
}
end
##menu.each do |item|
Bottle.new.create(item)
end
else
begin
raise Custom_error
rescue Custom_error => error
puts error.message
end
end
end
def self.scrape_page(wine_obj)
wine_link = wine_obj.link
individual_page = Base + wine_link
docu = Nokogiri::HTML(open(individual_page))
y = docu.css('.viewMoreModule_text')
more = docu.css('.viewMoreModule_text')
##pages << {
:obj => wine_obj,
:name => docu.css('.pipName').text,
:alcohol_percent => y
x = docu.css('.mobileProdAttrs').css('.prodAlcoholPercent')
y = x.css('.prodAlcoholPercent_percent').text,
:price => docu.css('span.productPrice_price-saleWhole').text,
:origin => docu.css('span.prodItemInfo_originText a').text,
:winemaker_notes => docu.css('.viewMoreModule_text').first.text,
:more => y[2].text,
:rating => docu.css('span.averageRating_average').first.text
}
Page.create_find_by_name( ##pages.last )
end
def self.pages
##pages
end
end
class Cli
def run
puts 'loading from cyberspace'
Scrape.index
Bottle.make_list
controller
end
def controller
input = ''
response = ''
puts ' '
view
while input != 11
response = gets.chomp.to_i
input = "#{response}11".to_i
if input == 111
menu
elsif input == 11
exit
elsif input > 0 && input < 26
find_by_input(input)
elsif input != 0 && input != 111
error_1
end
end
end
def view
puts "welcome to the wine bar"
puts "================="
puts " W I N E "
puts " B A R "
puts "================="
puts " "
puts "type 1 for list of wine"
puts " "
puts "type 0 to exit "
end
def menu
wines = Bottle.list
second_input = ''
while second_input != 0
puts "<<<<<<<<<<<<<<<<<<<<<<<<<<>>>>>>>>>>>>>>>>>>>>>>>>>>"
puts "type the corresponding number to view more wine info"
puts
"__________________________________________________________________"
wines.each do |wine|
puts "#{wine.index}) #{wine.name} #{wine.price}"
end
second_input = gets.chomp.to_i
if second_input > 0 && second_input < 26
find_by_input(second_input)
elsif second_input == 0
exit
second_input = 0
elsif second_input > 25 && second_input != 101
error_1
end
end
puts <<-DOC
the the wine number again
for winemaker notes
DOC
end
def find_by_input(input)
while input != 0
selection = Bottle.find_by_input(input)
puts "NAME: #{selection.name}"
puts "PRICE: $#{selection.price}"
puts "RATING: #{selection.rating}"
puts "________________________________________"
puts " type #{input} again "
puts " for more info "
puts " provided by the winemaker "
# reseting input and extending user control functionality
third_input = ''
third_input = gets.chomp.to_i
if third_input == selection.index
response = Scrape.scrape_page(selection)
view_2(response, third_input)
elsif input == 0
exit
end
end
end
def view_2(response, old_input)
next_input = ''
while next_input != 0
puts "Alcohol Percent: #{response.alcohol_percent}"
puts "Winemaker Notes: #{response.winemaker_notes}"
puts " "
puts "Type #{old_input} again for more!!"
next_input = gets.chomp.to_i
if next_input == old_input
input = 0
next_input = 0
# refacort as it puts out 88 again and should not. Also 0 is not
exiting with correct behavior
# refactor so looking for "#{input}"1 to prevent the recall of
input
more(response)
end
end
end
def more(response)
puts response.more
puts menu
end
def error_1
puts " WHOA coder "
puts "type a better number!"
end
def exit
puts <<-DOC
well that was fun
Thank you for checking out
my first cli program
DOC
end
end ```
# In-memory record for one scraped wine detail page.
#
# Every created page is appended to a class-wide list so that a page can be
# looked up by name instead of being created a second time.
class Page
  attr_accessor :alcohol_percent, :price, :name, :origin, :winemaker_notes,
                :rating, :more, :obj

  # All pages created so far, in creation order.
  @@web_pages = []

  # Returns the stored page whose name equals hash[:name], or builds, stores
  # and returns a new page from +hash+ when there is none.
  #
  # @param hash [Hash] attribute name => value; keys must match the accessors above
  # @return [Page]
  def self.create_find_by_name(hash)
    @@web_pages.find { |page| page.name == hash[:name] } || new.create(hash)
  end

  # Assigns every key/value pair of +hash+ through the matching writer,
  # stores the page and returns it.
  #
  # @param hash [Hash] attribute name => value
  # @return [Page] the page that was just stored
  # @raise [NoMethodError] if a key has no public writer
  def create(hash)
    hash.each { |key, value| public_send("#{key}=", value) }
    save
    view_more
  end

  # @return [Page, nil] the most recently stored page
  def view_more
    @@web_pages.last
  end

  # Appends this page to the class-wide list.
  #
  # @return [Array<Page>] the list of stored pages
  def save
    @@web_pages << self
  end
end
# One wine from the index page, plus the class-wide numbered menu built from
# all bottles created so far.
class Bottle
  attr_accessor :link, :name, :price, :rating, :index

  # Every bottle created so far, in creation order.
  @@bottles = []
  # Bottles sorted by price and numbered from 1; filled by make_list.
  @@numbered_list = []

  # Assigns every key/value pair of +hash+ through the matching writer and
  # stores the bottle.
  #
  # @param hash [Hash] attribute name => value
  # @return [Array<Bottle>] the list of stored bottles
  def create(hash)
    hash.each { |key, words| public_send("#{key}=", words) }
    save
  end

  # Appends this bottle to the class-wide list.
  #
  # @return [Array<Bottle>] the list of stored bottles
  def save
    @@bottles << self
  end

  # Sorts the stored bottles by price (cheapest first) and numbers them
  # starting at 1.
  #
  # Prices are scraped as text, so they are converted to numbers before
  # comparing; comparing the strings directly would put "10" before "9".
  #
  # @return [Array<Bottle>] the numbered list
  def self.make_list
    sorted = @@bottles.sort_by { |bottle| bottle.price.to_s.delete(',').to_f }
    sorted.each_with_index { |bottle, position| bottle.index = position + 1 }
    @@numbered_list = sorted
  end

  # @return [Array<Bottle>] the list built by the last make_list call (empty before that)
  def self.list
    @@numbered_list
  end

  # Looks up a bottle by its menu number.
  #
  # @param input [Integer] number assigned by make_list
  # @return [Bottle, nil] the matching bottle, or nil when the number is unknown
  def self.find_by_input(input)
    @@numbered_list.find { |wine| wine.index == input }
  end
end
class Scrape
Base = 'http://www.wine.com'
##menu = []
##pages = []
def self.index
index_url = Base + "/list/wine/7155?sortBy=savings&pricemax=90"
if doc = Nokogiri::HTML(open(index_url))
container = doc.css('.prodList')
wines = container.css('.prodItem')
wines.each do |wine|
##menu << {
:link => wine.css('.prodItemInfo_link').attribute('href').value,
:name => wine.css('.prodItemInfo_name').text,
:rating => (wine.css('.averageRating_average').text.to_i) > 0 ?
(wine.css('.averageRating_average').text) : 'no rating',
:price => wine.css('.productPrice_price-saleWhole').text.strip
}
end
##menu.each do |item|
Bottle.new.create(item)
end
else
begin
raise Custom_error
rescue Custom_error => error
puts error.message
end
end
end
def self.scrape_page(wine_obj)
wine_link = wine_obj.link
individual_page = Base + wine_link
docu = Nokogiri::HTML(open(individual_page))
y = docu.css('.viewMoreModule_text')
more = docu.css('.viewMoreModule_text')
##pages << {
:obj => wine_obj,
:name => docu.css('.pipName').text,
alcholo = docu.css('.mobileProdAttrs').css('.prodAlcoholPercent'),
:alcohol_percent => alcholo.css('.prodAlcoholPercent_percent').text,
:price => docu.css('span.productPrice_price-saleWhole').text,
:origin => docu.css('span.prodItemInfo_originText a').text,
:winemaker_notes => docu.css('.viewMoreModule_text').first.text,
:more => y[2].text,
:rating => docu.css('span.averageRating_average').first.text
}
Page.create_find_by_name( ##pages.last )
end
def self.pages
##pages
end
end
When the internet connection is down/too slow the custom error is raised.
When an exception is thrown, the program stops its normal flow. You need to surround the part of the code that can throw an exception with a begin..rescue clause, and attempt to handle it, re-raise it, or raise another exception instead.
In your example, that would be:
begin
  # URI.open comes from open-uri; the block form closes the connection/tempfile
  # once Nokogiri has read the document.
  URI.open(url) { |io| Nokogiri::HTML(io) }
rescue Net::OpenTimeout => e
  # The timeout class is Net::OpenTimeout (there is no Net::OpenTimeoutError).
  # Log the error message if needed, then raise your CustomError instead.
  raise CustomError, e.message
end
You can omit the begin and put a rescue clause at the end of the method; Ruby will interpret this as if the entire method body were wrapped in a begin..rescue block, something like this:
# Fetches +url+ and parses the response with Nokogiri.
#
# A connection timeout (Net::OpenTimeout) is translated into CustomError so
# callers only need to handle the application's own error type. The block
# form of URI.open closes the underlying IO once the document has been read.
#
# @param url [String] address of the page to load
# @return [Object] whatever Nokogiri::HTML returns for the fetched page
# @raise [CustomError] when opening the connection times out
def open_page(url)
  URI.open(url) { |io| Nokogiri::HTML(io) }
rescue Net::OpenTimeout => e
  raise CustomError, e.message
end

else without rescue is useless and expecting keyword_end

This code gives me these errors in my ruby console:
1) warning: else without rescue is useless
2) syntax error, unexpected end-of-input, expecting keyword_end
Why am I getting both of these errors at the same time?
require 'nokogiri'
require 'httparty'
require 'byebug'
require 'awesome_print'
require 'watir'
def input #takes user input and grabs the url for that particular search
puts "1) Enter the job title that you want to search for \n"
j_input = gets.chomp
job = j_input.split(/ /).join("+")
puts "================================= \n"
puts "1/2)Do you want to input city-and-state(1) or zipcode(2)? \n"
choice = gets.chomp
if choice == "1"
puts "2) Enter the city that you want to search for \n"
city_input = gets.chomp
city = city_input.split(/ /).join("+")
puts "================================= \n"
puts "3) Enter the state that you want to search for \n"
state_input = gets.chomp
state = "+" + state_input
puts target_url = "https://www.indeed.com/resumes/?q=#{job}&l=#{city}%2C#{state}&cb=jt"
elsif choice == "2"
puts "Enter the zipcode that you want to search for \n"
zipcode = gets.chomp
puts target_url = "https://www.indeed.com/resumes?q=#{job}&l=#{zipcode}&cb=jt"
else
puts "error"
end
unparsed_page = HTTParty.get(target_url)
parsed_page = Nokogiri::HTML(unparsed_page)
resume_listing = parsed_page.css('div.sre-entry')
per_page = resume_listing.count
resumes = Array.new
counter = 0
result_count = parsed_page.css('div#result_count').text.split(' ')[0].to_f
page_count = (result_count.to_f / per_page.to_f ).ceil
current_count = 0
byebug
if counter <= 0
unparsed_page = HTTParty.get(target_url)
parsed_page = Nokogiri::HTML(unparsed_page)
resume_listing = parsed_page.css('div.sre-entry')
per_page = resume_listing.count
pagination_resume_listing.each do |resume_listing|
#resume_info = {
# title:
# link:
# skills:
# education:
#}
#resumes << resume_info
puts "Added #{resume_info[:title]}"
else
while current_count <= page_count * per_page
pagination_url = "https://www.indeed.com/resumes?q=#{job}&l=#{zipcode}&co=US&cb=jt&start=#{current_count}"
unparsed_pagination_page = HTTParty.get(pagination_url)
pagination_parsed_page = Nokogiri::HTML(unparsed_pagination_page)
pagination_resume_listing = pagination_parsed_page.css('div.sre-entry')
pagination_resume_listing.each do |resume_listing|
#resume_info = {
# title:
# link:
# skills:
# education:
#}
#resumes << resume_info
puts "Added #{resume_info[:title]}"
current_count += 50
end
end
end
end
end
It won't allow me to fix the else without rescue issue without telling me that it expects an extra end at the end of my code. Of course when I put the end there it does nothing and says that it wants another end
I would say that your code is horribly formatted, but it would first have to be formatted at all to be even that much. Once you format it, the answer is quite obvious, you have a mis-placed end.
puts "Added #{resume_info[:title]}"
# There should be an `end` here for the "do" block above
else
Here is what it should be:
require 'nokogiri'
require 'httparty'
require 'byebug'
require 'awesome_print'
require 'watir'
def input #takes user input and grabs the url for that particular search
puts "1) Enter the job title that you want to search for \n"
j_input = gets.chomp
job = j_input.split(/ /).join("+")
puts "================================= \n"
puts "1/2)Do you want to input city-and-state(1) or zipcode(2)? \n"
choice = gets.chomp
if choice == "1"
puts "2) Enter the city that you want to search for \n"
city_input = gets.chomp
city = city_input.split(/ /).join("+")
puts "================================= \n"
puts "3) Enter the state that you want to search for \n"
state_input = gets.chomp
state = "+" + state_input
puts target_url = "https://www.indeed.com/resumes/?q=#{job}&l=#{city}%2C#{state}&cb=jt"
elsif choice == "2"
puts "Enter the zipcode that you want to search for \n"
zipcode = gets.chomp
puts target_url = "https://www.indeed.com/resumes?q=#{job}&l=#{zipcode}&cb=jt"
else
puts "error"
end
unparsed_page = HTTParty.get(target_url)
parsed_page = Nokogiri::HTML(unparsed_page)
resume_listing = parsed_page.css('div.sre-entry')
per_page = resume_listing.count
resumes = Array.new
counter = 0
result_count = parsed_page.css('div#result_count').text.split(' ')[0].to_f
page_count = (result_count.to_f / per_page.to_f ).ceil
current_count = 0
byebug
if counter <= 0
unparsed_page = HTTParty.get(target_url)
parsed_page = Nokogiri::HTML(unparsed_page)
resume_listing = parsed_page.css('div.sre-entry')
per_page = resume_listing.count
pagination_resume_listing.each do |resume_listing|
#resume_info = {
# title:
# link:
# skills:
# education:
#}
#resumes << resume_info
puts "Added #{resume_info[:title]}"
end
else
while current_count <= page_count * per_page
pagination_url = "https://www.indeed.com/resumes?q=#{job}&l=#{zipcode}&co=US&cb=jt&start=#{current_count}"
unparsed_pagination_page = HTTParty.get(pagination_url)
pagination_parsed_page = Nokogiri::HTML(unparsed_pagination_page)
pagination_resume_listing = pagination_parsed_page.css('div.sre-entry')
pagination_resume_listing.each do |resume_listing|
#resume_info = {
# title:
# link:
# skills:
# education:
#}
#resumes << resume_info
puts "Added #{resume_info[:title]}"
current_count += 50
end
end
end
end
Lesson here is to ALWAYS format your code, for everyone's sake, most of all your own. There is no excuse to not be formatted, and not doing so leads to trivial problems like this that are difficult to find.
NOTE
I did not test this or run it, simply formatted, which made the mis-matched end obvious.

How to Hash content to write in file as format mentioned as below?

I have written a Ruby script for that. In it, you can check that "all_data" has all the required content.
#!/usr/bin/env ruby
require 'docx'
file_data = []
name_file = "test"
t = ""
array_desc = []
heading_hash = {}
all_data = {}
temp = ""
output = ""
folder_name = ""
directory_name = ""
flag = true
count = 0
md_file_name = ''
Dir.glob("**/*.docx") do |file_name|
doc = Docx::Document.open(file_name)
first_table = doc.tables[0]
doc.tables.each do |table|
table.rows.each do |row| # Row-based iteration
row.cells.each_with_index do |cell, i|
if i == 2
file_data << cell.text.gsub('=','')
end
end
end
end
file_data.each_with_index do |l, d|
if l.include? file_data[d]
if ((l.strip)[0].to_i != 0)
md_file_name = file_data[d].split(".")
#start folder name
if flag
directory_name = md_file_name[0].to_i
flag = false
end
count +=1
t = file_data[d+1]
if(array_desc.size > 0)
heading_hash[temp] = array_desc
all_data[md_file_name[0].strip] = heading_hash
array_desc = []
end
else
if(t != l)
array_desc << l
temp = t
end
end
end
end
if(array_desc.size> 0)
heading_hash[temp] = array_desc
all_data[md_file_name[0].strip] = heading_hash
array_desc = []
end
all_data.each do |k, v|
v.each do |(hk, hv)|
if hk != ""
chapter_no = k
if (k[0,1] == 0.to_s)
chapter_no = k
else
chapter_no = "0#{k}"
end
Dir.mkdir("#{chapter_no}") unless File.exists?("#{chapter_no}")
output_name = "#{chapter_no}/#{File.basename("01", '.*')}.md"
output = File.open(output_name, 'w')
# output << "#"+"#{hk}\n\n"
# output << "#{hv} \n\n"
hv.each do |des|
# puts des
end
end
end
end
end
source docx file
Download the above file and put the script and the docx (source file) in the same folder. When you run the script from the terminal ($ ./script.rb), you will see folders named 01, 02, etc., and inside each there will be a file with the .md extension.
I want to output as below description:
## FOLDER 01 > FILE 01.md, here data in file like hk as heading (for Heading you can put # before hk)and hv
## FOLDER 02 > FILE 01.md, here data in file like hk as heading (for Heading you can put # before hk)and hv
Please use my code and check that is working or not.
Dir.glob("**/*.docx") do |file_name|
doc = Docx::Document.open(file_name)
first_table = doc.tables[0]
doc.tables.each do |table|
table.rows.each do |row|
row.cells.each_with_index do |cell, i|
if i == 2
file_data << cell.text.gsub('=','')
end
end
end
end
file_data.each_with_index do |l, d|
if ((l.strip)[0].to_i != 0)
md_file_name = file_data[d].split(".")
#start folder name
if flag
directory_name = md_file_name[0].to_i
flag = false
end
count +=1
t = file_data[d+1]
if(array_desc.size > 0)
heading_hash[temp] = array_desc
array_desc=[]
all_data[file_data[d+1]] = array_desc
end
else
if(t != l)
array_desc << l
temp = t
end
end
end
chapter_no = 1
all_data.each do |k, v|
Dir.mkdir("#{chapter_no}") unless File.exists?("#{chapter_no}")
output_name = "#{chapter_no}/#{File.basename("01", '.*')}.md"
output = File.open(output_name, 'a')
output << "#"+"#{k}\n\n"
v.each do |d|
output << "#{d} \n"
end
chapter_no= chapter_no+1
end
end
It will give exact output as you shared above. Let me know if you need more help.

Ruby Watir: cannot launch browser in a thread in Linux

I'm trying to run this code on Red Hat Linux, and it won't launch a browser. The only way I can get it to work is if I ALSO launch a browser OUTSIDE of the thread, which makes no sense to me. Here is what I mean:
require 'watir-webdriver'
$alphabet = ["A", "B", "C"]
$alphabet.each do |z|
puts "pshaw"
Thread.new{
Thread.current["testPuts"] = "ohai " + z.to_s
Thread.current["myBrowser"] = Watir::Browser.new :ff
puts Thread.current["testPuts"] }
$browser = Watir::Browser.new :ff
end
the output is:
pshaw
(launches browser)
ohai A
(launches browser)
pshaw
(launches browser)
ohai B
(launches browser)
pshaw
(launches browser)
ohai C
(launches browser)
However, if I remove the browser launch that is outside of the thread, as so:
require 'watir-webdriver'
$alphabet = ["A", "B", "C"]
$alphabet.each do |z|
puts "pshaw"
Thread.new{
Thread.current["testPuts"] = "ohai " + z.to_s
Thread.current["myBrowser"] = Watir::Browser.new :ff
puts Thread.current["testPuts"] }
end
The output is:
pshaw
pshaw
pshaw
What is going on here? How do I fix this so that I can launch a browser inside a thread?
EDIT TO ADD:
The solution Justin Ko provided worked on the pseudocode above, but it's not helping with my actual code:
require 'watir-webdriver'
require_relative 'Credentials'
require_relative 'ReportGenerator'
require_relative 'installPageLayouts'
require_relative 'PackageHandler'
Dir[(Dir.pwd.to_s + "/bmx*")].each {|file| require_relative file } #this includes all the files in the directory with names starting with bmx
module Runner
def self.runTestCases(orgType, *caseNumbers)
$testCaseArray = Array.new
caseNumbers.each do |thisCaseNum|
$testCaseArray << thisCaseNum
end
$allTestCaseResults = Array.new
$alphabet = ["A", "B", "C"]
#count = 0
#multiOrg = 0
#peOrg = 0
#eeOrg = 0
#threads = Array.new
$testCaseArray.each do |thisCase|
$alphabet[#count] = Thread.new {
puts "working one"
Thread.current["tBrowser"] = Watir::Browser.new :ff
puts "working two"
if ((thisCase.declareOrg().downcase == "multicurrency") || (thisCase.declareOrg().downcase == "mc"))
currentOrg = $multicurrencyOrgArray[#multiOrg]
#multiOrg += 1
elsif ((thisCase.declareOrg().downcase == "enterprise") || (thisCase.declareOrg().downcase == "ee"))
currentOrg = $eeOrgArray[#eeOrg]
#eeOrg += 1
else #default to single currency PE
currentOrg = $peOrgArray[#peOrg]
#peOrg += 1
end
setupOrg(currentOrg, thisCase.testCaseID, currentOrg.layoutDirectory)
runningTest = thisCase.actualTest()
if runningTest.crashed != "crashed" #changed this to read the attr_reader isntead of the deleted caseStatus method from TestCase.rb
cleanupOrg(thisCase.testCaseID, currentOrg.layoutDirectory)
end
#threads << Thread.current
}
#count += 1
end
#threads.each do |thisThread|
thisThread.join
end
writeReport($allTestCaseResults)
end
def self.setupOrg(thisOrg, caseID, layoutPath)
begin
thisOrg.logIn
pkg = PackageHandler.new
basicInstalled = "false"
counter = 0
until ((basicInstalled == "true") || (counter == 5))
pkg.basicInstaller()
if Thread.current["tBrowser"].text.include? "You have attempted to access a page"
thisOrg.logIn
else
basicInstalled = "true"
end
counter +=1
end
if !((caseID.include? "bmxb") || (caseID.include? "BMXB"))
moduleInstalled = "false"
counter2 = 0
until ((moduleInstalled == "true") || (counter == 5))
pkg.packageInstaller(caseID)
if Thread.current["tBrowser"].text.include? "You have attempted to access a page"
thisOrg.logIn
else
moduleInstalled = "true"
end
counter2 +=1
end
end
installPageLayouts(layoutPath)
rescue
$allTestCaseResults << TestCaseResult.new(caseID, caseID, 1, "SETUP FAILED!" + "<p>#{$!}</p><p>#{$#}</p>").hashEmUp
writeReport($allTestCaseResults)
end
end
def self.cleanupOrg(caseID, layoutPath)
begin
uninstallPageLayouts(layoutPath)
pkg = PackageHandler.new
pkg.packageUninstaller(caseID)
Thread.current["tBrowser"].close
rescue
$allTestCaseResults << TestCaseResult.new(caseID, caseID, 1, "CLEANUP FAILED!" + "<p>#{$!}</p><p>#{$#}</p>").hashEmUp
writeReport($allTestCaseResults)
end
end
end
The output it's generating is:
working one
working one
working one
It's not opening a browser or doing any of the subsequent code.
It looks like the code is having the problem mentioned in the Thread class documentation:
If we don't call thr.join before the main thread terminates, then all
other threads including thr will be killed.
Basically, your main thread is finishing almost instantaneously. However, the threads that create browsers take a lot longer than that. As a result, the threads get terminated before the browser opens.
By adding a long sleep at the end, you can see that your browsers can be opened by your code:
require 'watir-webdriver'
$chunkythread = ["A", "B", "C"]
$chunkythread.each do |z|
puts "pshaw"
Thread.new{
Thread.current["testwords"] = "ohai " + z.to_s
Thread.current["myBrowser"] = Watir::Browser.new :ff
puts Thread.current["testwords"] }
end
sleep(300)
However, for more reliability, you should join all the threads at the end:
require 'watir-webdriver'
threads = []
$chunkythread = ["A", "B", "C"]
$chunkythread.each do |z|
puts "pshaw"
threads << Thread.new{
Thread.current["testwords"] = "ohai " + z.to_s
Thread.current["myBrowser"] = Watir::Browser.new :ff
puts Thread.current["testwords"] }
end
threads.each { |thr| thr.join }
For the actual code example, putting @threads << Thread.current inside the thread will not work. The join will be evaluated as if @threads were empty. You could try doing the following:
$testCaseArray.each do |thisCase|
  # Collect the Thread object returned by Thread.new in the main thread, so
  # the list is already populated when the join loop below runs.
  @threads << Thread.new {
    puts "working one"
    Thread.current["tBrowser"] = Watir::Browser.new :ff
    # Do your other thread stuff
  }
  $alphabet[@count] = @threads.last
  @count += 1
end
# Block until every worker has finished; otherwise the main thread exits and
# the workers are killed before their browsers open.
@threads.each do |thisThread|
  thisThread.join
end
Note that I am not sure why you want to store the threads in $alphabet. I put in the $alphabet[@count] = @threads.last line, but it could be removed if not in use.
I uninstalled Watir 5.0.0 and installed Watir 4.0.2, and now it works fine.

Ruby script error

I've got a ruby script
#!/usr/bin/ruby
require 'rubygems'
require 'mechanize'
require 'nokogiri'
require 'highline/import'
require 'stringio'
#Change based on Semester
$term = '09'
$year = '2012'
$frequency = 4 #Number of Seconds between check requests
$agent = Mechanize.new
$agent.redirect_ok = true
$agent.user_agent = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_8) AppleWebKit/535.19 (KHTML, like Gecko) Chrome/18.0.1025.11 Safari/535.19"
$agent.verify_mode = OpenSSL::SSL::VERIFY_NONE
#Uber simway to colorize outputin
# Adds a tiny ANSI colouring helper to String.
class String
  # Wraps the string in an ANSI escape sequence and a trailing reset.
  #
  # @param c [Symbol, Object] one of :black, :red, :green, :yellow, :blue,
  #   :magenta, :cyan, :white; any other value is interpolated into the
  #   escape sequence as-is
  # @return [String] the receiver surrounded by the escape and "\e[0m"
  def color(c)
    # The named colours map to the consecutive codes 30..37 in this order.
    names = [:black, :red, :green, :yellow, :blue, :magenta, :cyan, :white]
    position = names.index(c)
    code = position ? 30 + position : c
    "\e[#{code}m#{self}\e[0m"
  end
end
#Logins, Gets the Courses, Returns Courses Obj with Name/URL/Tools for each
# Submits the first form on the login page with the given credentials.
#
# @param username [String] value for the form's username field
# @param password [String] value for the form's password field
# @return [Boolean] false when the response body contains
#   "Invalid username or password", true otherwise
def login(username, password)
  login_page = $agent.get("https://auth.vt.edu/login?service=https://webapps.banner.vt.edu/banner-cas-prod/authorized/banner/SelfService")
  form = login_page.forms.first
  form.set_fields({ :username => username, :password => password })
  response_body = form.submit.body
  rejected = response_body.match(/Invalid username or password/)
  rejected ? false : true
end
#Gets Course Information
# Loads the comments page for +crn+ (using $term and $year) and flattens its
# tables into a hash.
#
# The page is read row by row: a row containing a "Days" cell announces that
# the NEXT row holds the :rowA columns, and a row containing an "Instructor"
# cell announces that the NEXT row holds the :rowB columns. Cells missing
# from a data row are skipped instead of raising.
#
# @param crn [String] course reference number
# @return [Hash, false] hash with :title, :crn and whichever column keys were
#   found; false when the page cannot be fetched/parsed or has no title cell
def getCourse(crn)
  begin
    courseDetails = Nokogiri::HTML($agent.get(
      "https://banweb.banner.vt.edu/ssb/prod/HZSKVTSC.P_ProcComments?CRN=#{crn}&TERM=#{$term}&YEAR=#{$year}"
    ).body)
  rescue StandardError
    return false # Failed to get course
  end

  title_cell = courseDetails.css('td.title').last
  return false unless title_cell # No title on the page: treat as a failed lookup

  course = {}
  course[:title] = title_cell.text.gsub(/-\ +/, '')
  course[:crn] = crn

  # NOTE(review): :end appears twice in :rowA, so the fifth cell overwrites the
  # value taken from the third — kept as written; confirm the intended names.
  layouts = {
    :rowA => [:i, :days, :end, :begin, :end, :exam],
    :rowB => [:instructor, :type, :status, :seats, :capacity]
  }

  expected = nil # layout announced by the previous row, if any
  courseDetails.css('table table tr').each do |row|
    cells = row.css('td')

    (layouts[expected] || []).each_with_index do |key, position|
      cell = cells[position]
      course[key] = cell.text if cell
    end

    # An announcement only applies to the row immediately after it.
    expected = nil
    cells.each do |cell|
      case cell.text
      when "Days"
        expected = :rowA
      when "Instructor"
        expected = :rowB
      end
    end
  end

  course
end
#Registers you for the given CRN, returns true if successful, false if not
# Walks from the main menu to the add/drop form for $year/$term, enters +crn+
# and submits the form.
#
# @param crn [String] course reference number to add
# @return [Boolean] true when the response mentions the CRN and does not
#   contain "Registration Errors"; false otherwise
def registerCrn(crn)
  # Follow the same path through the site as a browser would
  $agent.get("https://banweb.banner.vt.edu/ssb/prod/twbkwbis.P_GenMenu?name=bmenu.P_MainMnu")
  status_page = $agent.get("https://banweb.banner.vt.edu/ssb/prod/hzskstat.P_DispRegStatPage")
  add_drop_link = status_page.link_with(:href => "/ssb/prod/bwskfreg.P_AddDropCrse?term_in=#{$year}#{$term}")
  add_drop_page = add_drop_link.click

  # Fill in the CRN box and submit
  form = add_drop_page.form_with(:action => '/ssb/prod/bwckcoms.P_Regs')
  form.fields_with(:id => 'crn_id1').first.value = crn
  form['CRN_IN'] = crn
  submit_button = form.button_with(:value => 'Submit Changes')
  result = form.submit(submit_button).body

  return false unless result =~ /#{crn}/
  return false if result =~ /Registration Errors/

  true
end
#Main loop that checks the availaibility of each courses and fires to registerCrn on availaibility
# Polls every course in +courses+ forever, pausing $frequency seconds between
# passes, and tries to register as soon as a course is not reported as full.
#
# Successfully registered courses are removed from +courses+ (the caller's
# array is modified). Removal happens after each pass rather than during it:
# deleting from the array while iterating over it would skip the course that
# follows every removed one.
#
# @param courses [Array<Hash>] hashes with at least :crn and :title
# @return [void] loops until interrupted
def checkCourses(courses)
  requestCount = 0
  startTime = Time.new
  loop do
    system("clear")
    requestCount += 1
    nowTime = Time.new
    puts "Checking Availaibility of CRNs".color(:yellow)
    puts "--------------------------------\n"
    puts "Started:\t#{startTime.asctime}".color(:magenta)
    puts "Now: \t#{nowTime.asctime}".color(:cyan)
    puts "Request:\t#{requestCount} (Once every #{$frequency} seconds)".color(:green)
    puts "--------------------------------\n\n"

    registered = []
    courses.each do |c|
      puts "#{c[:crn]} - #{c[:title]}".color(:blue)
      course = getCourse(c[:crn])
      next unless course # Lookup failed; try again on the next pass

      puts "Availaibility: #{course[:seats]} / #{course[:capacity]}".color(:red)
      unless course[:seats] =~ /Full/
        if registerCrn(c[:crn])
          puts "CRN #{c[:crn]} Registration Sucessfull"
          registered << c
        else
          puts "Couldn't Register"
        end
      end
      print "\n"
    end

    # Drop the registered courses only now that the pass is complete.
    courses.reject! { |c| registered.any? { |done| done.equal?(c) } }

    sleep $frequency
  end
end
#Add courses to be checked
# Interactive prompt that builds the list of courses to watch.
#
# Each pass clears the screen, lists the courses chosen so far and asks for
# input: a five-digit CRN shows that course and offers to add it; "start"
# hands the list to checkCourses; anything else is ignored.
#
# getCourse returns false when a course cannot be loaded. That case is
# reported on the next screen instead of crashing on a lookup into false.
#
# @return [void] loops until interrupted
def addCourses
  crns = []
  notice = nil # message to show once after the next screen clear
  loop do
    system("clear")
    puts "Your CRNs:".color(:red)
    crns.each do |crn|
      puts " -> #{crn[:title]} (CRN: #{crn[:crn]})".color(:magenta)
    end
    if notice
      puts "\n#{notice}".color(:red)
      notice = nil
    end

    # Prompt for CRN
    alt = (crns.length > 0) ? " (or just type 'start') " : " "
    input = ask("\nEnter a CRN to add it#{alt}".color(:green) + ":: ") { |q| q.echo = true }

    # Validate CRN to be exactly 5 digits (\A/\z anchor the whole string)
    if input =~ /\A\d{5}\z/
      c = getCourse(input.to_s)
      unless c
        notice = "Could not load course information for CRN #{input}"
        next
      end

      # Display CRN info
      puts "\nCourse: #{c[:title]} - #{c[:crn]}".color(:red)
      puts "--> Time: #{c[:begin]}-#{c[:end]} on #{c[:days]}".color(:cyan)
      puts "--> Teacher: #{c[:instructor]}".color(:cyan)
      puts "--> Type: #{c[:type]} || Status: #{c[:status]}".color(:cyan)
      puts "--> Availability: #{c[:seats]} / #{c[:capacity]}\n".color(:cyan)

      # Add class prompt
      add = ask("Add This Class? (yes/no)".color(:yellow) + ":: ") { |q| q.echo = true }
      crns.push(c) if add =~ /yes/
    elsif input == "start"
      checkCourses(crns)
    end
  end
end
# Entry point: greets the user, asks for credentials and, when login
# succeeds, starts the course selection prompt. On a failed login it prints
# a message and exits.
def main
  system("clear")
  puts "Welcome to CourseAdd by mil".color(:blue)

  pid = ask("PID ".color(:green) + ":: ") { |q| q.echo = true }
  secret = ask("Password ".color(:green) + ":: " ) { |q| q.echo = "*" } # mask typed characters
  system("clear")

  unless login(pid, secret)
    puts "Invalid PID/Password"
    exit
  end

  addCourses
end
main
but when I run `ruby Untitled.rb` it gives me this error.
/System/Library/Frameworks/Ruby.framework/Versions/1.8/usr/lib/ruby/1.8/rubygems/custom_require.rb:31:in `gem_original_require': no such file to load -- mechanize (LoadError)
from /System/Library/Frameworks/Ruby.framework/Versions/1.8/usr/lib/ruby/1.8/rubygems/custom_require.rb:31:in `require'
from /Users/user/Desktop/Untitled.rb:3
What does this mean and how can I fix it? I'm not sure if I need to be doing this through an IDE or if terminal works. I'm brand new to ruby so I honestly have not a clue what the issue could be.
You need to install mechanize. In your terminal, type:
gem install mechanize
Retry your script when it finishes installing. If you have other gems that are missing, you can use the same command to install them.
gem install <gem name>

Resources