Ruby <NoMethodError> - ruby

I'm using the following script to download all the images in an FFFFound directory, but I'm having an issue: after the first batch of files I get this error message:
ffffound_mirror_db.rb:45in 'block in populate_db': undefined method 'inner_html' for nil:NilClass <NoMethodError>
from ffffound_mirror_db.rb:39:in 'each'
from ffffound_mirror_db.rb:39:in 'populate_db'
from ffffound_mirror_db.rb:190:in <main>
I'm trying to download all the 99 pages of a directory so the offset has to increase 25 every batch, eg: 0, 25, 50, 75, 100
#!/usr/bin/ruby
require 'rubygems'
require 'etc'
require 'hpricot'
require 'json'
require 'open-uri'
require 'sqlite3'
require 'time'
require 'date'
require 'fileutils'
# Walks the ffffound listing pages for `user` (or the site front page when
# user == "all") in steps of 25, and for every image block found on a page:
# extracts its metadata, downloads the image via download_file, and writes
# one row into the `images` table through a prepared INSERT OR REPLACE.
#
# db   - database handle; must respond to prepare(sql).
# user - ffffound username, or "all" for the front-page listing.
# type - listing type placed in the URL path after the username.
#
# Prints progress to stdout. The last expression is a puts, so the method
# returns nil.
def populate_db(db, user, type)
# NOTE(review): domain already ends in "/" and the URLs built below add
# another "/", so the requested URLs contain "//".
domain = "http://ffffound.com/"
offset = 0
images_sql = <<EOS
INSERT OR REPLACE INTO
images (id, url, src, title, orig_url, orig_src, count, date, related)
values (:id, :ffffound_url, :ffffound_img, :title, :orig_url, :orig_img, :count, :date, :rel)
EOS
# Prepared once; executed below for each image with a hash of named values.
images_ins = db.prepare(images_sql)
# related_ins = db.prepare( "insert into related values (?, ?, ?)" )
# Collects every parsed info hash; only its size is reported at the end.
img = []
# NOTE(review): `while` has no condition on its own line. Ruby appears to
# take the following if/else expression (the page fetch, whose value is the
# parsed document) as the loop condition, so the loop only ends through the
# `break` statements below -- confirm.
while
if user == "all" # wow, this is naughty
doc = Hpricot(open("#{ domain }/?offset=#{ offset }&"))
else
doc = Hpricot(open("#{ domain }/home/#{ user }/#{ type }/?offset=#{ offset }&"))
end
# One blockquote.asset element per image on the page.
images = (doc/"blockquote.asset")
puts "Got #{ images.size.to_s } images at offset #{ offset.to_s }"
break if (images.size == 0)
images.each do |image|
# can I make this block into a method somehow?
info = {}
# image title
title_elem = (image/"div.title")
# NOTE(review): at("a") presumably yields nil when the title div contains no
# link; calling inner_html on it would then raise NoMethodError -- confirm.
info[:title] = title_elem.at("a").inner_html
# original source image
src_elem = (image/"div.title")
info[:orig_url] = src_elem.at("a")["href"]
# from description, break out img url, date posted (relative!), count
desc_elem = (image/"div.description")
desc = desc_elem.inner_html
# Everything before the first <br> is kept as the original image URL.
info[:orig_img] = desc.gsub(/<br ?\/?>.*/, "")
# Everything after the last <br>, minus the trailing link markup, is the date.
datestr = desc.gsub(/.*<br ?\/?>/, "")
datestr = datestr.gsub(/<a h.*/, "")
datestr = datestr+" +0900" # ffffound uses Japanese time, UTC +0900
# A parse failure is swallowed; dt is then nil and the stored date is
# nil.to_i, i.e. 0.
begin
dt = Time.parse(datestr)
rescue
end
info[:date] = dt.to_i
# Keep only the digits of the link text as the count.
count = desc_elem.at("a").inner_text
count = count.gsub(/[\D]/, "")
info[:count] = count
# ffffound image URL and page URL, and ffffound ID (could generate
# URL from ID but would lose ?c form; src would lose _m)
image_block = (image/"table td")
ffffound_url = image_block.at("a")['href']
ffffound_img = image_block.at("img")['src']
# This first assignment is overwritten on the next line.
id = ffffound_img
# The id is the seventh "/"-separated piece of the image URL, with
# everything from the first "_" onwards removed.
id = ffffound_img.split('/')[6]
id = id.gsub(/_.*/, "")
info[:id] = id
info[:ffffound_url] = ffffound_url
info[:ffffound_img] = ffffound_img
download_file(ffffound_img, id)
# might as well get related asset IDs
rel = Array.new
relateds = (image/"div.related_to_item_xs")
relateds.each do |related|
path = related.at("a")['href']
# Reassigns the outer `id` local; info[:id] was already stored above.
id = path[ path.index(/\//, 2)+1 .. -1 ]
rel.push(id)
# TODO normalised table for related IDs
end
info[:rel] = rel.join(",")
img.unshift(info)
# put in db
images_ins.execute(info)
end
break if (images.size < 25) # more efficient than doing another fetch
offset = offset + 25
end
puts "Got #{ img.size } images"
end
# Creates the tables used by the script in +db+ when they do not exist yet.
#
# db - database handle; must respond to execute(sql).
#
# Executes the `images` and `related` CREATE TABLE statements, in that order.
# Returns true.
def create_db(db)
  images = <<EOC
CREATE TABLE IF NOT EXISTS
images (id TEXT PRIMARY KEY,
url TEXT,
src TEXT,
title TEXT,
orig_url TEXT,
orig_src TEXT,
date INTEGER,
count INTEGER,
related TEXT,
posted BOOL);
EOC
  # The comma after `source INTEGER` matters: without it SQLite does not
  # report an error but reads "INTEGER related INTEGER" as the type name of
  # `source`, so the `related` column is never created.
  related = <<EOC
CREATE TABLE IF NOT EXISTS
related (id INTEGER PRIMARY KEY,
source INTEGER,
related INTEGER);
EOC
  # Schema for a tumblr cross-posting table. It was defined but never executed
  # in the original script; it is kept here for reference and still not run.
  tumblr = <<EOC
CREATE TABLE tumblr (id INTEGER PRIMARY KEY,
ffffound_id TEXT,
tumblr_id INTEGER,
create_status INTEGER,
edit_status INTEGER);
EOC
  db.execute(images)
  db.execute(related)
  true
end
# Saves the resource at +url+ as images/<id>.jpg unless that file already
# exists.
#
# url - location handed to Kernel#open. The script loads open-uri so that
#       open accepts http URLs.
#       NOTE(review): on Ruby 3.0+ Kernel#open no longer opens URLs and
#       URI.open would be needed -- confirm the target Ruby version.
# id  - String used as the file name (without extension).
#
# Prints " downloaded <id>" after a successful save. Returns nil.
# Any error raised while fetching or writing propagates to the caller.
def download_file(url, id)
  # TODO file type awareness
  target = 'images/' + id + '.jpg'
  # does it exist?
  return if File.exist?(target)

  # Fetch before creating the target so a failed download does not leave an
  # empty file behind (which would make every later run skip this image).
  # The block forms close both handles even when an error is raised.
  data = open(url) { |remote| remote.read }
  File.open(target, 'wb') { |out| out.write(data) }
  puts ' downloaded ' + id
end
# Makes sure the two working directories used by the script, 'images' and
# 'db', exist relative to the current directory. A path that already exists
# is left untouched.
def create_paths()
  %w[images db].each { |dir| FileUtils.mkdir(dir) unless File.exist?(dir) }
end
# Command-line entry point.
#   ARGV[0] - ffffound username, or "--all" for every post (required)
#   ARGV[1] - listing type, defaults to 'found'
# this needs work (is there a more idiomatic way to do this?)
user = ARGV[0]
type = ARGV[1] || 'found'
unless user
  puts "A ffffound username must be supplied"
  exit
end
if user == "--all"
  puts "Invoked for all posts"
  user = "all"
end
puts "Invoked for posts by #{user} of type #{type}"
create_paths()
# One database file per user, e.g. db/ffffound-<user>.db
db = SQLite3::Database.new('db/ffffound-'+user+'.db') # ick
create_db(db)
populate_db(db, user, type)
exit
# puts img.to_json
# DONE puts img.to_database_table(s)

In these 2 parts:
title_elem = (image/"div.title")
info[:title] = title_elem.at("a").inner_html
desc_elem = (image/"div.description")
desc = desc_elem.inner_html
You don't check whether the element exists before calling inner_html on it. So, basically, if title_elem.at("a") (or desc_elem) is nil, you end up calling nil.inner_html, which raises NoMethodError because nil has no such method.
To fix this, replace corresponding lines with(for Ruby without Rails)
title_elem = (image/"div.title")
info[:title] = title_elem.at("a").inner_html unless title_elem.at("a").nil?
desc_elem = (image/"div.description")
desc = desc_elem.inner_html unless desc_elem.nil?
In Rails there is a .try method, which prevents such exception raising, so
title_elem = (image/"div.title")
info[:title] = title_elem.at("a").try(:inner_html)
desc_elem = (image/"div.description")
desc = desc_elem.try(:inner_html)
is a solution if you are using Rails.

Related

Can mechanize for Ruby filter the contents of a <select> by class?

Sorry, but I didn't find the documentation enlightening at all. Basically, I am trying to iterate through a <select> where some options are not valid. The ones I want have 'class="active"'. Can I do that with mechanize? Here's what I have so far:
# Prints the product number of each flavor offered in the page's flavor
# <select> element.
class Scraper
  # Fetches the page, locates the 'twister' form and its flavor dropdown, and
  # prints one product number per option that has visible text.
  #
  # Returns the list of options that was iterated.
  def init
    mech = Mechanize.new
    page = mech.get('url')
    # Now go through the <select> to get product numbers for the different flavors
    form = page.form_with(:id => 'twister')
    select = form.field_with(:name => 'dropdown_selected_flavor_name')
    select.options.each do |o|
      # Options without text carry no product number. Previously such an
      # option left the value nil and trim_pn(nil) raised NoMethodError.
      next if o.text == ""

      # Characters 2..12 of the option's string form hold the product number,
      # possibly preceded by a stray comma.
      puts trim_pn(o.to_s[2..12])
    end
  end

  # Checks validity of product number and removes excess characters if necessary
  #
  # pn - candidate product number (String, or nil when the source string was
  #      too short to slice).
  #
  # Returns pn without a leading comma; pn itself when there is none.
  def trim_pn(pn)
    pn.to_s.start_with?(",") ? pn[1..-1] : pn
  end
end
p = Scraper.new
p.init
All that does is grabs the product number and removes some extra info that I don't want. I thought replacing the .each do with this:
select.options_with(:class => 'active').each do |o|
if (o.text != "")
value = o
end
end
But that throws "undefined method 'dom_class' for Mechanize:Form:Option blah blah." Is there a different way I should be approaching this?

What causes the error "undefined local variable or method `csv' for main:Object"?

I am trying to write a rake task for importing a CSV file into multiple models. The code compiles without error, but I get this error message when I attempt to run it:
rake aborted! NameError: undefined local variable or method csv' for
main:Object
/Users/rickcasey/Projects/Programming/wfrails/lib/tasks/import_partial.rake:28:in
block in '
Here is the script:
# Rake task that reads public/partial.csv row by row (first row = headers)
# and, for each row, looks up or creates a Company, County, GasType, Field
# and Formation record, then prints the resulting record ids.
desc "Imports the CSV file "
task :import_partial => :environment do
require 'csv'
# NOTE(review): lowercase `csv` is looked up as a local variable or method,
# not as the CSV class that require 'csv' defines -- this is the line the
# reported NameError points at.
csv.foreach('public/partial.csv', :headers => true) do |row|
# create records in independent tables
# create the Company object
# NOTE(review): slice is called on a Hash here, so this (and the other
# this_*_name values below) is presumably a one-entry Hash rather than the
# bare name string; also the key is county_name although the value is used
# as a company name -- confirm both.
this_company_name = row.to_hash.slice(*%w[county_name])
if !(Company.exists?(company_name: this_company_name))
# NOTE(review): `Companies` (plural) here vs `Company` elsewhere -- confirm
# which constant exists.
Companies.create(row.to_hash.slice(*%w[company_name operator_num]))
end
# NOTE(review): find is given the name value, not an id -- confirm the
# intended lookup (same pattern in every section below).
thecompany = Company.find(this_company_name)
company_id = thecompany.id
# create the County object
this_county_name = row.to_hash.slice(*%w[county])
if !(County.exists?(county_name: this_county_name))
# NOTE(review): `Counties` (plural) vs `County` -- confirm.
Counties.create(county_name: this_county_name)
end
thecounty = County.find(this_county_name)
county_id = thecounty.id
# create the GasType object
this_gastype_name = row.to_hash.slice(*%w[gas_type])
if !(GasType.exists?(gastype_name: this_gastype_name))
GasType.create(gastype_name: this_gastype_name)
end
thegastype = GasType.find(this_gastype_name)
gastype_id = thegastype.id
# create the Field object
this_field_name = row.to_hash.slice(*%w[field])
if !(Field.exists?(field_name: this_field_name))
# NOTE(review): field_code is not assigned anywhere in this task.
Field.create(field_name: this_field_name, field_code: field_code)
end
thefield = Field.find(this_field_name)
field_id = thefield.id
# create the Formations object
this_formation_name = row.to_hash.slice(*%w[formation])
if !(Formation.exists?(formation_name: this_formation_name))
# NOTE(review): this creates through `Counties`, not Formation, and
# formation_code is not assigned anywhere in this task -- confirm.
Counties.create(formation: this_formation_name, formation_code: formation_code)
end
theformation = Formation.find(this_formation_name)
formation_id = theformation.id
# debugging:
puts "company_id:", company_id
puts "county_id:", county_id
puts "gastype_id:", gastype_id
puts "field_id:", field_id
puts "formation_id:", formation_id
# create records in dependent tables:
# Use the record id's from above independent table create records containing foreign keys:
#Facilities.create(row.to_hash.slice(*%w[dir_e_w dir_n_s dist_e_w dist_n_s facility_name facility_num ground_elev lat long meridian qtrqtr range sec twp utm_x utm_y])
#Wells.create(row.to_hash.slice(*%w[api_county_code api_seq_num first_prod_date form_status_date formation_status sidetrack_num spud_date status_date td_date test_date wbmeasdepth wbtvd well_bore_status well_name])
end
end
My environment is: ruby 2.1.2p95, Rails 4.1.1
This is quite unclear to me, and I have not yet found an example of a similar error with an answer I understand... Any help is much appreciated!
I believe the error is in this line
csv.foreach('public/partial.csv', :headers => true) do |row|
It should be
CSV.foreach('public/partial.csv', :headers => true) do |row|
I believe the class name is uppercase - CSV.foreach, not csv.foreach.

Calling multiple methods on a CSV object

I have constructed an Event Manager class that performs parsing actions on a CSV file, and produces html letters using erb. It is part of a jumpstart labs tutorial
The program works fine, but I am unable to call multiple methods on an object without the earlier methods interfering with the later methods. As a result, I have opted to create multiple objects to call instance methods on, which seems like a clunky inelegant solution. Is there a better way to do this, where I can create a single new object and call methods on it?
Like so:
eventmg = EventManager.new("event_attendees.csv")
eventmg.print_valid_phone_numbers
eventmg_2 = EventManager.new("event_attendees.csv")
eventmg_2.print_zipcodes
eventmg_3 = EventManager.new("event_attendees.csv")
eventmg_3.time_targeter
eventmg_4 = EventManager.new("event_attendees.csv")
eventmg_4.day_of_week
eventmg_5 = EventManager.new("event_attendees.csv")
eventmg_5.create_thank_you_letters
The complete code is as follows
require 'csv'
require 'sunlight/congress'
require 'erb'
# Parses an attendee CSV file and produces reports (zip codes, phone numbers,
# registrations per hour and per weekday) and ERB-generated thank-you letters.
#
# NOTE(review): every report iterates @file, a CSV reader that streams the
# underlying file, and time_targeter/day_of_week both append to the shared
# @list_selections array. Calling more than one report on the same instance
# therefore interferes with the later ones; that behaviour is unchanged here.
class EventManager
  INVALID_PHONE_NUMBER = "0000000000"
  Sunlight::Congress.api_key = "e179a6973728c4dd3fb1204283aaccb5"

  # file_name       - path of the attendee CSV file (first row = headers).
  # list_selections - Array that time_targeter/day_of_week append to.
  def initialize(file_name, list_selections = [])
    puts "EventManager Initialized."
    # Options are passed as keywords: a braces-wrapped Hash in this position
    # is taken as the file mode on Ruby 3.
    @file = CSV.open(file_name, :headers => true,
                                :header_converters => :symbol)
    @list_selections = list_selections
  end

  # Returns +zipcode+ as a 5-character String: left-padded with zeros and
  # truncated to the first five characters.
  def clean_zipcode(zipcode)
    zipcode.to_s.rjust(5,"0")[0..4]
  end

  # Prints the cleaned zip code of every row.
  def print_zipcodes
    puts "Valid Participant Zipcodes"
    @file.each do |line|
      zipcode = clean_zipcode(line[:zipcode])
      puts zipcode
    end
  end

  # Normalises a phone number to its ten digits.
  #
  # Returns the ten digits when the input contains exactly ten digits, or
  # eleven digits starting with "1" (the leading 1 is dropped). Returns
  # INVALID_PHONE_NUMBER for anything else, including nil.
  def clean_phone(phone_number)
    # Work on the digits only. The digits are Strings, so the leading digit
    # must be compared with "1", not the Integer 1.
    digits = phone_number.to_s.scan(/\d/).join
    if digits.length == 10
      digits
    elsif digits.length == 11 && digits.start_with?("1")
      digits[1..-1]
    else
      INVALID_PHONE_NUMBER
    end
  end

  # Prints the cleaned home phone number of every row.
  def print_valid_phone_numbers
    puts "Valid Participant Phone Numbers"
    @file.each do |line|
      clean_number = clean_phone(line[:homephone])
      puts clean_number
    end
  end

  # Prints, for each hour 0-23, how many registrations fall in that hour.
  def time_targeter
    busy_times = Array.new(24) {0}
    @file.each do |line|
      registration = line[:regdate]
      prepped_time = DateTime.strptime(registration, "%m/%d/%Y %H:%M")
      # inserts filtered hour into the array 'list_selections'
      @list_selections << prepped_time.hour.to_i
    end
    # tallies number of registrations for each hour
    @list_selections.each { |hour| busy_times[hour] += 1 }
    # delivers a result showing the hour and the number of registrations
    puts "Number of Registered Participants by Hour:"
    busy_times.each_with_index {|counter, hours| puts "#{hours}\t#{counter}"}
  end

  # Prints, for each weekday Monday..Sunday, how many registrations fall on it.
  def day_of_week
    busy_day = Array.new(7) {0}
    d_of_w = ["Monday:", "Tuesday:", "Wednesday:", "Thursday:", "Friday:", "Saturday:", "Sunday:"]
    @file.each do |line|
      registration = line[:regdate]
      # you have to reformat date because of parser format
      prepped_date = Date.strptime(registration, "%m/%d/%y")
      # Date#wday counts from Sunday = 0; shift so that Monday = 0 ... Sunday = 6
      # and the index lines up with d_of_w.
      @list_selections << (prepped_date.wday - 1) % 7
    end
    @list_selections.each { |day| busy_day[day] += 1 }
    #busy_day.each_with_index {|counter, day| puts "#{day}\t#{counter}"}
    prepared = d_of_w.zip(busy_day)
    puts "Number of Registered Participants by Day of Week"
    prepared.each{|date| puts date.join(" ")}
  end

  # Looks up the legislators for +zipcode+ through the Sunlight Congress API.
  def legislators_by_zipcode(zipcode)
    Sunlight::Congress::Legislator.by_zipcode(zipcode)
  end

  # Writes +form_letter+ to output/thanks_<id>.html, creating the output
  # directory when needed.
  def save_thank_you_letters(id,form_letter)
    Dir.mkdir("output") unless Dir.exist?("output")
    filename = "output/thanks_#{id}.html"
    File.open(filename,'w') do |file|
      file.puts form_letter
    end
  end

  # Renders form_letter.erb once per row and saves each result.
  def create_thank_you_letters
    puts "Thank You Letters Available in Output Folder"
    template_letter = File.read "form_letter.erb"
    erb_template = ERB.new template_letter
    @file.each do |line|
      # These locals are exposed to the template through `binding`, so their
      # names must stay as they are.
      id = line[0]
      name = line[:first_name]
      zipcode = clean_zipcode(line[:zipcode])
      legislators = legislators_by_zipcode(zipcode)
      form_letter = erb_template.result(binding)
      save_thank_you_letters(id,form_letter)
    end
  end
end
The reason you're experiencing this problem is because when you apply each to the result of CSV.open you're moving the file pointer each time. When you get to the end of the file with one of your methods, there is nothing for anyone else to read.
An alternative is to read the contents of the file into an instance variable at initialization with readlines. You'll get an array of arrays which you can operate on with each just as easily.
"Is there a better way to do this, where I can create a single new object and call methods on it?"
Probably. If your methods are interfering with one another, it means you're changing state within the manager, instead of working on local variables.
Sometimes, it's the right thing to do (e.g. Array#<<); sometimes not (e.g. Fixnum#+)... Seeing your method names, it probably isn't.
Nail the offenders down and adjust the code accordingly. (I only scanned your code, but those Array#<< calls on an instance variable, in particular, look fishy.)

ruby cgi wont return method calls, but will return parameters

my environment: ruby 1.9.3p392 (2013-02-22 revision 39386) [x86_64-linux]
The thing is, I can make Ruby return the parameters sent over GET, but when I try to use them as arguments to my methods in an if/else, Ruby won't return anything and I end up with a blank page.
ph and pm return correctly:
http://127.0.0.1/cgi-bin/test.rb?hostname=node00.abit.dk&macadd=23:14:41:51:63
returns:
node00.abit.dk 23:14:41:51:63
Connection to the database (MySQL) works fine
When I test the method newHostName it outputs correctly:
puts newHostName
returns (which is correct)
node25.abit.dk
the code:
#!/usr/bin/ruby
require 'cgi'
require 'sequel'
require 'socket'
require 'timeout'
# Database connection shared by the helper methods below through the DB constant.
DB = Sequel.connect(:adapter=>'mysql', :host=>'localhost', :database=>'nodes', :user=>'nodeuser', :password=>'...')
#cgi-part to work
#takes 2 parameters:
#hostname & macadd
cgi = CGI.new
# Emits the HTTP header straight away, before any other output.
puts cgi.header
# NOTE(review): CGI#params presumably maps each name to an Array of values,
# so ph and pm would be Arrays rather than Strings -- confirm, since ph is
# later compared with a String.
p = cgi.params
ph = p['hostname']
pm = p['macadd']
# Looks up the basenode row whose hostname equals +hostnameargv+.
#
# Returns a two-element Array: [hostname, macadd] of the row found.
def nodeLookup(hostnameargv)
  row = DB[:basenode][:hostname => hostnameargv]
  [row[:hostname], row[:macadd]]
end
# Returns the hostname of the basenode row with the highest id, i.e. the
# first row yielded by the query below.
def lastHostName()
  #TODO: replace with correct sequel-code and NOT raw SQL
  newest_first = "SELECT hostname FROM basenode ORDER BY id DESC LIMIT 1"
  DB.fetch(newest_first) { |record| return record[:hostname] }
end
# Builds the next free node hostname from the most recent one.
#
# Takes the number that follows "node" in the last hostname (for example
# "01" in 'node01.abit.dk'), adds one, and keeps the zero padding of the
# original number, so 'node01.abit.dk' is followed by 'node02.abit.dk'.
# Numbers of any length are handled, not just two digits.
#
# Returns the new hostname String, always with the ".abit.dk" suffix.
# Raises ArgumentError when the last hostname does not start with "node"
# followed by digits.
def newHostName()
  org = lastHostName
  match = /\Anode(\d+)/.match(org.to_s)
  raise ArgumentError, "unexpected hostname format: #{org.inspect}" unless match

  digits = match[1]
  # rjust preserves the width of the old number ("01" -> "02") and lets it
  # grow naturally when the number rolls over ("99" -> "100").
  following = (digits.to_i + 1).to_s.rjust(digits.length, "0")
  "node" + following + ".abit.dk"
end
# Inserts a row with the given hostname and MAC address into basenode.
#
# Returns the table's row count after the insert, as a String.
def insertNewNode(newhost, newmac)
  nodes = DB[:basenode]
  nodes.insert(:hostname => newhost, :macadd => newmac)
  nodes.count.to_s
end
#puts ph
#puts pm
#puts newHostName
# Builds the response body: a new hostname when the request's hostname is
# "node00.abit.dk", otherwise the lookup result for the requested hostname.
# NOTE(review): both branches end in puts, which returns nil, so the value
# of the block given to cgi.html is nil rather than the text itself.
cgi.out() do
cgi.html do
begin
if ph == "node00.abit.dk"
puts newHostName
else
puts nodeLookup(ph)
end
end
end
end
I feel like im missing something here. Any help is very much appreciated!
//M00kaw
How about modifying the last lines of your code as follows? CGI HTML generation methods take a block and use the return value of the block as their content. So newHostName or nodeLookup(ph) should be the return value of the block passed to cgi.html(), rather than being passed to puts, which prints the content directly and returns nil. That's why cgi.html() got an empty string (nil.to_s).
#puts newHostName
cgi.out() do
cgi.html do
if ph == "node00.abit.dk"
newHostName
else
nodeLookup(ph)
end
end
end
p.s. It's conventional to indent your ruby code with 2 spaces :-)

Ruby: Method doesn't run outside file where defined - "can't convert Symbol into Integer (TypeError)"

I'm a ruby newbie and this is my first (command-line for now) program.
First, some source.
file: AccessDb.rb
require 'mongo'
require 'json'
(...)
# Thin wrapper around one MongoDB collection.
class AccessDb
  # Opens a connection and selects the collection to work on.
  #
  # dbname     - name of the database.
  # collection - name of the collection inside that database.
  def initialize dbname, collection #, username, password
    @dbname = dbname
    @collection = collection
    @conn = Mongo::Connection.new
    @db = @conn[dbname]
    @coll = @db[collection]
  end

  # Inserts +json+, or replaces the existing document that has the same
  # :hash_md5 value.
  #
  # json - document to store; it is indexed with the Symbol :hash_md5, so it
  #        must be a Hash-like object (a String would raise TypeError here).
  #
  # Returns whatever the collection's update call returns.
  def upsert_by_meta json
    # print json
    @coll.update({ :hash_md5 => json[:hash_md5] }, json, :upsert => true)
  end
end
using
file: Downloader.rb
require 'curb'
require 'yaml'
require 'json'
(...)
class Downloader
# Sets up a downloader that saves files into +directory+.
#
# directory - target directory for downloaded files.
#
# Creates the curl handle, applies curl_setup to it and opens the "meta"
# collection of the "meta" database for metadata storage.
def initialize(directory)
  @PASS = nil
  @COOKIE = nil
  @filename = nil
  @full_file_location = nil
  @target_dir = directory
  # NOTE(review): the result of this check is discarded, so a missing
  # directory is not reported here -- confirm what was intended.
  File.exist? @target_dir # File.directory? @target_dir
  @c = Curl::Easy.new
  curl_setup
  @mongo = AccessDb.new "meta","meta"
end
# Collects metadata about the download that was just performed for +url+.
#
# Reads the transfer details from the curl handle (@c), the requested file
# name from @filename and the saved file from @save_location; sets
# @final_filename and @hash as a side effect.
#
# url - the URL that was requested.
#
# Returns the metadata as a pretty-printed JSON String (not a Hash), so
# callers that need key access have to parse it first.
def parse_link_info(url)
  json = {}
  json[:link_requested] = url
  # Only recorded when the request was redirected somewhere else.
  json[:link_final] = @c.last_effective_url if @c.last_effective_url != url
  json[:link_filename_requested] = @filename
  # File name = last path segment of the final URL, query string removed.
  @final_filename = @c.last_effective_url.split(/\?/).first.split(/\//).last
  json[:link_filename_delivered] = @final_filename if @final_filename != @filename
  json[:link_filetime] = Time.at(@c.file_time).utc.to_s
  json[:content_lenght] = @c.downloaded_content_length
  json[:content_type] = @c.content_type
  @hash = MovieHasher::compute_hash(@save_location)
  json[:hash_bigfile] = @hash unless @hash.nil?
  # Binary read: the downloaded file is arbitrary data, and a text-mode read
  # can alter or truncate it on some platforms.
  json[:hash_md5] = Digest::MD5.hexdigest(File.binread(@save_location))
  JSON.pretty_generate(json)
end
(json is some generated json file)
using code from Downloader.rb in the AccessDb.rb tests works perfectly, but when the method is used in Downloader.rb I get the following output:
D:/Dropbox/#code/PracaInz-Program/AccessDb.rb:20:in `[]': can't convert Symbol into Integer (TypeError)
from D:/Dropbox/#code/PracaInz-Program/AccessDb.rb:20:in `upsert_by_meta'
from D:/Dropbox/#code/PracaInz-Program/Downloader.rbw:158:in `block in add_links'
from D:/Dropbox/#code/PracaInz-Program/Downloader.rbw:148:in `each'
from D:/Dropbox/#code/PracaInz-Program/Downloader.rbw:148:in `add_links'
from D:/Dropbox/#code/PracaInz-Program/Downloader.rbw:189:in `<main>'
[Finished in 4.9s with exit code 1]
The same method code works perfectly when tested inside one file. How can I write it so that it can use symbols but also works outside that specific file? Thanks.
# Collects metadata about the download that was just performed for +url+.
#
# Reads the transfer details from the curl handle (@c), the requested file
# name from @filename and the saved file from @save_location; sets
# @final_filename and @hash as a side effect.
#
# url - the URL that was requested.
#
# Returns the metadata as a pretty-printed JSON String (not a Hash), so
# callers that need key access have to parse it first.
def parse_link_info(url)
  json = {}
  json[:link_requested] = url
  # Only recorded when the request was redirected somewhere else.
  json[:link_final] = @c.last_effective_url if @c.last_effective_url != url
  json[:link_filename_requested] = @filename
  # File name = last path segment of the final URL, query string removed.
  @final_filename = @c.last_effective_url.split(/\?/).first.split(/\//).last
  json[:link_filename_delivered] = @final_filename if @final_filename != @filename
  json[:link_filetime] = Time.at(@c.file_time).utc.to_s
  json[:content_lenght] = @c.downloaded_content_length
  json[:content_type] = @c.content_type
  @hash = MovieHasher::compute_hash(@save_location)
  json[:hash_bigfile] = @hash unless @hash.nil?
  # Binary read: the downloaded file is arbitrary data, and a text-mode read
  # can alter or truncate it on some platforms.
  json[:hash_md5] = Digest::MD5.hexdigest(File.binread(@save_location))
  JSON.pretty_generate(json)
end
# Downloads every URL in +url_array+ into @target_dir, stores each file's
# metadata in the database and writes it next to the file.
#
# url_array - list of URL Strings to download.
# cred, ref, cookie - passed through unchanged to link_setup.
#
# Returns url_array.
def add_links(url_array,cred=nil,ref=nil,cookie=nil)
  link_setup(cred,ref,cookie)
  url_array.each do |single_url|
    @c.url = single_url
    # File name = last path segment of the URL, query string removed.
    @filename = single_url.split(/\?/).first.split(/\//).last
    @save_location = @target_dir + '\\' + @filename
    # puts @save_location
    @c.perform
    # Block form closes (and so flushes) the file before parse_link_info
    # reads it back to compute the hashes.
    File.open(@save_location, "wb") { |file| file.write @c.body_str }
    json = parse_link_info single_url
    # puts json
    # NOTE(review): parse_link_info returns a JSON String, while
    # upsert_by_meta and the ["_id"] assignment below index it like a Hash;
    # that mismatch is left unchanged here -- confirm which side should change.
    id = @mongo.upsert_by_meta json
    puts id
    json["_id"] = id
    # The suffix must be a String: String + Symbol raises TypeError.
    File.open(@save_location + "meta.json", "w") { |file| file.write json }
  end
end
EDIT: more code (json definition), full trace
parse_link_info returns JSON.pretty_generate(json), i.e a string.
You pass the result of that into upsert_by_meta as its json parameter, which tries to access it as a hash (you do json[:hash_md5]) which you can't do with a string. String's [] method expects you to pass an integer (or a pair of integers) hence the method about not being able to convert the symbol to an integer
Looks like the code you have shown would work if parse_link_info just returned your json object rather than calling JSON.pretty_generate

Resources