My media converter app crashes when handling file names with spaces - ffmpeg

I wrote an app that uses ffmpeg to convert media files (.wav, .avi, .mp3, ... etc.). It works only with file names that have no spaces. When a file name with spaces is encountered, the app immediately closes. Can someone tell me if the string I'm using to call ffmpeg is correct, or need some characters escaped? Below is a fragment of the code:
...
...
...
#Select Media
os.chdir("c:\\d-Converter\\ffmpeg\\bin")
wrkdir = os.getcwd()
filelist = os.listdir(wrkdir)
self.formats1 = []
for filename in filelist:
(head, filename) = os.path.split(filename)
if filename.endswith(".avi") or filename.endswith(".mp4") or filename.endswith(".flv") or filename.endswith(".mov") or filename.endswith(".mpeg4") or filename.endswith(".mpeg") or filename.endswith(".mpg2") or filename.endswith(".wav") or filename.endswith(".mp3"):
self.formats1.append(filename)
self.format_combo1=wx.ComboBox(panel, size=(140, -1),value='Select Media', choices=self.formats1, style=wx.CB_DROPDOWN, pos=(300,50))
self.Bind(wx.EVT_COMBOBOX, self.fileFormats, self.format_combo1)
...
...
...
def fileFormats(self, e):
myFormats = {'audio': ('Select Format','.mp3', '.ogg', '.wav', '.wma'), 'video': ('Select Format','.flv','.mpg', '.mp4', '.mpeg')}
bad_file = ['Media not supported']
myFile = self.format_combo1.GetValue()
f_exten = (x for x in myFormats['audio'] + myFormats['video'] if myFile.endswith(x))
extension = f_exten.next()
if extension in myFormats['audio']:
self.format_combo2.SetItems(myFormats['audio'])
elif extension in myFormats['video']:
self.format_combo2.SetItems(myFormats['video'])
else:
self.format_combo2.SetItems(bad_file)
...
...
...
def convertButton(self, e):
unit1 = self.format_combo1.GetValue()
if unit1:
unit1 = self.repl_Wspace(unit1)
#Media Formats
unit2 = self.format_combo2.GetValue()
unit3 = self.format_combo3.GetValue()
unit4 = None
unit5 = self.format_combo5.GetValue()
bitRate = self.format_combo6.GetValue()
unit6 = bitRate
if unit3 == '-qmax':
unit4 = self.format_combo4.GetValue()
else:
pass
os.chdir("c:\\d-Converter\\ffmpeg\\bin")
wrkdir = os.getcwd()
newfile = unit1
stripped = newfile.strip('mpeg3aviovfl4w2c.') #Strips the extension from the original file name
progname='c:\\d-Converter\\ffmpeg\\bin\\ffmpeg.exe' + ' -i '
preset1_a='-vn -ar 44100 -ac 2 -ab'
preset1_b='-f mp3 '
preset_mp3='.mp3'
chck_unit1 = self.my_endswith(unit1)
while True:
if unit5 == 'video to mp3':
if unit6 == 'k/bs' or unit6 == '':
amsg = wx.MessageDialog(None, 'You must select a bit rate.', 'Media Converter', wx.ICON_INFORMATION)
amsg.ShowModal()
amsg.Destroy()
break
elif unit5 == 'video to mp3' and unit6 != 'k/bs' or unit6 != '':
self.button.Disable()
self.button2.Enable()
self.format_combo1.Disable()
self.format_combo2.Disable()
self.format_combo3.Disable()
self.format_combo4.Disable()
self.format_combo5.Disable()
self.format_combo6.Disable()
startWorker(self.LongTaskDone, self.LongTask3, wargs=(progname, wrkdir, unit1, preset1_a, unit6, preset1_b, stripped, preset_mp3))
break
elif unit1 != unit1.endswith(".mpg") or unit1.endswith(".mpeg") or unit1.endswith(".avi") or unit1.endswith(".mp4") or unit1.endswith(".flv"):
bmsg = wx.MessageDialog(None, 'You must select a valid format to convert to .mp3.', 'Media Converter', wx.ICON_INFORMATION)
bmsg.ShowModal()
bmsg.Destroy()
break
else:
pass
if unit1 == 'Select Media' or unit1 == '':
amsg = wx.MessageDialog(None, 'You must select a media file!', 'Media Converter', wx.ICON_INFORMATION)
amsg.ShowModal()
amsg.Destroy()
break
elif unit2 == 'Select Format' or unit2 == '' or unit2 == chck_unit1:
amsg = wx.MessageDialog(None, 'You must select a valid format', 'Media Converter', wx.ICON_INFORMATION)
amsg.ShowModal()
amsg.Destroy()
break
elif unit3 == 'Select Quality' or unit3 == '':
amsg = wx.MessageDialog(None, 'You must select quality', 'Media Converter', wx.ICON_INFORMATION)
amsg.ShowModal()
amsg.Destroy()
break
elif unit3 != 'Select Quality' or unit3 != '':
self.format_combo5.Disable()
if unit3 == '-qmax':
if unit4 == '0' or unit4 == '':
amsg = wx.MessageDialog(None, 'You must select number between 1-8.', 'Media Converter', wx.ICON_INFORMATION)
amsg.ShowModal()
amsg.Destroy()
break
else:
self.button.Disable()
self.button2.Enable()
self.format_combo1.Disable()
self.format_combo2.Disable()
self.format_combo3.Disable()
self.format_combo4.Disable()
self.format_combo5.Disable()
startWorker(self.LongTaskDone, self.LongTask2, wargs=(progname,wrkdir,unit1,unit3,unit4,stripped,unit2))
break
elif unit3 == '-sameq':
self.button.Disable()
self.button2.Enable()
self.format_combo1.Disable()
self.format_combo2.Disable()
self.format_combo3.Disable()
self.format_combo4.Disable()
self.format_combo5.Disable()
startWorker(self.LongTaskDone, self.LongTask, wargs=(progname,wrkdir,unit1,unit3,stripped,unit2))
break
def LongTask(self, progname, wrkdir, unit1, unit3, stripped, unit2):
convert_file1 = progname + wrkdir + '\\' + unit1 + ' ' + unit3 + ' ' + stripped + unit2
self.statusbar.SetStatusText("Converting: " + unit1 + "...")
os.system(convert_file1)
print convert_file1
def LongTask2(self, progname, wrkdir, unit1, unit3, unit4, stripped, unit2):
convert_file2 = progname + wrkdir + '\\' + unit1 + ' ' + unit3 + ' ' + unit4 + ' ' + stripped + unit2
self.statusbar.SetStatusText("Converting: " + unit1 + "...")
os.system(convert_file2)
...
...
...

Don't use os.system to execute your command. Instead, use subprocess, with each of your arguments as a separate entry in the arguments list:
import subprocess
progname='c:\\d-Converter\\ffmpeg\\bin\\ffmpeg.exe'
subprocess.check_call([progname, '-i', ... other args here])
This will ensure your arguments aren't interpreted incorrectly, and aren't susceptible to injection attacks.

Related

Ruby For loop isn't running as expected

I have this array of arrays:
WIN_COMBINATIONS = [
[0,1,2],
[3,4,5],
[6,7,8],
[0,3,6],
[0,4,8],
[6,4,2],
[1,4,7],
[2,5,8]
]
and I am defining this method:
def won?(board)
for x in WIN_COMBINATIONS
win_index_1 = x[0]
win_index_2 = x[1]
win_index_3 = x[2]
p1 = board[win_index_1]
p2 = board[win_index_2]
p3 = board[win_index_3]
if p1 == 'X' && p2 == 'X' && p3 == 'X'
return x
else
false
end
end
end
and when
board = [' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ']
The won? method returns every item in WIN_COMBINATIONS instead of false. I have no idea why and I would appreciate it if someone would please help.
In ruby each block, returns something even if you don't explicitly return a value, in your case the for block is returning the collection over you are iterating I think that your logic is correct but you need to make a small change, something like:
def won?(board)
for x in WIN_COMBINATIONS
win_index_1 = x[0]
win_index_2 = x[1]
win_index_3 = x[2]
p1 = board[win_index_1]
p2 = board[win_index_2]
p3 = board[win_index_3]
return x if p1 == "X" && p2 == "X" && p3 == "X"
end
false
end
The code above will return x if any of the sequences are valid, false in any other case (although you should follow the convention and return true or false if you are using the signature ?, other option is remove the ? and return nil instead of false). Hope this helps! 👍
Every block of code returns a value even if there is no explicit return keyword. for loop returns itself. With board = [' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' '] you never hit the return x statement and you get WIN_COMBINATIONS back.
If you replace return false with return [], it will allow you to avoid 'expected a collection that can be converted to an array with #to_ary or #to_a, but got false'. But Ruby has a bunch of nice methods to work with arrays.
By convention, methods that end with ? should return a boolean value. Your method returns an array when the if statement is true.
In your case, I'd rename the method and use the find method to iterate through WIN_COMBINATIONS:
def find_win_combination(board)
# You can use decomposition as in the next example `do |wi_0, wi_1, wi_2|`
win_combination = WIN_COMBINATIONS.find do |combination|
win_index_0 = combination[0]
win_index_1 = combination[1]
win_index_2 = combination[2]
# If it's true the find method will return the win combination
board[win_index_0] == "X" && board[win_index_1] == "X" && board[win_index_2] == "X"
end
# return an empty array if win_combination is not found
win_combination || []
end
board = [' ', ' ', ' ', 'X', 'X', 'X', ' ', ' ', ' ']
> find_win_combination(board)
=> [3, 4, 5]
board = [' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ']
> find_win_combination(board)
=> []
If you need won? method to return a boolean method, just use any? method on WIN_COMBINATIONS. The result of any? will be returned as a result of won`?
def won?(board)
# Here is a decomposition of each item of `WIN_COMBINATIONS` into three variables
WIN_COMBINATIONS.any? do |wi_0, wi_1, wi_2|
board[wi_0] == "X" && board[wi_1] == "X" && board[wi_2] == "X"
end
end
board = [' ', ' ', ' ', 'X', 'X', 'X', ' ', ' ', ' ']
> won?(board)
=> true
board = [' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ']
> won?(board)
=> false

Ruby mediainfo find unique properites

I'm trying to scan a large hard drive (think 17TBs) to find a storage estimate for video files within specific folders. In addition, I am trying to find specific unique properties of the video files. The purpose of this is to make a case for a digital asset management system that can support the video files that we have going back to 2009. I'm using mediainfo to inspect each video.
I have the file size/storage count working, but i'm having trouble adding the hashes of video properties to an array in my loop. My goal is for media info to look at the specific properties of each video, put them into a hash and add that hash to an array. Then, once I have collected all the hashes of video properties, I would call uniq! on the array so that it would show me the unique video properties.
The output of my code currently returns the video properties for the last video over and over again. I can't see what I'm doing wrong. Any advice?
require 'yaml'
#!/usr/bin/ruby
library_path = ARGV[0]
files_list = Dir.glob("#{library_path}/**/*")
total_capture_scatch_and_exports_size = 0
video_audit = Hash.new()
video_info = []
codecs = Hash.new()
files_list.each do |filepath|
filename = File.basename(filepath.to_s)
filepath.chomp!
puts filename
folders_to_scan = ["/capture scratch/", "/capturescratch/", "/capture-scratch/", "/capture_scratch/", "exports", "export"]
file_size = File.size(filepath)
file_extension = File.extname(filepath)
if
folders_to_scan.any? { |folder| filepath.downcase.include? folder }
if
File.file?(filepath) && filename[0] != "." && file_extension != ".TIFF" && file_extension != ".TIF" && file_extension != ".tif" && file_extension != ".tiff" && file_extension != ".jpg" && file_extension != ".jpeg" && file_extension != ".JPG" && file_extension != ".JPEG"
duration = %x[mediainfo --Inform="General;%Duration/String3%" '#{filepath}'].chomp!
format = %x[mediainfo --Inform="General;%Format%" '#{filepath}'].chomp!
commercial_name = %x[mediainfo --Inform="General;%Format_Commercial_IfAny%" '#{filepath}'].chomp!
format_profile = %x[mediainfo --Inform="General;%Format_Profile%" '#{filepath}'].chomp!
writing_library = %x[mediainfo --Inform="General;%Encoded_Library%" '#{filepath}'].chomp!
video_audit[:filepath] = filepath
video_audit[:filename] = filename
video_audit[:duration] = duration
video_audit[:file_size] = file_size
video_audit[:format] = format
video_audit[:commercial_name] = commercial_name
video_audit[:format_profile] = format_profile
video_audit[:writing_library] = writing_library
video_audit[:file_extension] = file_extension
codecs[:filename] = filename
codecs[:format] = format
codecs[:commercial_name] = commercial_name
codecs[:format_profile] = format_profile
codecs[:writing_library] = writing_library
codecs[:file_extension] = file_extension
end
end
puts video_audit.to_yaml
puts codecs
video_info << codecs
total_capture_scatch_and_exports_size += file_size
end
puts "THE VIDEO INFO IS=======>>>> #{video_info}"
puts "THE UNIQUE CODECS ARE: #{video_info.uniq!}"
#1000**3 is for gigabytes (this is how finder on OSX calculates storage on the Drobo Harddrives)use 1024**3 ofr gibibytes
puts "The total filesize is : #{total_capture_scatch_and_exports_size/(1000**3).to_f} GB"
I figured it out. I was creating new hashes outside of the loop. A new hash needed to be created for each iteration so that it could then be added to the video_info array. Then I needed to remove the bang operator when I called uniq on video_info at the end of the script. Here's my final code:
require 'json'
#developed by Maile Thiesen
#!/usr/bin/ruby
library_path = ARGV[0]
files_list = Dir.glob("#{library_path}/**/*")
total_capture_scatch_and_exports_size = 0
counter = 0
video_info = []
files_list.each do |filepath|
filename = File.basename(filepath.to_s)
codecs = {}
filepath.chomp!
folders_to_scan = ["/capture scratch/", "/capturescratch/", "/capture-scratch/", "/capture_scratch/", "exports", "export"]
file_size = File.size(filepath)
file_extension = File.extname(filepath)
if
folders_to_scan.any? { |folder| filepath.downcase.include? folder }
if
File.file?(filepath) && filename[0] != "." && file_extension != ".TIFF" && file_extension != ".TIF" && file_extension != ".tif" && file_extension != ".tiff" && file_extension != ".jpg" && file_extension != ".jpeg" && file_extension != ".JPG" && file_extension != ".JPEG"
duration = %x[mediainfo --Inform="General;%Duration/String3%" '#{filepath}'].chomp!
format = %x[mediainfo --Inform="General;%Format%" '#{filepath}'].chomp!
commercial_name = %x[mediainfo --Inform="General;%Format_Commercial_IfAny%" '#{filepath}'].chomp!
format_profile = %x[mediainfo --Inform="General;%Format_Profile%" '#{filepath}'].chomp!
writing_library = %x[mediainfo --Inform="General;%Encoded_Library%" '#{filepath}'].chomp!
codecs[:format] = format
codecs[:commercial_name] = commercial_name
codecs[:format_profile] = format_profile
codecs[:writing_library] = writing_library
codecs[:file_extension] = file_extension
total_capture_scatch_and_exports_size += file_size
counter += 1
video_info << codecs
end
end
end
puts "THE UNIQUE CODECS ARE: #{JSON.pretty_generate(video_info.uniq)}"
puts "THE TOTAL FILESIZE IS : #{total_capture_scatch_and_exports_size/(1000**3).to_f} GB"

ruby syntax error, unexpected keyword_end, expecting end-of-input (SyntaxError)

I keep getting this error (syntax error, unexpected keyword_end, expecting end-of-input) when running my code. I've looked and it seems like all the ends match up. I seem to be missing something. Any help would be appreciated.
$charClass = $charClass.to_i
$lexeme= Array.new(100)
$nextChar = $nextChar.to_s
$lexLen = $lexLen.to_i
$token = $token.to_i
$nextToken = $nextToken.to_i
$input = File.read('user input.txt')
class String
def valid_integer?
true if Integer(self) rescue false
end
end
LETTER = 0
DIGIT = 1
UNKNOWN = 99
INT_LIT = 10
IDENT = 11
ASSIGN_OP = 20
ADD_OP = 21
SUB_OP = 22
MULT_OP = 23
DIV_OP = 24
LEFT_PAREN = 25
RIGHT_PAREN = 26
def lookup (ch)
case ch
when '('
addChar
$nextToken = LEFT_PAREN
when ')'
addChar
$nextToken = RIGHT_PAREN
when '+'
addChar
$nextToken = ADD_OP
when '-'
addChar
$nextToken = SUB_OP
when '*'
addChar
$nextToken = MULT_OP
when '/'
addChar
$nextToken = DIV_OP
else
addChar
$nextToken = EOF;
end
return $nextToken
end
def addChar
if $lexLen <= 98
$lexeme[$lexLen+1] = $nextChar;
$lexeme[$lexLen] = 0
else
puts "Error - $lexeme is too long"
end
end
def getChar()
if (($nextChar = $input.split("")) != EOF)
if $nextChar.match(/^[[:alpha:]]$/)
$charClass = LETTER
elsif $nextChar.valid_integer?
$charClass = DIGIT
else $charClass = UNKNOWN
end
$charClass = EOF
end
end
def getNonBlank()
while $nextChar =~ /^\s*$/
getchar()
end
end
def lex
$lexLen = 0
getNonBlank
case $charClass
when LETTER
addChar
getChar
while $charClass == LETTER || $charClass == DIGIT
addChar
getChar
end
$nextToken = IDENT
when DIGIT
addChar
getChar
while $charClass == DIGIT
addChar
getChar
end
next$token = INT_LIT
when UNKNOWN
lookup($nextChar)
getChar
when EOF
$nextToken = EOF
$lexeme[0] = 'E'
$lexeme[1] = 'O'
$lexeme[2] = 'F'
$lexeme[3] = 0
end
puts "Next $token is #{$nextToken}, Next $lexeme is #{$lexeme}"
return $nextToken
end
if $input.nil?
puts "ERROR = cannot open input"
else
getChar()
do lex()
while ($nextToken != EOF)
end
end
end
The do in line 132 is not supposed to be there. There are many other syntax errors in the code.

How do you use a variable to specify the width in ruby's printf method?

I'm trying to use a variable to specify the width in the printf method like so:
puts "MAXES #{last_name_max} #{first_name_max} #{email_address_max} #{zipcode_max} #{city_max} #{state_max} #{street_max} #{homephone_max}"
printf "%-#{last_name_max}s %-#{first_name_max}s %-#{email_address_max}s %-#{zipcode_max}s %-#{city_max}s %-#{state_max}s %-#{street_max}s %-#{homephone_max}s\n", 'LAST NAME', 'FIRST NAME', 'EMAIL', 'ZIPCODE', 'CITY', 'STATE', 'ADDRESS', 'PHONE'
queue.each do |attendee|
printf "%-#{last_name_max}s %-#{first_name_max}s %-#{email_address_max}s %-#{zipcode_max}s %-#{city_max}s %-#{state_max}s %-#{street_max}s %-#{homephone_max}s\n", attendee[:last_name], attendee[:first_name], attendee[:email_address], attendee[:zipcode], attendee[:city], attendee[:state], attendee[:street], attendee[:homephone]
end
I've been googling around and playing with irb, and I can't figure out what's wrong. This is the output I get:
Enter command: queue print
MAXES 7 6 33 5 11 2 18 12
LAST NAME FIRST NAME EMAIL ZIPCODE CITY STATE ADDRESS PHONE
Hasegan Audrey ffbbieucf#jumpstartlab.com 95667 Placerville CA 1570 Old Ranch Rd. 530-919-3000
Zielke Eli jbrabeth.buckley#jumpstartlab.com 92037 La Jolla CA 3024 Cranbrook Ct 858 405 3000
Tippit Meggie dgsanshamel#jumpstartlab.com 94611 Piedmont CA 28 Olive Ave. 510 282 4000
Enter command: q
In irb, printf("%-#{width}s", "ad") works, so I think that you could interpolate variables in printf. I printed out the variables I'm using in the line before, so they should be correct. And when I use numbers instead of the variables - printf "%-20s %-20s... - it works. I don't know what else could be wrong.
This is my full code:
require 'CSV'
puts 'Welcome to Event Reporter!'
print 'Enter command: '
command = gets.chomp
def clean(attribute, type)
if (type == 'regdate')
elsif (type == 'first_name')
elsif (type == 'last_name')
elsif (type == 'email_address')
elsif (type == 'homephone')
homephone = attribute
homephone = homephone.to_s.gsub(/\D/, '')
if (homephone.length < 10)
homephone = '0000000000'
elsif (homephone.length == 11)
if (homephone[0] == '1')
homephone[0] = ''
else
homephone = '0000000000'
end
elsif (homephone.length > 11)
homephone = '0000000000'
end
return homephone
elsif (type == 'street')
elsif (type == 'city')
elsif (type == 'state')
elsif (type == 'zipcode')
zipcode = attribute.to_s.rjust(5, "0")[0..4]
return zipcode
end
return attribute
end
queue = []
while (command != 'q') do
command = command.split
if (command[0] == 'load')
command[1] ? filename = command[1] : filename = 'event_attendees.csv'
attendees = CSV.open filename, headers: true, header_converters: :symbol
puts "Loaded #{filename}"
elsif (command[0] == 'find')
attribute = command[1]
criteria = command[2]
attendees.rewind
attendees.each do |attendee|
attendee_attribute = clean(attendee[attribute.to_sym], attribute)
queue << attendee if criteria.to_s.downcase.strip == attendee_attribute.to_s.downcase.strip
end
elsif (command[0] == 'queue')
if command[1] == 'count'
puts "Count: #{queue.length}"
elsif command[1] == 'clear'
queue = []
puts 'Queue cleared.'
elsif (command[1] == 'print')
queue.to_a.sort_by! {|obj| obj[command[3]]} if command[2] == 'by'
last_name_max, first_name_max, email_address_max, zipcode_max, city_max, state_max, street_max, homephone_max = 0, 0, 0, 0, 0, 0, 0, 0
queue.each do |attendee|
last_name_max = attendee[:last_name].length if attendee[:last_name].length > last_name_max.to_i
first_name_max = attendee[:first_name].length if attendee[:first_name].length > first_name_max.to_i
email_address_max = attendee[:email_address].length if attendee[:email_address].length > email_address_max.to_i
zipcode_max = attendee[:zipcode].length if attendee[:zipcode].length > zipcode_max.to_i
city_max = attendee[:city].length if attendee[:city].length > city_max.to_i
state_max = attendee[:state].length if attendee[:state].length > state_max.to_i
street_max = attendee[:street].length if attendee[:street].length > street_max.to_i
homephone_max = attendee[:homephone].length if attendee[:homephone].length > homephone_max.to_i
end
puts "MAXES #{last_name_max} #{first_name_max} #{email_address_max} #{zipcode_max} #{city_max} #{state_max} #{street_max} #{homephone_max}"
printf "%-#{last_name_max}s %-#{first_name_max}s %-#{email_address_max}s %-#{zipcode_max}s %-#{city_max}s %-#{state_max}s %-#{street_max}s %-#{homephone_max}s\n", 'LAST NAME', 'FIRST NAME', 'EMAIL', 'ZIPCODE', 'CITY', 'STATE', 'ADDRESS', 'PHONE'
queue.each do |attendee|
printf "%-#{last_name_max}s %-#{first_name_max}s %-#{email_address_max}s %-#{zipcode_max}s %-#{city_max}s %-#{state_max}s %-#{street_max}s %-#{homephone_max}s\n", attendee[:last_name], attendee[:first_name], attendee[:email_address], attendee[:zipcode], attendee[:city], attendee[:state], attendee[:street], attendee[:homephone]
end
elsif (command[1] == 'save')
output_file = CSV.open(command[3], 'w')
output_file << ['last_name', 'first_name', 'email_address', 'zipcode', 'city', 'state', 'street', 'homephone']
queue.each do |attendee|
output_file << [attendee[:last_name], attendee[:first_name], attendee[:email_address], attendee[:zipcode], attendee[:city], attendee[:state], attendee[:street], attendee[:homephone]]
end
end
elsif (command[0] == 'help')
puts "load <filename.csv>\nqueue count\nqueue clear\nqueue print\nqueue print by <attribute>\nqueue save to <filename.csv>\nfind <attribute> <criteria>" if !command[1]
puts 'Loads <filename.csv> (event_attendees.csv if not specified)' if command[1] == 'load'
if (command[1] == 'queue')
puts 'The cumulative number of attendees who match the criteria searched for.' if command[2] == 'count'
puts 'Clears the queue of matched attendees.' if command[2] == 'clear'
puts 'Prints out a table of the matched attendees.' if command[2] == 'print' && !command[3]
puts 'Prints out a table of the matched attendees sorted by <attribute>' if command[2] == 'print' && command[3] == 'by'
puts 'Saves the queue to the file.' if command[2] == 'save'
end
puts 'Adds matches to the queue' if command[1] == 'find'
end
print 'Enter command: '
command = gets.chomp
end
When computing the max length of a field, you didn't count the header in.
puts "MAXES #{last_name_max} #{first_name_max}"
#=> 7 6
"LAST NAME" => length 9
"FIRST NAME" => length 10
printf "%-#d" won't truncate the output to the width as specified in #. The first two field overflows, and the following fields moved further as expected.

Limitation on number of result in search engine scraping

I am scraping bing search engine using Mechanize. But I get only max 200 results programmatically if I execute same search query on bing.com it returns 1400 results. What is gotcha here?
def generate_profiles_from_group(options={})
raise "TypeError", "Invalid Arguments" unless options.is_a? Hash
group = options[:group] if options.has_key? :group
query = build_query(options)
page = bing_search(query)
contacts_stack = extract_contacts_from_bing_page page: page
bing_links_stack = bing_links page
return contacts_stack, bing_links_stack
end
def extract_contacts_from_bing_page(options)
page = options[:page]
company = options[:company] || nil
title = options[:title] || nil
stack = []
while true
page.parser.search('h3 a').each do |cite|
text = cite.text
unless text == ""
name_array = text.split(' ')
if name_array.size >= 2
name = name_array[0]+' '+name_array[1]
unless name=~/[^a-zA-Z',\s]/i
stack << {name: name, company: company, title: title}
end
end
end
end
keyw = page.parser.xpath('//*[contains(concat( " ", #class, " " ), concat( " ", "sb_pagN", " " ))]').text
break if keyw == ""
page = #agent.click page.link_with(text: keyw )
end
stack
end
def bing_links page
stack = []
while true
page.parser.xpath('//cite').each do |cite|
stack << cite.text unless cite.text == ""
end
keyw = page.parser.xpath('//*[contains(concat( " ", #class, " " ), concat( " ", "sb_pagN", " " ))]').text
break if keyw == ""
sleep(10+rand(40))
page = #agent.click page.link_with(text: keyw )
end
stack
end
def build_query(options)
name = options[:name] if options.has_key? :name
title = options[:title] if options.has_key? :title
company = options[:company] if options.has_key? :company
group = options[:group] if options.has_key? :group
if name && company
return "site:linkedin.com \"#{name}\" \"at #{company}\""
elsif name && title
return "site:linkedin.com \"#{name}\" \"#{title}\""
elsif title && company
return "site:linkedin.com/ \"#{title}\" \"at #{company}\""
elsif group
return "site:linkedin.com \"groups and association\" + \"#{group}\""
end
end

Resources