How to download a delayed file via HTTP in Ruby?

How to download a delayed file via HTTP in Ruby? - ruby

I use the following Ruby function to download various files via HTTP:
def http_download(uri, filename)
bytes_total = nil
begin
uri.open(
read_timeout: 500,
content_length_proc: lambda { |content_length|
bytes_total = content_length
},
progress_proc: lambda { |bytes_transferred|
if bytes_total
print("\r#{bytes_transferred} of #{bytes_total} bytes")
else
print("\r#{bytes_transferred} bytes (total size unknown)")
end
}
) do |file|
open filename, 'w' do |io|
file.each_line do |line|
io.write line
end
end
end
rescue => e
puts e
end
end
I also want to download files (csv, kml, zip, geojson) from this website. However, there is some kind of delay set up. When I click the download link in the browser it takes a bit until the download window appears. I suppose the file needs to be processed on the server before it can be served.
How can I modify my script to take the delay into account?
I am running Ruby 2.2.2.

Here is the modification according to the post and comment:
require 'open-uri'
def http_download(uri, filename)
bytes_total = nil
index = 1
begin
open(
uri,
read_timeout: 500,
content_length_proc: lambda { |content_length|
bytes_total = content_length
},
progress_proc: lambda { |bytes_transferred|
if bytes_total
print("\r#{bytes_transferred} of #{bytes_total} bytes")
else
print("\r#{bytes_transferred} bytes (total size unknown)")
end
}
) do |io|
# if "application/json" == io.content_type
if io.is_a? StringIO
raise " --> Failed, server is processing. Retry the request ##{index}"
else # Tempfile
puts "\n--> Succeed, writing to #{filename}"
File.open(filename, 'w'){|wf| wf.write io.read}
end
end
rescue => e
puts e
return if e.is_a? OpenURI::HTTPError # Processing error
index += 1
return if index > 10
sleep index and retry
end
end

Related

Stream-like processing of files in Ruby

I would like to understand how I can add filters/transformations in a stream starting from a source, going to a sink.
As an example, consider reading a CSV file. That would be
CSV.foreach(file) { |row| ... }
If I want to read from a zipped file, that would become
stream = Zlib::GzipReader.open('/tmp/foo.csv.gz')
csv = CSV.new(stream)
csv.each { |row| ... }
Now, possibly I would like to add transformations on between gunzip and csv. What is the best way to achieve this goal in Ruby?
Currently I'm doing this:
gzip = Zlib::GzipReader.open('/tmp/foo.psv.gz') # Pipe Separated Value (fake example).
trans = IOTransform.new(gzip) { |line| line&.tr('|', ',') } # A simple example.
csv = CSV.new(trans)
csv.each do |row|
puts row.inspect
end
using the following class
class IOTransform
def initialize(io, &block)
#io = io
#block = block
#inbuf = +''
end
def gets(...)
if !#io.eof?
#inbuf << #io.gets(...)
end
res = nil
remainder = nil
#inbuf&.each_line { |line|
if line[-1] == "\n"
res ||= +''
res << #block.call(line)
else
remainder = line
end
}
#inbuf = remainder
res
end
end
but it feels like I really missed something: I very much doubt that there's no easy way to process streams in Ruby.
Thanks in advance!

Ruby: Write CSV line by line instead of whole file

I am using the follow code to write to a CSV file. It writes the whole file at once. I would like to write the CSV file line by line by amending the file. How can I adjust my code?
CSV.open("#{#app_path}/Data_#{#filename}", "w") do |csv|
data_array.each do |r|
csv << r
end
end

As I understand, the problem is not the csv file, but the size of the array (and that after each fail you have to rebuild the array).
My attempt at solving that would be to process the array in chunks like below:
def process_array_by_chunks(array, starting_index = 0, chunk_size)
return if array.empty?
current_index = starting_index
size = array.size
stop = false
while !stop do
puts "doing index: #{current_index}"
yield(array[current_index, chunk_size])
stop = true if current_index >= size
current_index = current_index + chunk_size
end
rescue StandardError => e
puts "failed at index: #{current_index}"
puts "data left to process: "
return array[current_index, size]
end
# call function with a block in which we write csv file
process_array_by_chunks(array, start, chunk_size) do | array|
CSV.open(path, "w") do |csv|
array.each do |r|
csv << r
end
end
end
if that blows up for some reason the function will return an array with all the items that were not yet processed.

Disable creating files and directories while execution

I've just started learning ruby and I'm trying to edit the following script. The code as is downloads files while creating directories.
def download_files
puts "Downloading #{#base_url} to #{backup_path} from site..."
puts
file_list_by_time = get_file_list_by_time
if file_list_by_time.count == 0
puts "No files to download. Possible reasons:\n\t* Accept regex didn't let any files through (Accept Regex: \"#{#accept_regex.to_s}\")\n\t* Site is not in site."
return
end
count = 0
file_list_by_time.each do |file_remote_info|
count += 1
file_url = file_remote_info[:file_url]
file_id = file_remote_info[:file_id]
file_time = file_remote_info[:time]
file_path_elements = file_id.split('/')
if file_id == ""
dir_path = backup_path
file_path = backup_path + 'index.html'
elsif file_url[-1] == '/' or not file_path_elements[-1].include? '.'
dir_path = backup_path + file_path_elements[0..-1].join('/')
file_path = backup_path + file_path_elements[0..-1].join('/') + '/index.html'
else
dir_path = backup_path + file_path_elements[0..-2].join('/')
file_path = backup_path + file_path_elements[0..-1].join('/')
end
unless File.exists? file_path
open('myfile.txt', 'a') do |f|
f.puts ("http://example.com/#{file_time}id_/#{file_url}")
end
begin
structure_dir_path dir_path
open(file_path, "wb") do |file|
begin
rescue OpenURI::HTTPError => e
puts "#{file_url} # #{e}"
file.write(e.io.read)
rescue StandardError => e
puts "#{file_url} # #{e}"
end
end
rescue StandardError => e
puts "#{file_url} # #{e}"
end
puts "#{file_url} -> #{file_path} (#{count}/#{file_list_by_time.size})"
else
puts "#{file_url} # #{file_path} already exists. (#{count}/#{file_list_by_time.size})"
end
end
puts
puts "Download complete, saved in #{backup_path} (#{file_list_by_time.size} files)"
end
end
I want to change it to just save the full file URL to a text file i.e. save the URL without downloading the file.
I added the following to the code which works perfectly. I now just need to stop the script from downloading the files.
open('myfile.txt', 'a') do |f|
f.puts ("http://example.com/#{file_time}id_/#{file_url}")
end
i tried removing the part from structure_dir_path dir_path to the end but i keep getting an error message. Any ideas how can i implement that correctly?

Return Early from Method
To stop processing, just return from anywhere inside your method, including from within a block. For example:
open('myfile.txt', 'a') do |f|
url = "http://example.com/#{file_time}id_/#{file_url}"
f.puts url
return url
end

Downloading binary files in Ruby

I am able to download my file, but I end up with a file that says it cannot be executed on my version of Windows. Here is my code:
Net::HTTP.start(url_base) do |http|
if response.code == '200'
self.size = response['content-length'].to_i
local_file = save_dir + File::SEPARATOR + strip_file_name(url_path)
if !application_exists?(local_file, response['content-length'].to_i) || overwrite_file?
build_display
self.progress_bar = ProgressBar.new(response['content-length'].to_i, PROGRESSBAR_SIZE_WIDTH)
File.open(local_file, 'w') { |f|
http.get(URI.escape(url_path)) do |str|
f.write str
#counter += str.length
#window.setpos(PROGRESSBAR_LOCATION_TOP, 1)
#window << (self.progress_bar.show(#counter)).center(WINDOW_WIDTH - 2)
#window.refresh
char = #window.getch
http.finish if char == 'c' # cancel download
end
}
end
end
end
If I download using open-uri:
file = open(url,
:content_length_proc => lambda { |content_length|
if content_length && 0 < content_length
self.progress_bar = ProgressBar.new(content_length, PROGRESSBAR_SIZE_WIDTH)
end
},
:progress_proc => lambda { |size|
##counter += str.length
#window.setpos(PROGRESSBAR_LOCATION_TOP, 1)
#window << (self.progress_bar.show(size)).center(WINDOW_WIDTH - 2)
#window.refresh
char = #window.getch
f.close if char == 'c' # cancel download
},
:read_timeout => 10)
FileUtils::cp(file, local_file)
Everything works....the problem is, I need to be able to cancel the download mid-stream. How can I a) download using net::http in a 'single-chunk' style, as I think open-uri does, or b) cancel a download mid-stream using open-uri?

Strange behavior with instance variables in Shoes

Hey, all. I'm working on making a GUI for a Ruby project using Shoes.
I've got a class called Manager (as in memory manager) that loads a 'process list' from a file, splits it up and assigns things to different 'pages' in memory when a certain execution call is made. I really don't think this part matters too much, though. It all works as a terminal application just fine.
However, Shoes is just baffling me. Here's what I've got so far:
Shoes.app(:title => "Paging Simulator", :width => 800, :height => 450) do
#manager = Manager.new
stack(:width => 200) do
#exec_list = stack {
title "Execution Queue", :size => 14
#exec_lines = para "click button to load", :size => 9
#file_button = button "Load Process List"
#file_button.click {
filename = ask_open_file
# #manager.set_exec_list filename
# alert "this makes no sense"
#exec_lines.text = #manager.exec_list.join "\n"
# exec_lines.text = File.read filename
}
}
end
end
What happens when I run this:
The program view loads as expected. I get a header, a paragraph that says "click button....", and a button. I click the button and I select the file. But this is where things get weird.
If I run the last commented line exec_lines.text = File.read filename it does as I would like, but my manager doesn't get any of the information it needs.
If I run the #manager.set_exec_list filename line, nothing from that line on in the block gets run, including the alert, or any other code I try to put in there.
if I run as shown above, however, I get the output I expect, but I don't get to set my data from the file that I select.
I've tried to figure this out from the Shoes Rules page, but this doesn't seem to be an issue that they address, and their "it changes/doesn't change self" I think I grasp, but it's confusing and I don't think it's exactly related to this problem.
Does anyone have any idea how to get this to work? I'm kind of down to crunch time on this project and I can't seem to get any other Ruby GUI toolkit to even run, so I think I'm pretty stuck with Shoes.
Thanks.
Update
I've tried running ruby-debug on the code when I make the call to #manager.set_exec_list filename, and stepping through it shows that this call is made, but the code never actually (from what I can tell) jumps into that method, and acts like it's the last line of code in the block. Do I need to include these classes inside the Shoes.app block?
Update Nope. That does nothing different.
update fullsource code follows:
#!/usr/bin/env shoes
require 'rubygems'
require 'ruby-debug'
class MemSegment
attr_accessor :filled, :pid, :seg, :seg_id
def initialize(filled=false, pid=nil, seg=nil, seg_id=0)
#filled = filled
#pid = pid.to_i
#seg = seg.to_s
#seg_id = seg_id.to_i
self
end
def fill(pid, seg, seg_id)
#filled = true; #pid = pid; #seg = seg; #seg_id = seg_id;
self
end
def clear
self.filled = false; self.pid = nil; self.seg = nil;
self
end
def filled?
#filled
end
def to_s
filled? ? "#{seg} #{seg_id} for pid #{pid}" : "Free"
end
end
class SimProc
include Enumerable
attr_accessor :pid, :code, :data
def initialize(pid, code, data)
#pid = pid.to_i
#code = code.to_i
#data = data.to_i
end
def each
yield :code, code
yield :data, data
end
def to_s
"[SimProc :pid => #{pid}, :code => #{code}, :data => #{data}]"
end
def to_a
[#pid, #code, #data]
end
end
class Manager
attr_reader :segments, :processes, :exec_list, :exec_object
def initialize
#exec_list = [[1, 2], [3, 4], [5, 6]]
#processes = {}
#segments = Array.new(8) { MemSegment.new }
end
def print_activity
#segments.each_with_index {|s, index| puts "Seg #{index} => #{s}" }
#processes.each_value {|s| puts s }
end
def load_process(pcb, exec_index)
if pcb.size == 3
p = SimProc.new(*pcb)
bad_load = false
#processes.store p.pid, p
#processes[p.pid].each do |proc_seg, bsize|
(bsize / 512.0).ceil.times do |seg_id|
#segments.each_with_index do |s, index|
if !s.filled
#find the first empty memory segment
s.fill p.pid, proc_seg, seg_id
break
# if all slots are filled and we couldn't place a proc block
elsif index == #segments.size - 1
bad_load = true
puts "Cannot find a place for #{proc_seg} segment of size #{bsize}. Requeueing..."
break;
end
end
break if bad_load
end
end
# recover pages and queue the process for later
if bad_load
#segments.each_with_index do |seg, seg_index|
# clear any segments that didn't get loaded properly
if seg.pid == p.pid
seg.clear
puts "Seg #{seg_index} => segment cleared: #{seg}"
end
end
# reinsert this process after the next in the execution list
# it will attempt to load and run after the next process is performed
#exec_list.insert(exec_index + 2, p.to_a)
end
print_activity
elsif pcb.size == 2 and pcb[1] == -1
# a process is exiting
puts "removing pid #{pcb[0]}"
#segments.each { |s| s.clear if s.pid == pcb[0] }
#processes.delete pcb[0]
print_activity
end
end
def set_exec_list(filename)
file = File.open filename
file.each { |pcb| #exec_list << pcb.split.map(&:to_i) } unless file.nil?
filename
end
def main
exseq = File.open('exseq2.txt')
set_exec_list exseq
# this is the object that will be used to run each process with .next
#exec_object = #exec_list.each_with_index
# #exec_list.each_with_index { |pcb, exec_index| load_process(pcb, exec_index) }
(#exec_list.size + 1).times do
load_process(*#exec_object.next)
end
end
end
=begin
manager = Manager.new
manager.main
=end
#=begin
Shoes.app(:title => "Paging Simulator", :width => 800, :height => 450) do
#manager = Manager.new
stack(:width => 200) do
#exec_list = stack {
title "Execution Queue", :size => 14
#exec_lines = para "click button to load", :size => 9
#file_button = button "Load Process List"
debugger
#file_button.click {
filename = ask_open_file
#manager.set_exec_list filename
# alert "this makes no sense"
# #exec_lines.text = #manager.exec_list
# #exec_lines.text = File.read filename
#exec_lines.text = #manager.exec_list.join "\n"
}
}
end
end
#=end

So, a few things:
#1, I don't have the implementation of Manager, so I can't tell you why it breaks. Did you try checking the Shoes console for any errors? Hit control-/ to bring that up. If 'nothing runs after it hits that line,' that's probably the issue.
#2, this does work for me, as long as you change exec_lines to #exec_lines on the last line. Here's what I tried:
class Manager;end
Shoes.app(:title => "Paging Simulator", :width => 800, :height => 450) do
#manager = Manager.new
stack(:width => 200) do
#exec_list = stack {
title "Execution Queue", :size => 14
#exec_lines = para "click button to load", :size => 9
#file_button = button "Load Process List"
#file_button.click {
filename = ask_open_file
#alert "this makes no sense"
#exec_lines.text = File.read filename
}
}
end
end
Hope that helps!

Develop Reference

ruby bash windows laravel spring algorithm oracle macos go visual-studio

How to download a delayed file via HTTP in Ruby? - ruby

Related

Stream-like processing of files in Ruby

Ruby: Write CSV line by line instead of whole file

Disable creating files and directories while execution

Downloading binary files in Ruby

Strange behavior with instance variables in Shoes

Categories

Resources