Extracting ZIP files in a directory that doesn't exist - ruby

I want to extract one single content type file from a ZIP package into a directory that doesn't yet exist. My code so far:
require 'zip'
Dir.mkdir 'new_folder'
#I create the folder
# Question code: opens the archive at file_path and, for each entry matched by
# the '*.xml' glob, builds a target path, creates its parent directory, and
# extracts the entry there.
# NOTE(review): the `destination` parameter is never used; the target path is
# built from `Preprocess`, which is not defined anywhere in this snippet.
def unzip_file (file_path, destination)
Zip::File.open(file_path) { |zip_file|
zip_file.glob('*.xml'){ |f| #I want to extract .XML files only
f_path = File.join(Preprocess, f.name)
# Create the parent directory of the target path, including missing ancestors.
FileUtils.mkdir_p(File.dirname(f_path))
puts "Extract file to %s" % f_path
zip_file.extract(f, f_path)
}
}
end
The folder gets created successfully, but nothing is extracted into any directory. I suspect that something is wrong with the working directory. Any help?

I believe you forgot to call your unzip method to begin with...
Nevertheless, this is how I would do this:
require 'zip'
# Extracts the entries of the zip archive at file_path whose extension is
# exactly ".xml" into destination, flattening any directories inside the
# archive (only each entry's base name is kept).
#
# file_path   - path to the zip archive.
# destination - output directory; created on demand when a match is found.
def unzip_file(file_path, destination)
  Zip::File.open(file_path) do |archive|
    archive.each do |entry|
      if File.extname(entry.name) == '.xml'
        FileUtils.mkdir_p(destination)
        target = File.join(destination, File.basename(entry.name))
        puts format("Extract file to %s", target)
        archive.extract(entry, target)
      end
    end
  end
end
zip_file = 'random.zip' # change this to zip file's name (full path or even relative path to zip file)
out_dir = 'new_folder' # change this to the name of the output folder
unzip_file(zip_file, out_dir) # this runs the above method, supplying the zip_file and the output directory
EDIT
Adding an additional method called unzip_files that calls unzip_file on all zipped files in a directory.
require 'zip'
# Pulls the ".xml" entries out of one zip archive and writes them, by base
# name only, into destination.
#
# file_path   - path to the zip archive.
# destination - output directory; created when the first match is found.
def unzip_file(file_path, destination)
  Zip::File.open(file_path) do |zip|
    zip.each do |member|
      name = member.name
      next if File.extname(name) != '.xml'

      FileUtils.mkdir_p(destination)
      out_path = File.join(destination, File.basename(name))
      puts "Extract file to %s" % out_path
      zip.extract(member, out_path)
    end
  end
end
# Runs unzip_file on every "*.zip" found directly inside directory. Each
# archive gets its own sub-folder of destination, named after the archive
# without its ".zip" extension.
#
# directory   - folder containing the zip archives.
# destination - main output folder; created if missing.
def unzip_files(directory, destination)
  FileUtils.mkdir_p(destination)
  Dir.glob(File.join(directory, '*.zip')).each do |archive|
    target = File.join(destination, File.basename(archive, '.zip'))
    unzip_file(archive, target)
  end
end
zipped_files_dir = 'zips' # this is the folder containing all the zip files
output_dir = 'output_dir' # this is the main output directory
unzip_files(zipped_files_dir, output_dir)

Related

RubyZip docx issues with write_buffer instead of open

I'm adapting the RubyZip recursive zipping example (found here) to work with write_buffer instead of open and am coming across a host of issues. I'm doing this because the zip archive I'm producing has word documents in it and I'm getting errors on opening those word documents. Therefore, I'm trying the work-around that RubyZip suggests, which is using write_buffer instead of open (example found here).
The problem is, I'm getting errors because I'm using an absolute path, but I'm not sure how to get around that. I'm getting the error "#//', name must not start with />"
Second, I'm not sure what to do to mitigate the issue with word documents. When I used my original code, which worked and created an actual zip file, any word document in that zip file had the following error upon opening: "Word found unreadable content in Do you want to recover the contents of this document? If you trust the source of this document, click Yes." The unreadable content error is the reason why I went down the road of attempting to use write_buffer.
Any help would be appreciated.
Here is the code that I'm currently using:
require 'zip'
require 'zip/zipfilesystem'
module AdvisoryBoard
  # Question code: archives the regular files found directly inside a
  # directory, building the zip in memory with write_buffer and then saving
  # the buffer to disk.
  class ZipService
    # input_dir   - directory whose files are archived.
    # output_file - path the finished archive is written to.
    def initialize(input_dir, output_file)
      @input_dir = input_dir
      @output_file = output_file
    end

    # Zip the input directory.
    def write
      entries = Dir.entries(@input_dir) - %w[. ..]
      path = ""
      buffer = Zip::ZipOutputStream.write_buffer do |zipfile|
        entries.each do |e|
          zipfile_path = path == '' ? e : File.join(path, e)
          disk_file_path = File.join(@input_dir, zipfile_path)
          @file = nil
          @data = nil
          if !File.directory?(disk_file_path)
            @file = File.open(disk_file_path, "r+b")
            @data = @file.read
            unless [@output_file, @input_dir].include?(e)
              zipfile.put_next_entry(e)
              zipfile.write @data
            end
            @file.close
          end
        end
        # NOTE(review): these two calls use the output path and the input
        # directory themselves as entry names -- presumably the source of the
        # "name must not start with /" error when they are absolute; confirm.
        zipfile.put_next_entry(@output_file)
        zipfile.put_next_entry(@input_dir)
      end
      File.open(@output_file, "wb") { |f| f.write(buffer.string) }
    end
  end
end
I was able to get word documents to open without any warnings or corruption! Here's what I ended up doing:
require 'nokogiri'
require 'zip'
require 'zip/zipfilesystem'
# Zips a directory tree into an archive. For each ".docx" file,
# word/document.xml is read, parsed with Nokogiri and written back before the
# file is added to the archive.
class ZipService
  # Initialize with the directory to zip and the location of the output archive.
  def initialize(input_dir, output_file)
    @input_dir = input_dir
    @output_file = output_file
  end

  # Zip the input directory.
  def write
    entries = Dir.entries(@input_dir) - %w[. ..]
    ::Zip::File.open(@output_file, ::Zip::File::CREATE) do |zipfile|
      write_entries entries, '', zipfile
    end
  end

  private

  # A helper method to make the recursion work.
  #
  # entries - names found in the directory currently being walked.
  # path    - that directory's path relative to the input directory ('' at the top).
  # zipfile - the open output archive.
  def write_entries(entries, path, zipfile)
    entries.each do |e|
      zipfile_path = path == '' ? e : File.join(path, e)
      disk_file_path = File.join(@input_dir, zipfile_path)
      if File.directory? disk_file_path
        recursively_deflate_directory(disk_file_path, zipfile, zipfile_path)
      else
        put_into_archive(disk_file_path, zipfile, zipfile_path, e)
      end
    end
  end

  # Adds a directory entry to the archive, then recurses into its children.
  def recursively_deflate_directory(disk_file_path, zipfile, zipfile_path)
    zipfile.mkdir zipfile_path
    subdir = Dir.entries(disk_file_path) - %w[. ..]
    write_entries subdir, zipfile_path, zipfile
  end

  # Adds one file to the archive. `entry` is accepted for call compatibility
  # but is not used.
  def put_into_archive(disk_file_path, zipfile, zipfile_path, entry)
    if File.extname(zipfile_path) == ".docx"
      # A .docx is itself a zip: open the file on disk and rewrite its
      # word/document.xml from the Nokogiri-parsed document.
      # NOTE(review): this appears to modify the source .docx in place -- confirm.
      Zip::File.open(disk_file_path) do |zip|
        doc = zip.read("word/document.xml")
        xml = Nokogiri::XML.parse(doc)
        zip.get_output_stream("word/document.xml") { |f| f.write(xml.to_s) }
      end
    end
    zipfile.add(zipfile_path, disk_file_path)
  end
end

How to extract only one file type within a .ZIP file on another directory

Similar questions have been already asked before on this subject, but I'm unable to work it out. I have a batch of .ZIP files from which I only need to extract the .TXT files from all of them, and then move them to another location. I've tried this:
unzip test.zip '*.txt' #This would only be useful for extracting one single .ZIP file, not for each one.
And this:
require 'FileUtils'
require 'zip'
# Question code: walks every entry of a zip archive and extracts it beneath a
# folder literally named "destination_path".
# NOTE(review): the parameter is named `file`, but the body opens `file_path`,
# which is not defined in this snippet; `destination` is never used.
def unzip_file (file, destination)
Zip::File.open(file_path) { |zip_file|
zip_file.each { |f|
f_path=File.join("destination_path", f.name)
FileUtils.mkdir_p(File.dirname(f_path)) #Don't quite understand this line
zip_file.extract(f, f_path) #Extraction is done here, but where?
}
}
end
Neither option was successful, could you please suggest?
Your method has a parameter destination, but it is never used.
Your code
f_path=File.join("destination_path", f.name)
defines a folder named destination_path followed by the path of the file in the zip. Probably you want the content of the parameter destination.
With
FileUtils.mkdir_p(File.dirname(f_path))
you create the target path with all directories (a mkdir would only create one directory and if the parent directory does not exist you get an error).
In summary: Try this code:
require 'FileUtils'
require 'zip'
# Extracts every entry of the zip archive `file` beneath `destination`,
# keeping each entry's path inside the archive.
#
# file        - path to the zip archive to read.
# destination - directory under which the entries are written.
def unzip_file(file, destination)
  # Open the archive named by the `file` parameter.
  Zip::File.open(file) do |zip_file|
    zip_file.each do |f|
      f_path = File.join(destination, f.name)
      # mkdir_p creates the whole parent chain; a plain mkdir would fail when
      # an intermediate directory is missing.
      FileUtils.mkdir_p(File.dirname(f_path))
      puts "Extract file to %s" % f_path
      zip_file.extract(f, f_path)
    end
  end
end
After your comment:
To get only the txt-files inside the zip you can replace each with glob:
require 'zip' #Already loads FileUtils
#~ require 'FileUtils'
# Extracts the entries matched by the '*.txt' glob from the zip archive at
# file_path, writing each beneath destination under its archive path.
#
# file_path   - path to the zip archive.
# destination - directory under which the matched entries are written.
def unzip_file(file_path, destination)
  Zip::File.open(file_path) do |archive|
    archive.glob('*.txt') do |entry|
      target = File.join(destination, entry.name)
      # Make sure the parent directory chain of the target exists.
      FileUtils.mkdir_p(File.dirname(target))
      puts format("Extract file to %s", target)
      archive.extract(entry, target)
    end
  end
end
Then you can call the method for each zip-file in Folder_A and define the destination Folder_B:
Dir['Folder_A/*.zip'].each{|zipfile|
unzip_file(zipfile, 'Folder_B')
}

How to take the result from another method

I have a directory structure with sub-directories:
../../../../../MY_PROJECT/TEST_A/cats/
../../../../../MY_PROJECT/TEST_B/dogs/
../../../../../MY_PROJECT/TEST_A/tigers/
../../../../../MY_PROJECT/TEST_A/elephants/
each of which has a file that ends with ".sln":
../../../../../MY_PROJECT/TEST_A/cats/cats.sln
../../../../../MY_PROJECT/TEST_B/dogs/dogs.sln
...
These files contain information specific to their directory. I would like to do the following:
Create a file "myfile.txt" within each sub-directory, and write some strings to them:
../../../../../MY_PROJECT/TEST_A/cats/myfile.txt
../../../../../MY_PROJECT/TEST_B/dogs/myfile.txt
../../../../../MY_PROJECT/TEST_A/tigers/myfile.txt
../../../../../MY_PROJECT/TEST_A/elephants/myfile.txt
Copy a specific string in the ".sln" files to the myfile.txt of certain directories using the following method:
# Scans every .sln file under the MY_PROJECT/TEST_* directories and prints,
# for each line matching the "C Source files" pattern, the quoted text that
# the /".*.c"/ scan finds on that line.
def parse_sln_files
  Dir["../../../../../MY_PROJECT/TEST_*/**/*.sln"].each do |sln_path|
    File.open(sln_path) do |sln|
      sln.each_line do |text|
        next unless text =~ /C Source files ="..\\/

        puts text.scan(/".*.c"/)
      end
    end
  end
end
I would like to do something like this:
# Question sketch: for every directory matched by the MY_PROJECT glob, creates
# myfile.txt, then rewrites every myfile.txt matched by a second glob with two
# fixed lines plus a placeholder for the parse_sln_files result.
# NOTE(review): the inner glob is rooted at .../TEST/ while the outer one is
# rooted at .../MY_PROJECT/ -- confirm which is intended.
def create_myfile
Dir['../../../../../MY_PROJECT/TEST_*/*/'].each do |dir|
# NOTE(review): the File object returned by File.new is never closed.
File.new File.join(dir, 'myfile.txt'), 'w+'
# This inner loop runs once per outer directory, rewriting every matched file each time.
Dir['../../../../../TEST/TEST_*/*/myfile.txt'].each do |path|
File.open(path,'w+') do |f|
f.puts "some text...."
f.puts "some text..."
f.puts # here I would like to return the result of parse_sln_files
end
end
end
end
Any suggestions on how to express this?
It seems like you want to read the list of C source file names from a Visual C++ solution file and store it in a separate file in the same directory. You will probably have to merge the two loops shown in your code and do something like this:
# For every .sln file under the MY_PROJECT/TEST_* directories, collects the
# quoted text found on its "C Source files" lines and writes it, after two
# fixed header lines, to myfile.txt in the same directory as the .sln file.
def parse_sln_and_store_source_files
  Dir["../../../../../MY_PROJECT/TEST_*/**/*.sln"].each do |sln_path|
    # Matches gathered from this solution file only.
    matches = []
    File.open(sln_path) do |sln|
      sln.each_line do |text|
        next unless text =~ /C Source files ="..\\/

        matches << text.scan(/".*.c"/)
      end
    end

    # myfile.txt lives next to the .sln file it was derived from.
    out_path = "#{File.expand_path(File.dirname(sln_path))}/myfile.txt"
    File.open(out_path, 'w+') do |out|
      out.puts "some text...."
      out.puts "some text..."
      matches.each { |found| out.puts found }
    end
  end
end
This can be done a bit more elegantly with some further refactoring. Also note that this code is untested; hopefully you get the gist of what I am suggesting.

ruby Copy files based on date modified

I have files (with different extensions) that are added every 10 minutes on a windows share (A) and want to copy them to linux server (B) and do some operations on them with a script.
Using Ruby and FileUtils, how can I create a script that copies only the most recently added files (or a watcher that copies files to folder B whenever they are added to folder A)?
update this is what i have so far
require 'fileutils'
require 'time'
# Copies every file matching "*.*" in one folder into another when
# instantiated, oldest modification time first.
class Copier
  # from - source directory to scan.
  # to   - directory the files are copied into, keeping their base names.
  def initialize(from, to)
    puts "copying files... puts #{Time.now} \n"
    Dir["#{from}/*.*"].sort_by { |path| File.stat(path).mtime }.each do |orig|
      dest = "#{to}/#{File.basename(orig)}"
      FileUtils.cp(orig, dest)
      puts "cp file : from #{orig} => to #{dest}"
    end
  end
end
Copier.new("/mnt/windows_share", "linux_folder")
But it copies all the files each time it is called...
This is what I ended up doing to get the files modified in the last 10 minutes and then copy them from the Windows share folder to the Linux folder:
require 'fileutils'
require 'time'
# Copies recently modified files matching "*.*" from one folder into another
# when instantiated.
class Copier
  # from    - source directory to scan.
  # to      - directory the files are copied into, keeping their base names.
  # max_age - only files modified within this many seconds are copied
  #           (defaults to 10 minutes).
  def initialize(from, to, max_age: 60 * 10)
    puts "copying files... puts #{Time.now} \n"
    # Compute the cutoff once so every file is compared against the same instant.
    cutoff = Time.now - max_age
    my_files = Dir["#{from}/*.*"].select { |fname| File.mtime(fname) > cutoff }
    my_files.each do |filename|
      name = File.basename(filename)
      dest = "#{to}/#{name}"
      FileUtils.cp(filename, dest)
      puts "cp file : from #{filename} => to #{dest}"
    end
  end
end
Copier.new("/mnt/windows_share", "linux_folder")

Unzipping a file and ignoring 'junk files' added by OS X

I am using code like the following to unzip files in Ruby:
# Prints the name of every entry in the zip archive `file` that reports
# itself as a file.
def unzip_file(file)
  Zip::ZipFile.open(file) do |archive|
    archive.each { |entry| puts entry.name if entry.file? }
  end
end
I would like to ignore all files generated by compress zip in Mac such as: .DS_Store, etc. How can I best do it?
I believe that this does what you want:
# Print the names of the archive's file entries, leaving out names that match
# the OS X junk pattern (.DS_Store, __MACOSX, or a "._" prefix on a path part).
Zip::ZipFile.open(file) do |zip_file|
  junk = /\.DS_Store|__MACOSX|(^|\/)\._/
  names = zip_file.select(&:file?).map(&:name).reject { |n| n =~ junk }
  puts names
end
That regular expression says,
Throw away files
that have .DS_Store in the name,
that have __MACOSX in the name,
or that have ._ at the beginning of the name (^) or right after a /.
That should cover all the 'junk' files and hopefully not hit any others.
If you want more than just the names—if you want to process the non-junk files—then instead you might do the following:
# Collect the archive's file entries, drop those whose name matches the OS X
# junk pattern, and keep the entry objects so they can be processed further.
Zip::ZipFile.open(file) do |zip_file|
  files = zip_file.select(&:file?)
  files.reject! { |f| f.name =~ /\.DS_Store|__MACOSX|(^|\/)\._/ }
  # Entries expose `name` (as used in the filter above), not `names`.
  puts files.map(&:name) # or do whatever else you want with the array of files
end

Resources