I have a CSV that I like to save all my hash values on it. I am using nokogiri sax to parse a xml document and then save it to a CSV.
The sax parser:
require 'rubygems'
require 'nokogiri'
require 'csv'
class MyDocument < Nokogiri::XML::SAX::Document
HEADERS = [ :titles, :identifier, :typeOfLevel, :typeOfResponsibleBody,
:type, :exact, :degree, :academic, :code, :text ]
def initialize
#infodata = {}
#infodata[:titles] = Array.new([])
end
def start_element(name, attrs)
#attrs = attrs
#content = ''
end
def end_element(name)
if name == 'title'
Hash[#attrs]["xml:lang"]
#infodata[:titles] << #content
#content = nil
end
if name == 'identifier'
#infodata[:identifier] = #content
#content = nil
end
if name == 'typeOfLevel'
#infodata[:typeOfLevel] = #content
#content = nil
end
if name == 'typeOfResponsibleBody'
#infodata[:typeOfResponsibleBody] = #content
#content = nil
end
if name == 'type'
#infodata[:type] = #content
#content = nil
end
if name == 'exact'
#infodata[:exact] = #content
#content = nil
end
if name == 'degree'
#infodata[:degree] = #content
#content = nil
end
if name == 'academic'
#infodata[:academic] = #content
#content = nil
end
if name == 'code'
Hash[#attrs]['source="vhs"']
#infodata[:code] = #content
#content = nil
end
if name == 'ct:text'
#infodata[:beskrivning] = #content
#content = nil
end
end
def characters(string)
#content << string if #content
end
def cdata_block(string)
characters(string)
end
def end_document
File.open("infodata.csv", "ab") do |f|
csv = CSV.generate_line(HEADERS.map {|h| #infodata[h] })
csv << "\n"
f.write(csv)
end
end
end
creating new an object for every file that is store in a folder(47.000xml files):
parser = Nokogiri::XML::SAX::Parser.new(MyDocument.new)
counter = 0
Dir.glob('/Users/macbookpro/Desktop/sax/info_xml/*.xml') do |item|
parser.parse(File.open(item, 'rb'))
counter += 1
puts "Writing file nr: #{counter}"
end
The issue:
I dont get a new line for every new set of values. Any ideas?
3 xml files for trying the code:
https://gist.github.com/2378898
https://gist.github.com/2378901
https://gist.github.com/2378904
You need to open the file using "a" mode (opening a file with "w" clears any previous content).
Appending an array to the csv object will automatically insert newlines. Hash#values returns an array of the values, but it would be safer to force the order. Flattening the array will potentially lead to misaligned columns (e.g. [[:title1, :title2], 'other-value'] will result in [:title1, :title2, 'other-value']). Try something like this:
HEADERS = [:titles, :identifier, ...]
def end_document
# with ruby 1.8.7
File.open("infodata.csv", "ab") do |f|
csv = CSV.generate_line(HEADERS.map { |h| #infodata[h] })
csv << "\n"
f.write(csv)
end
# with ruby 1.9.x
CSV.open("infodata.csv", "ab") do |csv|
csv << HEADERS.map { |h| #infodata[h] }
end
end
The above change can be verified by executing the following:
require "csv"
class CsvAppender
HEADERS = [ :titles, :identifier, :typeOfLevel, :typeOfResponsibleBody, :type,
:exact, :degree, :academic, :code, :text ]
def initialize
#infodata = { :titles => ["t1", "t2"], :identifier => 0 }
end
def end_document
#infodata[:identifier] += 1
# with ruby 1.8.7
File.open("infodata.csv", "ab") do |f|
csv = CSV.generate_line(HEADERS.map { |h| #infodata[h] })
csv << "\n"
f.write(csv)
end
# with ruby 1.9.x
#CSV.open("infodata.csv", "ab") do |csv|
# csv << HEADERS.map { |h| #infodata[h] }
#end
end
end
appender = CsvAppender.new
3.times do
appender.end_document
end
File.read("infodata.csv").split("\n").each do |line|
puts line
end
After running the above the infodata.csv file will contain:
"[""t1"", ""t2""]",1,,,,,,,,
"[""t1"", ""t2""]",2,,,,,,,,
"[""t1"", ""t2""]",3,,,,,,,,
I guess you need an extra loop. Something similar to
CSV.open("infodata.csv", "wb") do |csv|
csv << #infodata.keys
#infodata.each do |key, value|
csv << value
end
end
Related
I am trying to use for loop and if condition in creating a file using File.open and puts function. My code is
I want to write these entries only if it is not null. How to do it?
Edit: Full code is
require 'fileutils'
require 'json'
require 'open-uri'
require 'pp'
data = JSON.parse('data')
array = data
if array &.any?
drafts_dir = File.expand_path('../drats', dir)
FileUtils.mkdir_p(drafts_dir)
array.each do |entry|
File.open(File.join(drafts_dir, "#{entry['twitter']}.md"), 'wb') do |draft|
keys = 1.upto(6).map { |i| "key_#{i}" }
values = keys.map { |k| "<img src='#{entry['image']} alt='image'>" if entry['image']}
# you can also do values = entry.values_at(*keys)
str = values.reject do |val|
val.nil? || val.length == 0
end.join("\n")
draft.puts str
end
end
end
I need the the file `mark.md` as
https://somesite.com/image.png' alt='image'>
https://twitter.com/mark'>mark
and `kevin.md` likewise.
you can build the string from an array, rejecting the null values:
keys = 1.upto(6).map { |i| "key_#{i}" }
values = keys.map { |k| entry[k] }
# you can also do values = entry.values_at(*keys)
str = values.reject do |val|
val.nil? || val.length == 0
end.join("\n")
draft.puts str
update in response to your changed question. Do this:
array.each do |entry|
File.open(File.join(drafts_dir, "#{entry['twitter']}.md"), 'wb') do |draft|
next unless ['image', 'twitter'].all? { |k| entry[k]&.length > 1 }
str = [
"<img src='#{entry['image']} alt='image'>",
"<a href='https://twitter.com/#{entry['twitter']}'>#{entry['twitter']}</a>"
].join("\n")
draft.puts str
end
end
Assuming, your entry is hash.
final_string = ''
entry.each_value { |value| final_string << "#{value}\n" }
puts final_string
Read a csv format file and construct a new class with the name of the file dynamically. So if the csv is persons.csv, the ruby class should be person, if it's places.csv, the ruby class should be places
Also create methods for reading and displaying each value in "csv" file and values in first row of csv file will act as name of the function.
Construct an array of objects and associate each object with the row of a csv file. For example the content of the csv file could be
name,age,city
abd,45,TUY
kjh,65,HJK
Previous code :
require 'csv'
class Feed
def initialize(source_name, column_names = [])
if column_names.empty?
column_names = CSV.open(source_name, 'r', &:first)
end
columns = column_names.reduce({}) { |columns, col_name| columns[col_name] = []; columns }
define_singleton_method(:columns) { column_names }
column_names.each do |col_name|
define_singleton_method(col_name.to_sym) { columns[col_name] }
end
CSV.foreach(source_name, headers: true) do |row|
column_names.each do |col_name|
columns[col_name] << row[col_name]
end
end
end
end
feed = Feed.new('input.csv')
puts feed.columns #["name", "age", "city"]
puts feed.name # ["abd", "kjh"]
puts feed.age # ["45", "65"]
puts feed.city # ["TUY", "HJK"]
I am trying to refine this solution using class methods and split code into smaller methods. Calling values outside the class using key names but facing errors like "undefined method `age' for Feed:Class". Is that a way I can access values outside the class ?
My solution looks like -
require 'csv'
class Feed
attr_accessor :column_names
def self.col_name(source_name, column_names = [])
if column_names.empty?
#column_names = CSV.open(source_name, :headers => true)
end
columns = #column_names.reduce({}) { |columns, col_name| columns[col_name] = []; columns }
end
def self.get_rows(source_name)
col_name(source_name, column_names = [])
define_singleton_method(:columns) { column_names }
column_names.each do |col_name|
define_singleton_method(col_name.to_sym) { columns[col_name] }
end
CSV.foreach(source_name, headers: true) do |row|
#column_names.each do |col_name|
columns[col_name] << row[col_name]
end
end
end
end
obj = Feed.new
Feed.get_rows('Input.csv')
puts obj.class.columns
puts obj.class.name
puts obj.class.age
puts obj.class.city
Expected Result -
input = Input.new
p input.name # ["abd", "kjh"]
p input.age # ["45", "65"]
input.name ='XYZ' # Value must be appended to array
input.age = 25
p input.name # ["abd", "kjh", "XYZ"]
p input.age # ["45", "65", "25"]
Let's create the CSV file.
str =<<END
name,age,city
abd,45,TUY
kjh,65,HJK
END
FName = 'temp/persons.csv'
File.write(FName, str)
#=> 36
Now let's create a class:
klass = Class.new
#=> #<Class:0x000057d0519de8a0>
and name it:
class_name = File.basename(FName, ".csv").capitalize
#=> "Persons"
Object.const_set(class_name, klass)
#=> Persons
Persons.class
#=> Class
See File::basename, String#capitalize and Module#const_set.
Next read the CSV file with headers into a CSV::Table object:
require 'csv'
csv = CSV.read(FName, headers: true)
#=> #<CSV::Table mode:col_or_row row_count:3>
csv.class
#=> CSV::Table
See CSV#read. We may now create the methods name, age and city.
csv.headers.each { |header| klass.define_method(header) { csv[header] } }
See CSV#headers, Module::define_method and CSV::Row#[].
We can now confirm they work as intended:
k = klass.new
k.name
#=> ["abd", "kjh"]
k.age
#=> ["45", "65"]
k.city
#=> ["TUY", "HJK"]
or
p = Persons.new
#=> #<Persons:0x0000598dc6b01640>
p.name
#=> ["abd", "kjh"]
and so on.
I have two CSV files with some common headers and others that only appear in one or in the other, for example:
# csv_1.csv
H1,H2,H3
V11,V22,V33
V14,V25,V35
# csv_2.csv
H1,H4
V1a,V4b
V1c,V4d
I would like to merge both and obtain a new CSV file that combines all the information for the previous CSV files. Injecting new columns when needed, and feeding the new cells with null values.
Result example:
H1,H2,H3,H4
V11,V22,V33,
V14,V25,V35,
V1a,,,V4b
V1c,,,V4d
Challenge accepted :)
#!/usr/bin/env ruby
require "csv"
module MergeCsv
class << self
def run(csv_paths)
csv_files = csv_paths.map { |p| CSV.read(p, headers: true) }
merge(csv_files)
end
private
def merge(csv_files)
headers = csv_files.flat_map(&:headers).uniq.sort
hash_array = csv_files.flat_map(&method(:csv_to_hash_array))
CSV.generate do |merged_csv|
merged_csv << headers
hash_array.each do |row|
merged_csv << row.values_at(*headers)
end
end
end
# Probably not the most performant way, but easy
def csv_to_hash_array(csv)
csv.to_a[1..-1].map { |row| csv.headers.zip(row).to_h }
end
end
end
if(ARGV.length == 0)
puts "Use: ruby merge_csv.rb <file_path_csv_1> <file_path_csv_2>"
exit 1
end
puts MergeCsv.run(ARGV)
I have the answer, I just wanted to help people that is looking for the same solution
require "csv"
module MergeCsv
def self.run(csv_1_path, csv_2_path)
merge(File.read(csv_1_path), File.read(csv_2_path))
end
def self.merge(csv_1, csv_2)
csv_1_table = CSV.parse(csv_1, :headers => true)
csv_2_table = CSV.parse(csv_2, :headers => true)
return csv_2_table.to_csv if csv_1_table.headers.empty?
return csv_1_table.to_csv if csv_2_table.headers.empty?
headers_in_1_not_in_2 = csv_1_table.headers - csv_2_table.headers
headers_in_1_not_in_2.each do |header_in_1_not_in_2|
csv_2_table[header_in_1_not_in_2] = nil
end
headers_in_2_not_in_1 = csv_2_table.headers - csv_1_table.headers
headers_in_2_not_in_1.each do |header_in_2_not_in_1|
csv_1_table[header_in_2_not_in_1] = nil
end
csv_2_table.each do |csv_2_row|
csv_1_table << csv_1_table.headers.map { |csv_1_header| csv_2_row[csv_1_header] }
end
csv_1_table.to_csv
end
end
if(ARGV.length != 2)
puts "Use: ruby merge_csv.rb <file_path_csv_1> <file_path_csv_2>"
exit 1
end
puts MergeCsv.run(ARGV[0], ARGV[1])
And execute it from the console this way:
$ ruby merge_csv.rb csv_1.csv csv_2.csv
Any other, maybe cleaner, solution is welcome.
Simplied first answer:
How to use it:
listPart_A = CSV.read(csv_path_A, headers:true)
listPart_B = CSV.read(csv_path_B, headers:true)
listPart_C = CSV.read(csv_path_C, headers:true)
list = merge(listPart_A,listPart_B,listPart_C)
Function:
def merge(*csvs)
headers = csvs.map {|csv| csv.headers }.flatten.compact.uniq.sort
csvs.flat_map(&method(:csv_to_hash_array))
end
def csv_to_hash_array(csv)
csv.to_a[1..-1].map do |row|
Hash[csv.headers.zip(row)]
end
end
I had to do something very similar
to merge n CSV files that the might share some of the columns but some may not
if you want to keep a structure and do it easily,
I think the best way is to convert to hash and then re-convert to CSV file
my solution:
#!/usr/bin/env ruby
require "csv"
def join_multiple_csv(csv_path_array)
return nil if csv_path_array.nil? or csv_path_array.empty?
f = CSV.parse(File.read(csv_path_array[0]), :headers => true)
f_h = {}
f.headers.each {|header| f_h[header] = f[header]}
n_rows = f.size
csv_path_array.shift(1)
csv_path_array.each do |csv_file|
curr_csv = CSV.parse(File.read(csv_file), :headers => true)
curr_h = {}
curr_csv.headers.each {|header| curr_h[header] = curr_csv[header]}
new_headers = curr_csv.headers - f_h.keys
exist_headers = curr_csv.headers - new_headers
new_headers.each { |new_header|
f_h[new_header] = Array.new(n_rows) + curr_csv[new_header]
}
exist_headers.each {|exist_header|
f_h[exist_header] = f_h[exist_header] + curr_csv[exist_header]
}
n_rows = n_rows + curr_csv.size
end
csv_string = CSV.generate do |csv|
csv << f_h.keys
(0..n_rows-1).each do |i|
row = []
f_h.each_key do |header|
row << f_h[header][i]
end
csv << row
end
end
return csv_string
end
if(ARGV.length < 2)
puts "Use: ruby merge_csv.rb <file_path_csv_1> <file_path_csv_2> .. <file_path_csv_n>"
exit 1
end
csv_str = join_multiple_csv(ARGV)
f = File.open("results.csv", "w")
f.write(csv_str)
puts "CSV merge is done"
I have the following class:
require 'strscan'
class ConfParser
include Enumerable
class Error < StandardError; end
VERSION = '0.0.1'
SECTION_REGEX = /^\[ # Opening bracket
([^\]]+) # Section name
\]$ # Closing bracket
/x
PARAMETER_REGEX = /^\s*([^:]+) # Option
:
(.*?)$ # Value
/x
attr_accessor :filename, :sections
CONFIG_DIRECTORY = "./config"
ENCODING = "UTF-8"
def self.read(filename, opts = {})
new(opts.merge(:filename => filename))
end
def initialize(opts = {})
#filename = opts.fetch(:filename)
#separator = opts.fetch(:separator, ":")
#file = "#{CONFIG_DIRECTORY}/#{#filename}"
#content = nil
#config = Hash.new { |h,k| h[k] = Hash.new }
load
end
def load
raise_error("First line of config file contain be blank") if first_line_empty?
f = File.open(#file, 'r')
#content = f.read
parse!
ensure
f.close if f && !f.closed?
end
def sections
#config.keys
end
def [](section)
return nil if section.nil?
#config[section.to_s]
end
def []=( section, value )
#config[section.to_s] = value
end
private
def parse!
#_section = nil
#_current_line = nil
property = ''
string = ''
#config.clear
scanner = StringScanner.new(#content)
until scanner.eos?
#_current_line = scanner.check(%r/\A.*$/) if scanner.bol?
if scanner.scan(SECTION_REGEX)
#_section = #config[scanner[1]]
else
tmp = scanner.scan_until(%r/([\n"#{#param}#{#comment}] | \z | \\[\[\]#{#param}#{#comment}"])/mx)
raise_error if tmp.nil?
len = scanner[1].length
tmp.slice!(tmp.length - len, len)
scanner.pos = scanner.pos - len
string << tmp
end
end
process_property(property, string)
logger #config
end
def process_property( property, value )
value.chomp!
return if property.empty? and value.empty?
return if value.sub!(%r/\\\s*\z/, '')
property.strip!
value.strip!
parse_error if property.empty?
current_section[property.dup] = unescape_value(value.dup)
property.slice!(0, property.length)
value.slice!(0, value.length)
nil
end
def logger log
puts "*"*50
puts log
puts "*"*50
end
def first_line_empty?
File.readlines(#file).first.chomp.empty?
end
def raise_error(msg = 'Error processing line')
raise Error, "#{msg}: #{#_current_line}"
end
def current_section
#_section ||= #config['header']
end
end
The above class parses files that are setup like so:
[header]
project: Hello World
budget : 4.5
accessed :205
[meta data]
description : This is a tediously long description of the Hello World
project that you are taking. Tedious isn't the right word, but
it's the first word that comes to mind.
correction text: I meant 'moderately,' not 'tediously,' above.
[ trailer ]
budget:all out of budget.
You start running it like this:
require 'conf_parser'
cf = ConfParser.read "/path/to/conf/file"
For some reason when the parse! method runs, an infinite loop occurs and I can't figure out why. Any reason why this would be happening? I have never used StringScanner before, so it may be my lack of knowledge of the class
At the risk of stating the obvious, you are most likely never satisfying scanner.eos?, which in turn would mean that you're not advancing the scan pointer to the end of the string. Since the only change to scanner.pos in the else branch of parse! is to decrement it (i.e. by len), this would be understandable. If the if branch doesn't advance it to the end, you'll never terminate.
I've got the following code snippet which I'm trying to return to Shoes as an array of integers and strings
Shoes.setup do
gem 'snmp'
end
class SNMPManager
require 'snmp'
include SNMP
attr_accessor :hostname, :framerate
def initialize(params = {})
#hostname = params.fetch(:hostname, 'localhost')
#framerate = params.fetch(:framerate, 25)
end
def getValues
Manager.open(:host => #hostname, :mib_modules => ["SNMPv2-MIB"]) do |manager|
poolArray = []
ifTable = ObjectId.new("1.3.6.1.4.1.15248.2.5.1.3.1")
manager.walk(ifTable) do |row|
poolArray.push(row.inspect)
end
groups = poolArray.group_by{ |s| s.split(',').first[/\d+$/] }
#values = groups.map{ |key, ary| ary.map{ |s| s[/value=(\S+)/, 1] } }
end
end
def to_s
return #values
end
end
Shoes.app do
##hostname = edit_line
#man =SNMPManager.new(:host => #hostname)
man = SNMPManager.new
man.getValues
puts 'SNMP Space Monitor'
man.to_s.each {|v|
#note = para "#{v[1]} \t(Pool: #{v[0]}) \tCapacity: #{v[3].to_i} \tFree Protons: #{v[2].to_i} \tPercent Free: %#{(v[3].to_f/v[2].to_f*100).round(2)}"
}
end
For some reason Shoes is displaying it like:
But if I run the ruby script normally, and output it with puts, it displays correctly, and calculates the response correctly.
I know I'm missing something obvious, but can't see what it is.
I'm trying to cast the array to another array, but that seems to do bugger all.
1)
#values = Array(#values)
That is a bit odd. All the Array constructor does is create a copy of #values. The only reason you would do that is if #values contains a reference to some array that is referenced by another variable, and you don't want that other variable to be able to change #values. But then why didn't you make a copy when you first assigned the array to #values?
2) Your to_s method does not return a string.
This is a great way to use Shoes.
It looks like Shoes is giving you paras containing array_of_integers_and_strings.to_s.
Complete code would be super helpful.
Resolved the issue by switching from 'red shoes' to 'green shoes'
No other changes required, but here's my current code (which successfully times out when run as a console app. but not as a shoes app - go figure)
require 'green_shoes'
gem 'snmp'
class SNMPManager
require 'snmp'
include SNMP
attr_accessor :hostname, :framerate
def initialize(params = {})
#hostname = params.fetch(:hostname, 'localhost')
#framerate = params.fetch(:framerate, 25)
end
def getValues
Manager.open(:host => #hostname) do |manager|
#poolArray = []
poolsTable = ObjectId.new("1.3.6.1.4.1.15248.2.5.1.3.1")
manager.walk(poolsTable) do |row|
#poolArray.push(row.inspect)
end
if #poolArray.empty?
puts "Manager Down"
#poolArray = [
'[name=1.3.6.1.4.1.15248.2.5.1.3.1.1.8650, value=8650 (INTEGER)]',
'[name=1.3.6.1.4.1.15248.2.5.1.3.1.1.8651, value=8651 (INTEGER)]',
'[name=1.3.6.1.4.1.15248.2.5.1.3.1.2.8650, value=QNewsAK (OCTET STRING)]',
'[name=1.3.6.1.4.1.15248.2.5.1.3.1.2.8651, value=QSuite4AK (OCTET STRING)]',
'[name=1.3.6.1.4.1.15248.2.5.1.3.1.3.8650, value=46835255 (INTEGER)]',
'[name=1.3.6.1.4.1.15248.2.5.1.3.1.3.8651, value=11041721 (INTEGER)]',
'[name=1.3.6.1.4.1.15248.2.5.1.3.1.4.8650, value=8442357 (INTEGER)]',
'[name=1.3.6.1.4.1.15248.2.5.1.3.1.4.8651, value=5717570 (INTEGER)]',
'[name=1.3.6.1.4.1.15248.2.5.1.3.1.5.8650, value=0 (INTEGER)]',
'[name=1.3.6.1.4.1.15248.2.5.1.3.1.5.8651, value=0 (INTEGER)]',
]
else puts "Manager Up"
end
groups = #poolArray.group_by{ |s| s.split(',').first[/\d+$/] }
#values = groups.map{ |key, ary| ary.map{ |s| s[/value=(\S+)/, 1] } }
end
end
def to_s
return #values
end
protected
end
Shoes.app(:title=>'SNMP Space Monitor') do
man = SNMPManager.new
stack do
every(300) {
man.getValues
man.to_s
man.to_s.each {|v|
para "#{v[1]} \t(Pool: #{v[0]}) \tCapacity: #{v[2].to_i} \tFree Protons: #{v[3].to_i} \tPercent Free: #{(v[3].to_f/v[2].to_f*100).round(2)}%"
}
}
end
end