how to remove duplicate times in ruby - ruby

How can I make it read each time only once? This is my code:
# Writes one station's rainfall readings to ./rainfall_tideda/<station id>_rf.csv
# as increments relative to the previous reading.
#
# The instance variables below had their "@" sigil garbled into "#" in the
# published listing, which left the string interpolations and the subtraction
# syntactically broken; they are restored here.

# Table name: the first run of non-underscore characters in station_info_state.
state = station_info_state[/[^_]+/]
query = " select * from #{state} where MAIN_ID = #{main_id} order by date_taken ASC, TIME ASC " #order by date_taken ASC, TIME ASC "
state = DB["#{query}"]
# Baseline for the first increment (the loop subtracts it from the first reading).
@last_daily_rainfall = 0
CSV.open("./rainfall_tideda/#{@station_info_station_id}_rf.csv", "w+") do |csv|
  csv << ["#{@station_info_station_id}", "INCREMENTAL", "#{@station_info_station_name}"]
  state.each do |line|
    # Zero-pad the time to four digits, then insert ":" after the first pair ("930" -> "09:30").
    time_taken = line[:'time'].to_i
    time_taken = format('%04d', time_taken).to_s
    simulated_time_taken = time_taken.to_s.gsub(/.{2}(?=.)/, '\0:')
    # Normalise the date to ISO form and switch the separator to "/".
    date_taken = line[:'date_taken'].to_s
    date_taken = Date.parse("#{date_taken}").to_s
    date_taken = date_taken.gsub("-", "/")
    current_daily_rainfall = line[:'daily_rainfall']
    # NOTE(review): '-9999' is a String; if daily_rainfall is numeric these
    # "!= '-9999'" comparisons are always true -- confirm the column type.
    if current_daily_rainfall >= 0 && current_daily_rainfall != '-9999'
      this_daily_rainfall = current_daily_rainfall - @last_daily_rainfall
      if this_daily_rainfall > 0
        csv << ["#{date_taken}", "#{simulated_time_taken}", "#{this_daily_rainfall}"]
      else
        if this_daily_rainfall != '-9999' && this_daily_rainfall == 0
          csv << ["#{date_taken}", "#{simulated_time_taken}", "0"]
        end
        # A negative difference means the running total dropped; the raw reading is written instead.
        if this_daily_rainfall != '-9999' && this_daily_rainfall < 0
          csv << ["#{date_taken}", "#{simulated_time_taken}", "#{current_daily_rainfall}"]
        end
      end
      @last_daily_rainfall = line[:'daily_rainfall']
    end
  end
end
duplicate time image
The result is shown in the image above. As you can see, it contains many duplicate times.

You can make a Set (from Ruby's standard `set` library) which stores the dates/times you've already seen:
processed_date_times = Set.new
Then, inside your loop, record each date/time pair once its row has been written:
processed_date_times.add([date_taken, simulated_time_taken])
And only write the row if the pair has not been recorded yet (this check has to run before the `add` call for the same row):
unless processed_date_times.member?([date_taken, simulated_time_taken])
# ... add row to csv
end

Related

How to Hash content to write in file as format mentioned as below?

I have written a Ruby script for that. In it, the "all_data" hash holds all the required content.
#!/usr/bin/env ruby
# Reads the third cell of every table row in each .docx file below the current
# directory, groups the cell texts under numbered headings, and creates one
# "<chapter>/01.md" file per chapter.
require 'docx'

file_data = []
name_file = "test"
t = ""
array_desc = []
heading_hash = {}
all_data = {}
temp = ""
output = ""
folder_name = ""
directory_name = ""
flag = true
count = 0
md_file_name = ''

Dir.glob("**/*.docx") do |file_name|
  doc = Docx::Document.open(file_name)

  # Collect the text of the third cell (index 2) of every row, with "=" removed.
  doc.tables.each do |table|
    table.rows.each do |row| # Row-based iteration
      row.cells.each_with_index do |cell, i|
        file_data << cell.text.gsub('=', '') if i == 2
      end
    end
  end

  file_data.each_with_index do |l, d|
    # l is file_data[d], so this check is always true; kept to preserve the original shape.
    if l.include? file_data[d]
      # An entry whose first character is a non-zero digit starts a new numbered section.
      if l.strip[0].to_i != 0
        md_file_name = file_data[d].split(".")
        #start folder name
        if flag
          directory_name = md_file_name[0].to_i
          flag = false
        end
        count += 1
        t = file_data[d + 1]
        # Flush the descriptions gathered for the previous heading.
        if array_desc.size > 0
          heading_hash[temp] = array_desc
          all_data[md_file_name[0].strip] = heading_hash
          array_desc = []
        end
      else
        if t != l
          array_desc << l
          temp = t
        end
      end
    end
  end

  # Flush whatever is still pending after the last entry.
  if array_desc.size > 0
    heading_hash[temp] = array_desc
    all_data[md_file_name[0].strip] = heading_hash
    array_desc = []
  end

  all_data.each do |k, v|
    v.each do |(hk, hv)|
      if hk != ""
        # Chapter folders are zero-prefixed unless the key already starts with "0".
        chapter_no = k[0, 1] == 0.to_s ? k : "0#{k}"
        # File.exist? replaces File.exists?, which was removed in Ruby 3.2.
        Dir.mkdir("#{chapter_no}") unless File.exist?("#{chapter_no}")
        output_name = "#{chapter_no}/#{File.basename("01", '.*')}.md"
        # Block form closes the handle; the original left it open.
        File.open(output_name, 'w') do |md_file|
          # md_file << "#"+"#{hk}\n\n"
          # md_file << "#{hv} \n\n"
          hv.each do |des|
            # puts des
          end
        end
      end
    end
  end
end
source docx file
Download the file above and put the script and the docx (source file) in the same folder. When you run the script from a terminal ($ ./script.rb) you will see folders named 01, 02, etc., and inside each there will be a file with the .md extension.
I want to output as below description:
## FOLDER 01 > FILE 01.md, here data in file like hk as heading (for Heading you can put # before hk)and hv
## FOLDER 02 > FILE 01.md, here data in file like hk as heading (for Heading you can put # before hk)and hv
Please use my code and check that is working or not.
# Replacement for the Dir.glob section of the question's script; it relies on
# the variables that script declares beforehand (file_data, flag, count, t,
# temp, array_desc, heading_hash, all_data, ...).
Dir.glob("**/*.docx") do |file_name|
  doc = Docx::Document.open(file_name)

  # Collect the text of the third cell (index 2) of every row, with "=" removed.
  doc.tables.each do |table|
    table.rows.each do |row|
      row.cells.each_with_index do |cell, i|
        file_data << cell.text.gsub('=', '') if i == 2
      end
    end
  end

  file_data.each_with_index do |l, d|
    # An entry whose first character is a non-zero digit starts a new numbered section.
    if l.strip[0].to_i != 0
      md_file_name = file_data[d].split(".")
      #start folder name
      if flag
        directory_name = md_file_name[0].to_i
        flag = false
      end
      count += 1
      t = file_data[d + 1]
      if array_desc.size > 0
        heading_hash[temp] = array_desc
        # The fresh array is stored in all_data first and filled afterwards by
        # the else-branch below, which appends to this same object.
        array_desc = []
        all_data[file_data[d + 1]] = array_desc
      end
    else
      if t != l
        array_desc << l
        temp = t
      end
    end
  end

  # One numbered folder per heading, each containing 01.md.
  chapter_no = 1
  all_data.each do |k, v|
    # File.exist? replaces File.exists?, which was removed in Ruby 3.2.
    Dir.mkdir("#{chapter_no}") unless File.exist?("#{chapter_no}")
    output_name = "#{chapter_no}/#{File.basename("01", '.*')}.md"
    # Block form closes the handle after each chapter; the original never closed it.
    File.open(output_name, 'a') do |md_file|
      md_file << "#" + "#{k}\n\n"
      v.each do |d|
        md_file << "#{d} \n"
      end
    end
    chapter_no += 1
  end
end
It will give exact output as you shared above. Let me know if you need more help.

Calculate elements in CSV file

I am new to Ruby and now I have an issue while I try to calculate some elements.
I've got 6 CSV files with the same headers, and the question is how to find the total amount of payments for each paid-for month.
01-test.csv
Payment date,Payable month,House,Apartment,Amount of payment
2014-09-14,2014-08,Panel,84,5839.77
2014-09-14,2014-08,Brick,118,4251.63
2014-09-14,2014-08,Brick,97,471.5
2014-09-14,2014-08,Panel,53,236.22
2014-09-14,2014-08,Panel,83,4220.77
.......
02-test.csv
Payment date,Payable month,House,Apartment,Amount of payment
2014-10-01,2014-08,Brick,34,1522.59
2014-10-01,2014-08,Brick,117,1285.57
2014-10-01,2014-08,Brick,136,1925.97
2014-10-01,2014-08,Brick,24,1032.95
2014-10-01,2014-08,Brick,113,957.01
.......
Here is my code:
# Returns one payment record per distinct paid-for month.
#
# @param payments [Array<Hash>] payment records, each read through its :payed_for key
# @return [Array<Hash>] the first record seen for each :payed_for value, in input order
def create_month_array(payments)
  # The original block was missing its closing brace, which made the file unparsable.
  payments.uniq { |payment| payment[:payed_for] }
end
# Work-in-progress attempt at totalling payments per paid-for month. The loop
# body is elided ("....." in the source), so the method is not runnable as written.
#
# payments - collection whose elements are read with [:payed_for]
# months   - collection whose elements are read with [:payed_for]
def payed_for_each_month(payments, months)
# One zero per month: the block's value (0) becomes each element.
sums = Array.new(months.length){|a| a = 0}
months.each{|a|
# NOTE(review): `each` returns its receiver, so this compares a[:payed_for]
# with the whole `payments` collection rather than with individual payments.
if(a[:payed_for] == payments.each{|x| x[:payed_for]})
.....
end
}
# NOTE(review): only `sums` is assigned in the visible code; unless the elided
# section defines `sum`, the next two lines raise NameError.
p sum
sum.round(2)
end
Thanks for any hints.
Suppose the data were read from files into strings.
str1 =<<_
2014-09-14,2014-08,Panel,84,5839.77
2014-09-14,2014-08,Brick,118,4251.63
2014-09-14,2014-09,Brick,97,471.5
2014-09-14,2014-10,Panel,53,236.22
2014-09-14,2014-10,Panel,83,4220.77
_
str2 =<<_
2014-10-01,2014-08,Brick,34,1522.59
2014-10-01,2014-09,Brick,117,1285.57
2014-10-01,2014-09,Brick,136,1925.97
2014-10-01,2014-10,Brick,24,1032.95
2014-10-01,2014-11,Brick,113,957.01
_
We can then combine the strings into a single string, convert it to an array of lines and then use a counting hash to aggregate values for each payable month, which I assume to be the values of the second field. See Hash::new, specifically when new is given an argument equal to the default value (here 0).
(str1 + str2).lines.each_with_object(Hash.new(0)) do |line,h|
_, payable_month, _, _, amount = line.split(',')
h[payable_month] += amount.to_f
end
#=> {"2014-08"=>11613.990000000002, (5839.77 + 4251.63 + 1522.59)
# "2014-09"=>3683.04, ( 471.5 + 1285.57 + 1925.97)
# "2014-10"=>5489.9400000000005, ( 236.22 + 4220.77 + 1032.95)
# "2014-11"=>957.01} ( 957.01)
If a hash h is defined
h = Hash.new(0)
Ruby expands h[payable_month] += amount.to_f to
h[payable_month] = h[payable_month] + amount.to_f
If h has no key payable_month, h[payable_month] on the right of the equality sign returns the default value. Hence,
h[payable_month] = 0 + amount.to_f
#=> amount.to_f
Note we could have alternatively written
(str1.lines + str2.lines).each_with_object(Hash.new(0))...
or we could have read each file line-by-line and written all those lines to one file.
To combine all CSV data across multiple files use the following:
csv_files = ["01-test.csv", "02-test.csv", "03-test.csv", "04-test.csv", "05-test.csv", "06-test.csv"]
csv_data = CSV.generate(headers: :first_row) do |csv|
csv << CSV.open(csv_files.first).readline
csv_files.each do |csv_file|
CSV.read(csv_file)[1..-1].each { |row| csv << row }
end
end
To then calculate the sum for each "Payable month" (or "Payment date";
it was not clear which one is the paid-for month), do the following:
Interpret the data, using Ruby's CSV library
data = CSV.parse(csv_data, headers: true)
Group the data by the payed month
month_array = data.group_by { |row| row["Payable month"] }
# month_array = data.group_by { |row| row["Payment date"][0..6] }
Choose either line and comment out the other.
For each month get the sum/reduce of all the "Amount of payment" into a
total for that month within our collection of totals
payed_for_each_month = month_array.each_with_object({}) do |(month, rows), totals|
totals[month] = rows.reduce(0.0) { |sum, row| sum + row["Amount of payment"].to_f }
end
This produces the final result with the presented data
payed_for_each_month
# => {"2014-08"=>21743.98}
If "Payment date" month was used instead the totals would produce the following:
month_array = data.group_by { |row| row["Payment date"][0..6] }
# ...
payed_for_each_month
# => {"2014-09"=>15019.890000000001,
# "2014-10"=>6724.09}
All the code together:
data = CSV.parse(csv_data, headers: true)
month_array = data.group_by { |row| row["Payable month"] }
# month_array = data.group_by { |row| row["Payment date"][0..6] }
payed_for_each_month = month_array.each_with_object({}) do |(month, rows), totals|
totals[month] = rows.reduce(0.0) { |sum, row| sum + row["Amount of payment"].to_f }
end
payed_for_each_month
# => {"2014-08"=>21743.98}
References:
group_by
reduce
each_with_object

Read one column from csv and make new column with previous column data

In the "id" column of the source image, the number before the "-" is the chapter number in the target image, and the number after the "-" is the verse in the target image. For the verse column I am building ranges (chunks) from the numbers after the "-". I tried a few lines but do not understand how to move ahead.
I need to read this source column image data from csv file.
And want to make this format target column image
# Split the "id" cell (fourth column) of every row in test_file.csv into the
# part before the first "-" (chapter) and the part after it (verse).
chapter = []
verse = []
CSV.foreach('test_file.csv') do |row|
# Skip the header row (cell text "id") and rows without an id value.
if (row[3] != "id" && row[3] != nil)
# partition("-") splits on the first "-" only.
chapter << row[3].partition("-").first
verse << row[3].partition("-").last
end
end
I have written an answer below.
I am reading csv file with same content.
# Build "start-end" verse ranges from the "id" cell (second column) of
# test_file.csv: each id's verse number is paired with the next id's verse
# number minus one.
chapter = []
verse = []
# temp holds the verse part (text after the first "-") of every id seen so far;
# pos is the index in temp of the verse that is still waiting for its end value.
temp = []
pos = 0
CSV.foreach("test_file.csv") do |row|
# Prints every row as it is read.
puts row
# Skip the header row (cell text "id") and rows without an id value.
if (row[1] != "id" && row[1] != nil)
chapter << row[1].partition("-").first
if(temp.size>0)
# The current verse number minus one closes the range opened by the previous id.
no = row[1].partition("-").last
verse_no = (no.to_i - 1)
# Zero-pad values below 10 to two digits.
updated_verse = verse_no < 10 ? "0"+verse_no.to_s : verse_no.to_s
verse.insert(pos, temp[pos]+ "-" + updated_verse)
temp << no
pos = pos + 1
else
# First id: nothing to close yet, just remember where the first range starts.
temp << row[1].partition("-").last
end
end
end
# NOTE(review): a range is only emitted when a following id arrives, so the
# last id's verse never gets an entry in `verse`.
puts verse
puts chapter

local variable vs instance variable Ruby initialize

I have a class in Ruby where I pass in a Hash of commodity prices. They are in the form
{"date (string)" => price (float), etc, etc}
and in the initialise method I convert the dates to Dates like so:
@data = change_key_format(dates)
But I notice that that method seems to change the original argument as well. Why is that? Here is the code:
# Stores the commodity name and a date-keyed, date-corrected copy of its prices.
#
# The "@" sigils on the instance variables had been garbled into "#" in the
# published listing, turning the assignments into comments; they are restored here.
#
# @param commodity_name [String] printed and stored as @commodity_name
# @param data [Hash] prices keyed by "YYYY-MM-DD" strings
def initialize(commodity_name, data)
  puts "creating ...#{commodity_name}"
  @commodity_name = commodity_name
  # change_hash_keys_to_dates returns the very Hash it was given, so from here
  # on @data and data are the same object.
  @data = change_hash_keys_to_dates(data)
  @dates = array_of_hash_keys(data)
  puts data ######## UNCHANGED
  @data = fix_bloomberg_dates(@data, @dates)
  puts data ######## CHANGED -------------------- WHY???
  #get_price_data
end
# Re-keys entries whose date is obviously wrong (before 1900-01-01) to the
# date expected to follow dates[1].
#
# Mutates and returns +data+ itself; only the iteration runs over a copy.
#
# @param data [Hash{Date => Object}] prices keyed by date
# @param dates [Array<Date>] key list; dates[1] and dates[2] give the spacing
# @return [Hash] the same +data+ object, re-keyed in place
def fix_bloomberg_dates(data, dates)
  data.clone.each do |date, _price|
    # Only obviously wrong dates are touched.
    next unless date < Date.strptime("1900-01-01")

    anchor = dates[1]
    puts anchor.class
    date_gap = (anchor - dates[2]).to_i
    weekday = anchor.strftime("%a %d %b").split(" ")[0].downcase

    # Weekly series ending on a Friday move a week ahead; daily series ending
    # on a Thursday skip four days; everything else advances one day.
    # (Assumes there are no weekend prices.)
    offset =
      if date_gap == 7 && weekday == "fri"
        7
      elsif date_gap == 1 && weekday == "thu"
        4
      else
        1
      end

    data[anchor + offset] = data.delete(date)
  end
  data
end
# Replaces every "YYYY-MM-DD" string key of +hash+ with the matching Date.
#
# The hash is modified in place and that same object is returned.
#
# @param hash [Hash{String => Object}]
# @return [Hash{Date => Object}] the same +hash+ object
def change_hash_keys_to_dates(hash)
  # Walk a snapshot of the keys so the hash can be re-keyed while looping.
  hash.keys.each do |raw_key|
    parsed = Date.strptime(raw_key, "%Y-%m-%d")
    hash[parsed] = hash.delete(raw_key)
  end
  hash
end
# Lists the keys of +hash+ in iteration order.
#
# @param hash [Hash] any hash (keyed by dates at the call site)
# @return [Array] the keys
def array_of_hash_keys(hash)
  hash.map { |date, _price| date }
end
Because of these lines:
data[new_date] = data.delete(date)
You're modifying the original data object. If you don't want to do this, create a copy of the object:
data2 = data.clone
and then replace all other references to data with data2 in your method (including return data2).

Ruby Array Variable Reference Lost During Loop

I am writing a parsing routine in Ruby 2.1 for a spreadsheet. The code works properly through the first array of pricing data. Unfortunately, on the fifth loop through datatable, while processing the second set of pricing data, the variable termtable is not set, even though @tmptermtables is modified by the shift method in this statement on line 72: termtable = @tmptermtables.shift if termtable.empty? This is possibly a scope problem and I am hoping someone can explain to me why the reference is lost.
Below is a copy of the code. Thank you in advance for lending me your brain.
# Parses a hard-coded pricing table into one record per price cell.
#
# The table has a header row naming the load profiles, a row of contract terms
# (months) for each profile, "Daily Pricing" / start-date marker rows, and
# price rows per zone. Every Float cell becomes a record that combines the
# price with the current load profile, term, zone and start date.
#
# Changes from the published listing:
# * the "@" sigils on the instance variables had been garbled into "#";
# * each row now works on copies of the term lists, so `shift` no longer
#   drains the shared originals (this caused the failure on the fifth row);
# * the start-date row uses real Date values; `3/1/2014` is integer division
#   and evaluates to 0.
#
# @return [Array<Hash>] the parsed records; on failure the rescued exception
#   is printed and returned instead
def sp_parser
  pricing_date = Date.new(2014, 2, 7)
  tz = DateTime.parse(Time.now.to_s).strftime('%z')
  expires = DateTime.new(pricing_date.year, pricing_date.mon, pricing_date.mday, 17, 0, 0, tz)
  next_start = Date.new(2014, 3, 1)

  datatable = [
    ["Zone", "Business - Low Load Factor", nil, nil, nil, "Business - Medium Load Factor", nil, nil, nil, "Business - High Load Factor", nil, nil, nil],
    [nil, 6, 9, 12, 15, 6, 9, 12, 15, 6, 9, 12, 15],
    [nil, "Daily Pricing", nil, nil, nil, "Daily Pricing", nil, nil, nil, "Daily Pricing", nil, nil, nil],
    ["COAST", 6.41, 6.55, 6.19, 6.01, 6.07, 6.18, 5.88, 5.74, 5.63, 5.71, 5.48, 5.37],
    ["NORTH", 6.58, 6.74, 6.35, 6.15, 6.02, 6.13, 5.85, 5.68, 5.61, 5.68, 5.47, 5.33],
    [nil, next_start, nil, nil, nil, next_start, nil, nil, nil, next_start, nil, nil, nil],
    ["COAST", 7.08, 6.53, 6.20, 6.00, 6.63, 6.17, 5.89, 5.73, 6.06, 5.69, 5.49, 5.36],
    ["NORTH", 7.34, 6.72, 6.36, 6.13, 6.60, 6.10, 5.86, 5.66, 6.06, 5.65, 5.48, 5.31]
  ]

  loadprofiles = []
  termtables = []
  pvalue = 0
  load_factor_found = false
  daily_pricing_found = false
  dataset = []

  datatable.each do |record|
    termtable = []
    # Copy the inner arrays too: Array.new(termtables) only copies the outer
    # array, so shifting terms off a row's working list emptied the lists
    # stored in termtables and left nothing for the following rows.
    @tmptermtables = termtables.map(&:dup)
    @tmploadprofiles = Array.new(loadprofiles)

    record.each_with_index do |val, col|
      label = val.to_s.downcase

      ## Build the load profile table
      loadprofiles << "LOW" if label.match(/ low/)
      loadprofiles << "MEDIUM" if label.match(/medium/)
      loadprofiles << "HIGH" if label.match(/high/)
      load_factor_found = true if label.match(/load factor/)
      daily_pricing_found = true if label.match(/daily pricing/)

      ## Build the term tables for each load profile
      if load_factor_found && !daily_pricing_found && val.is_a?(Integer)
        if val > pvalue
          termtable << val
          pvalue = val
          termtables << termtable if col == record.length - 1
        elsif !termtable.empty?
          # The term dropped back down: the previous profile's list is
          # complete and a new one starts with this value.
          termtables << termtable
          termtable = [val]
          pvalue = val
        end
      end

      next unless daily_pricing_found

      @start_date = pricing_date if label.match(/daily pricing/)
      @start_date = val if val.is_a? Date
      @zone = "CenterPoint" if label.match(/coast/)
      @zone = "Oncor" if label.match(/north/)
      next unless val.is_a? Float

      # When the current term list is used up, move on to the next load
      # profile and its term list.
      @load = @tmploadprofiles.shift if termtable.empty?
      termtable = @tmptermtables.shift if termtable.empty?
      term = termtable.shift unless termtable.empty?
      price = (val / 100).round(4)
      dataset << {
        :loaded => Time.now,
        :start => @start_date,
        :load => @load,
        :term => term,
        :zone => @zone,
        :price => price,
        :expiration => expires,
        :product => "Fixed"
      }
    end
  end

  dataset
rescue => err
  puts "\n" + DateTime.parse(Time.now.to_s).strftime("%Y-%m-%d %r") + " Exception: #{__callee__} in #{__FILE__} generated an error: #{err}\n"
  err
end
x = sp_parser()

Resources