By re-opening script, processing speed decreases - performance

This is the code I have written.
It parses an xml and turns on lights when a certain condition is true.
The problem I have is, if I restart the script it takes longer to read out or process the xml. If I restart again, it takes even longer. So at some point it takes 10 or more seconds till one cycle is through.
def _read_sensor(xmldoc, tag):
    """Return the text of the first <tag> element of xmldoc as a float."""
    return float(xmldoc.getElementsByTagName(tag)[0].childNodes[0].data)


def _to_padded_bits(reading):
    """Return reading * 100 as binary digits, left-padded with zeros to 8 characters."""
    # The factor is 100.000000000001 rather than 100, presumably so that a
    # product which float rounding leaves just below a whole number is not
    # truncated one too low by int() -- TODO confirm with the author.
    bits = bin(int(reading*100.000000000001))[2:]
    # A negative repeat count gives an empty prefix, so strings longer than
    # 8 characters pass through unchanged.
    return "0" * (8 - len(bits)) + bits


def core():
    """Poll the sensor page forever and print both readings as bit strings.

    Each cycle downloads http://192.168.60.242/xml, cuts the XML payload out
    of the surrounding HTML, reads the <t0> and <t1> values, and prints the
    scaled readings, a running cycle counter and the zero-padded binary form
    of each reading.

    The loop has no exit condition: the function only ends by raising
    (failed download, unparsable XML, missing tag, KeyboardInterrupt, ...).
    """
    # Imported once per call instead of once per cycle.
    import urllib #import urllib.request
    from xml.dom import minidom

    x = 0
    while True:
        ### XML extraction ###
        xml = urllib.urlopen("http://192.168.60.242/xml") # xml = urllib.request.urlopen("http://192.168.60.242/xml")
        try:
            xml_string = xml.read()
        finally:
            # Release the connection even when read() fails.
            xml.close()
        # Keep only the slice of the page that holds the XML document and
        # drop the HTML closing tags that follow it.
        re_string = xml_string[130:4000]
        re_string = re_string.replace('</TEXTAREA></FORM></BODY></HTML>', '') # this line has to be changed because of an incompatibility with Python 3.5

        # parsing
        xmldoc = minidom.parseString(re_string)
        Sensor0 = _read_sensor(xmldoc, 't0')
        Sensor1 = _read_sensor(xmldoc, 't1')

        # data preparation
        print (Sensor0*100.000000000001)
        print (Sensor1*100.000000000001)

        ### int to bin ###
        Sensor0_compl = _to_padded_bits(Sensor0)
        Sensor1_compl = _to_padded_bits(Sensor1)

        x = x+1
        print (">>>", x ,"<<<")
        print (Sensor0_compl)
        print (Sensor1_compl)

        # --- GPIO output (disabled; it was commented out in the original) ---
        # Intended wiring with RPi.GPIO in BCM mode, every listed pin set up
        # as an output. A pin is switched to True when its bit is "0" and to
        # False otherwise:
        #   Sensor0_compl[0..7] -> pins 4, 5, 6, 12, 13, 16, 17, 18
        #       (AgCh3A, AgPro2, CharRo440, AgInnoC, AgInnoB, ZK700, AgF3000, ZK1200)
        #   Sensor1_compl[3..7] -> pins 19, 20, 21, 22, 23
        #       (AgProV3, MakWPG1, AgExcell2eC, AgCh2eC, AgCh3B)
        #   Sensor1_compl[5], Sensor1_compl[6] -> pins 24, 25 (MM5, MM6)
        # Outputs for a third sensor (<t2>, pin 1) were sketched but never
        # finished, as was a trailing time.sleep(1) / GPIO.cleanup().
def main():
    """Run core() forever, restarting it whenever it fails.

    core() only ends by raising, so each pass through this loop is one
    restart after a failure.
    """
    import time
    while True:
        try:
            core()
        except Exception as error:
            # Catch Exception rather than using a bare `except:` so that
            # KeyboardInterrupt and SystemExit still stop the script; with a
            # bare except the process keeps running after Ctrl+C and has to
            # be killed explicitly.
            print ("core() failed, restarting:", error)
            # Pause before retrying so an unreachable sensor does not turn
            # this loop into a busy loop.
            time.sleep(1)
            continue
        time.sleep(0.01)
main()
Does somebody have an idea where my issue originates from?
Thank you

I suspect that the script isn't closing when you think it is. The obvious way to check for this is to see if pythonw.exe is still running in task manager.
To be really, really sure, you could create a file in the 'continue' block of your try/except:
# Raw string: in a normal literal the "\f" in this path is a form-feed escape,
# not a backslash followed by "f".
with open(r'filepath\file.txt','w') as myfile:
    pass
Then when you think you've killed the script, delete this file and see if it comes back. You'd have to increase your timeout to 1s or so to avoid flooding your system with file-creation requests as well.
Two things to try first though:
Get rid of the while True at the top of core(), it seems unnecessary
Reduce the frequency of your calling loop, to, say once or twice a second

Related

Ruby: Checking a value within a csv list of multiple arrays and showing which array has that value

# Registers a product serial number in procedures/Serial.csv.
#
# Prompts for a serial number, upper-cases it, and then:
#   * if the CSV exists and the serial is found in it, exits without changes;
#   * if the CSV exists and the serial is not found, appends a new row;
#   * if the CSV does not exist, creates it with that single row.
# A row is [serial number, current Time, current time in epoch seconds].
# After writing, the whole file is read back and printed.
#
# NOTE(review): `ask` is not defined in this snippet; presumably it comes from
# a prompt helper (HighLine or similar) -- confirm.
require 'csv'
########
## Ask for Serial number
########
serial_number = ask("Product serial number?")
serial_number = serial_number.to_s
serial_number = serial_number.upcase
stamp_date= Time.now
old = Time.now.to_i
##########
##Check if file exist in directory
##########
if File.exist? ('procedures/Serial.csv')
  #########
  #Check serial number exist in CSV
  #########
  file = File.open("procedures/Serial.csv","r")
  items = []
  while (line1 = file.gets)
    # Plain split on commas; the line is not chomped, so the last field
    # still carries the trailing newline.
    arr = line1.split(',')
    items.push ({"Product Number": arr[0],"Time Used": arr[1], "Time in Secs": arr[2]})
  end
  file.close
  # `checklist` is the inspect string of ALL rows, so the include? below is a
  # substring search over every column, not a comparison against arr[0] only.
  checklist = items.inspect
  puts checklist
  repeat = checklist.include?serial_number
  puts repeat
  if repeat == true
    #prompt("Exist")
    #Thinking I should insert that part of code here to check if the value exist within the csv file.
    exit
  else
    #prompt("Does not exist")
    ########
    #If serial number does not exist, create it within the csv
    ########
    ######
    #Append value into the csv
    #######
    CSV.open("procedures/Serial.csv","a+") do |csv|
      csv << [ serial_number, stamp_date, old]
    end
    ###############
    ## Read the file
    ###############
    file = File.open("procedures/Serial.csv","r")
    items = []
    while (line = file.gets)
      arr = line.split(',')
      items.push ({"Product Number": arr[0],"Time Used": arr[1], "Time in Secs": arr[2]})
    end
    file.close
    puts items.inspect
  end
else
  #######
  ##Insert Serial number into file
  #######
  # "wb" creates the file (or truncates an existing one) before writing.
  CSV.open("procedures/Serial.csv","wb") do |csv|
    csv << [ serial_number, stamp_date, old]
  end
  ###############
  ## Read the file
  ###############
  file = File.open("procedures/Serial.csv","r")
  items = []
  while (line = file.gets)
    arr = line.split(',')
    items.push ({"Product Number": arr[0],"Time Used": arr[1], "Time in Secs": arr[2]})
  end
  file.close
  puts items.inspect
end
Currently, I ask the user to input a value and check whether that value exists in the first column of the CSV file, i.e. in arr[0] of every row array.
Does anyone know how to pull that specific array out of the list of arrays read from the CSV file? The goal is to pull that specific array out and replace the time values in it with a new Time.

How to split a string like the following example in ruby? [closed]

Closed. This question needs to be more focused. It is not currently accepting answers.
Want to improve this question? Update the question so it focuses on one problem only by editing this post.
Closed 6 years ago.
Improve this question
I want to convert an arbitrary string to an array of strings. The conversion is best explained with an example. If the string were "8737928" I would like to return the following array.
#=> ["8737928",
# "8.737928", "87.37928", "873.7928", "8737.928", "87379.28", "873792.8",
# "8.7.37928", "8.73.7928", "8.737.928", "8.7379.28", "8.73792.8",
# "87.3.7928", "87.37.928", "87.379.28", "87.3792.8", "873.7.928",
# "873.79.28", "873.792.8", "8737.9.28", "8737.92.8", "87379.2.8",
# "8.7.3.7928", "8.7.37.928", "8.7.379.28", "8.7.3792.8", "8.73.7.928",
# "8.73.79.28", "8.73.792.8", "8.737.9.28", "8.737.92.8", "8.7379.2.8",
# "87.3.7.928", "87.3.79.28", "87.3.792.8", "87.37.9.28", "87.37.92.8",
# "87.379.2.8", "873.7.9.28", "873.7.92.8", "873.79.2.8", "8737.9.2.8",
# "8.7.3.7.928", "8.7.3.79.28", "8.7.3.792.8", "8.7.37.9.28", "8.7.37.92.8",
# "8.7.379.2.8", "8.73.7.9.28", "8.73.7.92.8", "8.73.79.2.8", "8.737.9.2.8",
# "87.3.7.9.28", "87.3.7.92.8", "87.3.79.2.8", "87.37.9.2.8", "873.7.9.2.8",
# "8.7.3.7.9.28", "8.7.3.7.92.8", "8.7.3.79.2.8", "8.7.37.9.2.8",
# "8.73.7.9.2.8", "87.3.7.9.2.8",
# "8.7.3.7.9.2.8"]
As you see, between 0 and 6 ("8737928".size-1 #=> 6) decimal points are inserted in the string, at every combination of indices between 1 and 6. Since a decimal point may or may not follow each character other than the last, the array contains 2**6 #=> 64 elements.
How can I do this?
# Returns str followed by every variant of str that has '.' inserted at some
# combination of the interior positions 1..str.size-1, grouped by the number
# of dots inserted (one dot first, then two, and so on).
def doit(str)
  positions = (1...str.size).to_a
  variants = [str]
  # Each position value doubles as a dot count: 1, 2, ..., str.size-1.
  positions.each do |dot_count|
    positions.combination(dot_count) do |chosen|
      variants << dotify(str, chosen)
    end
  end
  variants
end
# Returns a copy of str with '.' inserted at each index in indices.
# The indices are processed last-to-first, so with ascending indices each
# insertion leaves the positions still to be processed unshifted.
def dotify(str, indices)
  dotted = str.dup
  indices.reverse_each { |pos| dotted.insert(pos, '.') }
  dotted
end
doit("8737928").size
#=> 64
doit "8737928"
#=> ["8737928",
# "8.737928", "87.37928", "873.7928", "8737.928", "87379.28", "873792.8",
# "8.7.37928", "8.73.7928", "8.737.928", "8.7379.28", "8.73792.8",
# "87.3.7928", "87.37.928", "87.379.28", "87.3792.8", "873.7.928",
# "873.79.28", "873.792.8", "8737.9.28", "8737.92.8", "87379.2.8",
# "8.7.3.7928", "8.7.37.928", "8.7.379.28", "8.7.3792.8", "8.73.7.928",
# "8.73.79.28", "8.73.792.8", "8.737.9.28", "8.737.92.8", "8.7379.2.8",
# "87.3.7.928", "87.3.79.28", "87.3.792.8", "87.37.9.28", "87.37.92.8",
# "87.379.2.8", "873.7.9.28", "873.7.92.8", "873.79.2.8", "8737.9.2.8",
# "8.7.3.7.928", "8.7.3.79.28", "8.7.3.792.8", "8.7.37.9.28", "8.7.37.92.8",
# "8.7.379.2.8", "8.73.7.9.28", "8.73.7.92.8", "8.73.79.2.8", "8.737.9.2.8",
# "87.3.7.9.28", "87.3.7.92.8", "87.3.79.2.8", "87.37.9.2.8", "873.7.9.2.8",
# "8.7.3.7.9.28", "8.7.3.7.92.8", "8.7.3.79.2.8", "8.7.37.9.2.8",
# "8.73.7.9.2.8", "87.3.7.9.2.8",
# "8.7.3.7.9.2.8"]
Note:
dotify("8737928", [1,3,5])
#=> "8.73.79.28"
Solution 1
Upon further reflection (see original, probably incorrect solution below), it seems like what OP really wants to do is insert dots at every possible combination of positions in the string. Here's a method that does literally that:
# Returns every variant of str with '.' inserted at some combination of the
# gaps between adjacent characters, ordered by the number of dots (zero dots
# first). `prefix` is accepted but not used.
def splits(str, prefix="")
  last = str.size - 1
  offsets = (0...last).to_a
  (0..last).flat_map do |dot_count|
    offsets.combination(dot_count).map do |chosen|
      dotted = str.dup
      # Offsets count gaps from the right end; they arrive in ascending
      # order, so insertions run right-to-left and never shift a later one.
      chosen.each { |offset| dotted.insert(last - offset, ".") }
      dotted
    end
  end
end
puts splits("8737928")
# => 8737928
# 873792.8
# 87379.28
# 87379.2.8
# 8737.928
# 8737.92.8
# 8737.9.28
# 8737.9.2.8
# 873.7928
# 873.792.8
# 873.79.28
# 873.79.2.8
# 873.7.928
# 873.7.92.8
# 873.7.9.28
# 873.7.9.2.8
# 87.37928
# 87.3792.8
# 87.379.28
# 87.379.2.8
# 87.37.928
# 87.37.92.8
# 87.37.9.28
# 87.37.9.2.8
# 87.3.7928
# 87.3.792.8
# 87.3.79.28
# 87.3.79.2.8
# 87.3.7.928
# 87.3.7.92.8
# 87.3.7.9.28
# 87.3.7.9.2.8
# 8.737928
# 8.73792.8
# 8.7379.28
# 8.7379.2.8
# 8.737.928
# 8.737.92.8
# 8.737.9.28
# 8.737.9.2.8
# 8.73.7928
# 8.73.792.8
# 8.73.79.28
# 8.73.79.2.8
# 8.73.7.928
# 8.73.7.92.8
# 8.73.7.9.28
# 8.73.7.9.2.8
# 8.7.37928
# 8.7.3792.8
# 8.7.379.28
# 8.7.379.2.8
# 8.7.37.928
# 8.7.37.92.8
# 8.7.37.9.28
# 8.7.37.9.2.8
# 8.7.3.7928
# 8.7.3.792.8
# 8.7.3.79.28
# 8.7.3.79.2.8
# 8.7.3.7.928
# 8.7.3.7.92.8
# 8.7.3.7.9.28
# 8.7.3.7.9.2.8
Solution 2
However, while @EliSadoff's solution wasn't generalized, I did like his "idea that each spot a period can be is a boolean decision." If we think of the positions in the string at which we could insert a period as bits in a binary number m with the same (base 2) length as the string less one, we can simply iterate from 0 to $2^{c-1}-1$ (where c is the length of the string) to get every possible such number. For example, if our string is "abcd" (c = 4), then we can iterate from 0 to 7 ($2^{4-1}-1$) to find the positions of each period:
m₁₀ | m₂ 4 2 1 | 4 2 1 | result
─────┼────┴─┴─┴─┼───┴───┴───┴───┼─────────
0 │ 0 0 0 │ a b c d │ abcd
1 │ 0 0 1 │ a b c • d | abc.d
2 │ 0 1 0 │ a b • c d | ab.cd
3 │ 0 1 1 │ a b • c • d | ab.c.d
4 │ 1 0 0 │ a • b c d | a.bcd
5 │ 1 0 1 │ a • b c • d | a.bc.d
6 │ 1 1 0 │ a • b • c d | a.b.cd
7 │ 1 1 1 │ a • b • c • d | a.b.c.d
The only missing piece is inserting periods based on the bits in the second column. That's pretty easy: To figure out if we need to insert a period at position n, we test if the nth bit in m is 1. To do that we can use the bitwise operation m & (1 ≪ n).
Put it all together and we get the following:
# Returns every dotted variant of str by treating each gap between adjacent
# characters as one bit of a counter: bit 0 is the rightmost gap, and a set
# bit means "insert a '.' in that gap".
def splits2(str)
  gaps = str.size - 1
  (0...2**gaps).map do |mask|
    dotted = str.dup
    # Walk the bits from the rightmost gap leftwards so that an insertion
    # never moves a gap that is still to be handled.
    (0..gaps).each do |bit|
      dotted.insert(gaps - bit, ".") unless (mask & (1 << bit)).zero?
    end
    dotted
  end
end
Solution 3
Just for fun, here's another solution that also uses the binary number approach, but in a different way. I'll leave it as an exercise to the reader to figure out how it works:
# Returns every dotted variant of str. Each counter value is rendered as a
# binary string padded to one character per gap; a "1" at index i means the
# character at index i of str is followed by a '.'.
def splits3(str)
  gaps = str.size - 1
  (0...2**gaps).map do |mask|
    bits = format("%*b", gaps, mask)
    str.each_char.each_with_index.map do |char, idx|
      bits[idx] == "1" ? "#{char}." : char
    end.join
  end
end
Original solution
Similar to @CarySwoveland's solution but, I think, a bit simpler:
# Returns pfx + str with a single '.' inserted at each interior position,
# followed by the same for the remainder of str after its first character,
# with that character and a '.' appended to the prefix.
def splits(str, pfx="")
  return [] if str.empty?
  single_dot = (1...str.size).map do |cut|
    "#{pfx}#{str[0...cut]}.#{str[cut..-1]}"
  end
  single_dot + splits(str[1..-1], "#{pfx}#{str[0]}.")
end
p splits("8737928")
# => [ "8.737928", "87.37928", "873.7928", "8737.928", "87379.28", "873792.8",
# "8.7.37928", "8.73.7928", "8.737.928", "8.7379.28", "8.73792.8",
# "8.7.3.7928", "8.7.37.928", "8.7.379.28", "8.7.3792.8",
# "8.7.3.7.928", "8.7.3.79.28", "8.7.3.792.8",
# "8.7.3.7.9.28", "8.7.3.7.92.8",
# "8.7.3.7.9.2.8"
# ]
The requirements are unclear, and I came up with a result that differs from what both Cary and Jordan have:
# Called with one argument, splits it into its first character (the prefix)
# and the rest (the suffix). With both arguments, returns every string built
# as prefix + '.' + a leading piece of suffix + '.' + remainder, recursing on
# the remainder so that it is broken up further.
def dot_it(prefix, suffix = nil)
  if suffix.nil? # first call
    return dot_it(prefix[0], prefix[1..-1])
  end

  found = []
  (1...suffix.length).each do |cut|
    head = "#{prefix}.#{suffix[0...cut]}"
    rest = suffix[cut..-1]
    found << "#{head}.#{rest}"
    found.concat(dot_it(head, rest))
  end
  found
end
dot_it("8737928")
#⇒ ["8.7.37928", "8.7.3.7928", "8.7.3.7.928", "8.7.3.7.9.28",
# "8.7.3.7.9.2.8", "8.7.3.7.92.8", "8.7.3.79.28", "8.7.3.79.2.8",
# "8.7.3.792.8", "8.7.37.928", "8.7.37.9.28", "8.7.37.9.2.8",
# "8.7.37.92.8", "8.7.379.28", "8.7.379.2.8", "8.7.3792.8",
# "8.73.7928", "8.73.7.928", "8.73.7.9.28", "8.73.7.9.2.8",
# "8.73.7.92.8", "8.73.79.28", "8.73.79.2.8", "8.73.792.8",
# "8.737.928", "8.737.9.28", "8.737.9.2.8", "8.737.92.8",
# "8.7379.28", "8.7379.2.8", "8.73792.8"]
My method gives:
dot_it("8737928").count
#⇒ 31
while both answers above give 21 results. Who is right?

scrapy response.xpath() cause memory leaking

i found response.xpath() method leaking memory while using scrapy to write a spider. here is the code:
def extract_data(self, response):
    """Build a YPItem for one response.

    In this version the real extraction is commented out and every field is
    filled with a fixed placeholder value, so ``response`` is not read.
    """
    # These six locals are only consumed by the disabled extraction code below.
    aomen_host_water = None
    aomen_pankou = None
    aomen_guest_water = None
    sb_host_water = None
    sb_pankou = None
    sb_guest_water = None
    # --- disabled extraction code (the part reported to grow memory) ---
    # NOTE(review): `all_trs` is never assigned in the visible code;
    # presumably the xpath result on the next line was bound to it -- confirm.
    # response.xpath('//div[@id="webmain"]/table[@id="odds"]/tr')
    # for tr in all_trs:
    #     # cname(company name)
    #     cname = tr.xpath('td[1]/text()').extract()
    #     if len(cname) == 0:
    #         continue
    #     # remove extra space and other stuff
    #     cname = cname[0].split(' ')[0]
    #     if cname == u'澳彩':
    #         aomen_host_water = tr.xpath('td[9]/text()').extract()
    #         if len(aomen_host_water) != 0:
    #             aomen_pankou = tr.xpath('td[10]/text()').extract()
    #             aomen_guest_water = tr.xpath('td[11]/text()').extract()
    #         else:
    #             aomen_host_water = tr.xpath('td[6]/text()').extract()
    #             aomen_pankou = tr.xpath('td[7]/text()').extract()
    #             aomen_guest_water = tr.xpath('td[8]/text()').extract()
    #     elif cname == u'SB':
    #         sb_host_water = tr.xpath('td[9]/text()').extract()
    #         if len(sb_host_water) != 0:
    #             sb_pankou = tr.xpath('td[10]/text()').extract()
    #             sb_guest_water = tr.xpath('td[11]/text()').extract()
    #         else:
    #             sb_host_water = tr.xpath('td[6]/text()').extract()
    #             sb_pankou = tr.xpath('td[7]/text()').extract()
    #             sb_guest_water = tr.xpath('td[8]/text()').extract()
    # if (aomen_host_water is None) or (aomen_pankou is None) or (aomen_guest_water is None) or \
    #         (sb_host_water is None) or (sb_pankou is None) or (sb_guest_water is None):
    #     return None
    # if (len(aomen_host_water) == 0) or (len(aomen_pankou) == 0) or (len(aomen_guest_water) == 0) or \
    #         (len(sb_host_water) == 0) or (len(sb_pankou) == 0) or (len(sb_guest_water) == 0):
    #     return None
    # item = YPItem()
    # item['aomen_host_water'] = float(aomen_host_water[0])
    # item['aomen_pankou'] = aomen_pankou[0].encode('utf-8') # float(pankou.pankou2num(aomen_pankou[0]))
    # item['aomen_guest_water'] = float(aomen_guest_water[0])
    # item['sb_host_water'] = float(sb_host_water[0])
    # item['sb_pankou'] = sb_pankou[0].encode('utf-8') # float(pankou.pankou2num(sb_pankou[0]))
    # item['sb_guest_water'] = float(sb_guest_water[0])
    # Placeholder item used while the extraction above is disabled.
    item = YPItem()
    item['aomen_host_water'] = 1.0
    item['aomen_pankou'] = '111' # float(pankou.pankou2num(aomen_pankou[0]))
    item['aomen_guest_water'] = 1.0
    item['sb_host_water'] = 1.0
    item['sb_pankou'] = '111' # float(pankou.pankou2num(sb_pankou[0]))
    item['sb_guest_water'] = 1.0
    return item
Here I commented out the useful statements and used fake data: the spider used about 45 MB of memory. When I uncommented those lines, the spider used more than 100 MB and the memory usage rose continuously. Has anybody met this kind of problem before?
You might decrease the memory usage by switching to extract_first() instead of extract() which would create unnecessary lists.
I would also upgrade scrapy and lxml to the latest versions:
pip install --upgrade scrapy
pip install --upgrade lxml

how to separate this text into a hash ruby

sorry my bad english, im new
i have this document.txt
paul gordon,jin kazama,1277,1268,21-12,21-19
yoshimistu,the rock,2020,2092,21-9,21-23,25-27
... lot more
i mean, how to strip each line, and comma sparator, into a hash like this
result = {
line_num: { name1: "paula wood", name2: "sarah carnley", m1: 1277, m2: 1268, sc1: 21, sc2: 12, sc3: 21, sc4: 19 }
}
i try to code like this
im using text2re for regex here
# Reads doc.txt line by line, matches each line against a
# word / any-char / word / any-char / word / any-char / word pattern, and
# stores the first two matched words, concatenated, in `example` under a
# running match counter.
doc = File.read("doc.txt")
lines = doc.split("\n")
counts = 0
example = {}
player1 = '((?:[a-z][a-z]+))(.)((?:[a-z][a-z]+))'
player2 = '((?:[a-z][a-z]+))(.)((?:[a-z][a-z]+))'
# `re` and `m` are reassigned inside the loop below before they are used, so
# the pattern built from player1 + player2 is never matched against anything.
re = (player1 + player2 )
m = Regexp.new(re, Regexp::IGNORECASE)
lines.each do |line|
  # Each "word" needs at least two letters; each separator is exactly one
  # character of any kind (space, comma, ...).
  re1='((?:[a-z][a-z]+))' # Word 1
  re2='(.)' # Any Single Character 1
  re3='((?:[a-z][a-z]+))' # Word 2
  re4='(.)' # Any Single Character 2
  re5='((?:[a-z][a-z]+))' # Word 3
  re6='(.)' # Any Single Character 3
  re7='((?:[a-z][a-z]+))' # Word 4
  re=(re1+re2+re3+re4+re5+re6+re7)
  m=Regexp.new(re,Regexp::IGNORECASE);
  # The same match is recomputed for every capture group that is read.
  if m.match(line)
    word1=m.match(line)[1];
    c1=m.match(line)[2];
    word2=m.match(line)[3];
    c2=m.match(line)[4];
    word3=m.match(line)[5];
    c3=m.match(line)[6];
    word4=m.match(line)[7];
    counts += 1
    # Joined without the separator character captured in c1.
    example[counts] = word1+word2
    # Prints the whole hash after every matching line.
    puts example
  end
end
# (/[a-z].?/)
but the output does not match my expectation
1=>"", 2=>"indahdelika", 3=>"masam",
..more
Your data is comma-separated, so use the CSV class instead of trying to roll your own parser. There are dragons waiting for you if you try to split simply using commas.
I'd use:
# Parses comma-separated match records with the CSV class and builds a hash
# keyed by zero-based row index. Each value holds the two names, the two
# numeric columns and the two halves of the first two "a-b" score columns.
# All values are kept as strings.
require 'csv'
data = "paul gordon,jin kazama,1277,1268,21-12,21-19
yoshimistu,the rock,2020,2092,21-9,21-23,25-27
"
hash = {}
CSV.parse(data).each_with_index do |row, i|
  # Only the first six columns are destructured; any further column (the
  # third score pair on the second sample row) is ignored.
  name1, name2, m1, m2, sc1_2, sc3_4 = row
  sc1, sc2 = sc1_2.split('-')
  sc3, sc4 = sc3_4.split('-')
  hash[i] = {
    name1: name1,
    name2: name2,
    m1: m1,
    m2: m2,
    sc1: sc1,
    sc2: sc2,
    sc3: sc3,
    sc4: sc4,
  }
end
Which results in:
hash
# => {0=>
# {:name1=>"paul gordon",
# :name2=>"jin kazama",
# :m1=>"1277",
# :m2=>"1268",
# :sc1=>"21",
# :sc2=>"12",
# :sc3=>"21",
# :sc4=>"19"},
# 1=>
# {:name1=>"yoshimistu",
# :name2=>"the rock",
# :m1=>"2020",
# :m2=>"2092",
# :sc1=>"21",
# :sc2=>"9",
# :sc3=>"21",
# :sc4=>"23"}}
Since you're reading from a file, modify the above a bit using the "Reading from a file a line at a time" example in the documentation.
If the numerics need to be integers, tweak the hash definition to:
hash[i] = {
name1: name1,
name2: name2,
m1: m1.to_i,
m2: m2.to_i,
sc1: sc1.to_i,
sc2: sc2.to_i,
sc3: sc3.to_i,
sc4: sc4.to_i,
}
Which results in:
# => {0=>
# {:name1=>"paul gordon",
# :name2=>"jin kazama",
# :m1=>1277,
# :m2=>1268,
# :sc1=>21,
# :sc2=>12,
# :sc3=>21,
# :sc4=>19},
# 1=>
# {:name1=>"yoshimistu",
# :name2=>"the rock",
# :m1=>2020,
# :m2=>2092,
# :sc1=>21,
# :sc2=>9,
# :sc3=>21,
# :sc4=>23}}
This is another way you could do it. I have made no assumptions about the number of items per line which are to be the values of :namex, :scx or :mx, or the order of those items.
Code
# Maps each line of str to its parsed hash, keyed by zero-based line index.
def hashify(str)
  by_line = {}
  str.each_line.with_index do |line, idx|
    by_line[idx] = inner_hash(line)
  end
  by_line
end
# Parses one comma-separated line into a hash. Fields are classified in this
# order: a field containing a letter becomes the next :nameN; otherwise a
# field containing '-' is split on '-' and its two integer halves become the
# next two :scN entries; any other field becomes the next :mN as an integer.
def inner_hash(s)
  name_count = number_count = score_count = 0
  parsed = {}
  s.split(',').each do |field|
    if field =~ /[a-zA-Z].*/
      name_count += 1
      parsed[:"name#{name_count}"] = field
    elsif field =~ /\-/
      left, right = field.split('-').map(&:to_i)
      score_count += 1
      parsed[:"sc#{score_count}"] = left
      score_count += 1
      parsed[:"sc#{score_count}"] = right
    else
      number_count += 1
      parsed[:"m#{number_count}"] = field.to_i
    end
  end
  parsed
end
Example
str = "paul gordon,jin kazama,1277,1268,21-12,21-19
yoshimistu,the rock,2020,2092,21-9,21-23,25-27"
hashify(str)
#=> {0=>{:name1=>"paul gordon", :name2=>"jin kazama",
# :m1=>1277, :m2=>1268,
# :sc1=>21, :sc2=>12, :sc3=>21, :sc4=>19},
# 1=>{:name1=>"yoshimistu", :name2=>"the rock",
# :m1=>2020, :m2=>2092,
# :sc1=>21, :sc2=>9, :sc3=>21, :sc4=>23, :sc5=>25, :sc6=>27}
# }

How to write code ruby to collect data while run loop condition

I am quite new to Ruby and I need your help.
Now I want to write ruby code to collect some data while looping.
I have 2 code for this work.
My objective is collect sum score from text that split from input file.
-first, run test_dialog.rb
-Second, change input file for this format
from
AA:0.88:320:800|BB:0.82:1040:1330|CC:0.77:1330:1700 enquire-privilege_card
to
AA 0.88
BB 0.82
CC 0.77
-Then use each text that separate check on dialog condition. If this data appear in dialog ,store point until end of text (AA --> BB --> CC)
-Finally get average score.
I have a problem with separating the text and using a loop to collect the points at the same time.
Please help.
Best regard.
PS.
score will return if match with dialog
The score of input line 1 should be (0.88+0.82+0.77)/3 [match condition 1].
if no match, no score return.
Input data
AA:0.88:320:800|BB:0.82:1040:1330|CC:0.77:1330:1700 enquire-privilege_card
BB:0.88:320:800|EE:0.82:1040:1330|FF:0.77:1330:1700 enquire-privilege_card
EE:0.88:320:800|QQ:0.82:1040:1330|AA:0.77:1330:1700|RR:0.77:1330:1700|TT:0.77:1330:1700 enquire-privilege_card
test_dialog.rb
#!/usr/bin/env ruby
# encoding: UTF-8
#
# Runs get_response_text (loaded via require_relative 'dialog') over every
# line of the given files at a range of confidence thresholds and reports
# how often the predicted tag equals the reference tag.
#
# Input file (one case per line, tab-separated):
# hyp(with confidence score), ref_tag
#
# Output (stdout, tab-separated, one row per input line, taken at threshold 0.0):
# hyp, ref_tag, hyp_tag, result
#
# Summary (stderr): per file, the percentage of correct results at each
# threshold, separately for lines whose ref_tag is "(reject)" and all others.
#
require_relative 'dialog'
require_relative 'version'
unless ARGV.length > 0
  puts 'Usage: ruby test_dialog.rb FILENAME [FILENAME2...]'
  exit(1)
end
# counter[filename][:accept or :reject][:all or threshold] -> count, default 0.
counter = Hash.new{|h,k| h[k]=Hash.new{|h2,k2| h2[k2]=Hash.new{|h3,k3| h3[k3]=0}}}
thresholds = [0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]
puts %w(hyp ref_tag hyp_tag result).join("\t")
ARGV.each do |fname|
  open(fname, 'r:UTF-8').each do |line|
    hyp, ref_tag = line.strip.split(/\t/)
    key = if ref_tag == "(reject)"
            :reject
          else
            :accept
          end
    counter[fname][key][:all] += 1
    thresholds.each do |threshold|
      hyp_all = get_response_text(hyp, threshold)
      # A non-reject result is treated as a comma-separated string whose
      # second field is the predicted tag.
      hyp_tag = if hyp_all==:reject
                  "(reject)"
                else
                  hyp_all.split(/,/)[1]
                end
      result = ref_tag==hyp_tag
      counter[fname][key][threshold] += 1 if result
      # The per-line row is printed once, for the lowest threshold only;
      # the hypothesis is reduced to its words (scores and times dropped).
      puts [hyp.split('|').map{|t| t.split(':')[0]}.join(' '),
            ref_tag, hyp_tag, result].join("\t") if threshold==0.0
    end
  end
end
STDERR.puts ["Filename", "Result"].concat(thresholds).join("\t")
counter.each do |fname, c|
  # Hash#delete returns nil (it does not use the default block) when a file
  # had no accept or no reject lines; to_i turns that into 0 so the "N/A"
  # branch is taken instead of dividing by nil.
  ca_all = c[:accept].delete(:all).to_i
  cr_all = c[:reject].delete(:all).to_i
  ca = thresholds.map{|t| c[:accept][t]}.map{|n| ca_all==0 ? "N/A" : '%4.1f' % (n.to_f/ca_all*100) }
  cr = thresholds.map{|t| c[:reject][t]}.map{|n| cr_all==0 ? "N/A" : '%4.1f' % (n.to_f/cr_all*100) }
  STDERR.puts [fname, "Correct Accept"].concat(ca).join("\t")
  STDERR.puts [fname, "Correct Reject"].concat(cr).join("\t")
end
dialog.rb
# -*- coding: utf-8 -*-
#
# text : AA:0.88:320:800|BB:0.82:1040:1330|CC:0.77:1330:1700|DD:0.71:1700:2010|EE:1.00:2070:2390|FF:0.56:320:800|GG:0.12:1330:1700
#
def get_response_text text, threshold, dsr_session_id=nil
# ...
#p "result text >> " + text
# Promotion => detail => rate
# Promotion IR/IDD => high priority than enquire-promotion
# Rate IR/IDD => high priority than enquire-rate
# Problem IR/IDD => high priority than enquire-service_problem
# Internet IR/IDD => high priority than enquire-internet
# Cancel Net => enquire-internet NOT cancel-service
# Lost-Stolen => +Broken
memu = ""
intent = ""
prompt = ""
intent_th = ""
intent_id = ""
# strInput = text.gsub(/\s/,'')
strInput = text.split('|').map{|t| t.split(':')[0]}.join('')
puts ("****strINPUT*****")
puts strInput
scores = text.split('|').map{|t| t.split(':')[1].to_f}
puts ("****SCORE*****")
puts scores
avg_score = scores.inject(0){|a,x| a+=x} / scores.size
puts ("****AVG-Score*****")
puts avg_score
if avg_score < threshold
return :reject
end
# List of Country
country_fname = File.dirname(__FILE__)+"/country_list.txt"
country_list = open(country_fname, "r:UTF-8").readlines.map{|line| line.chomp}
contry_reg = Regexp.union(country_list)
# List of Mobile Type
mobile_fname = File.dirname(__FILE__)+"/mobile_list.txt"
mobile_list = open(mobile_fname, "r:UTF-8").readlines.map{|line| line.chomp}
mobile_reg = Regexp.union(mobile_list)
# List of Carrier
carrier_fname = File.dirname(__FILE__)+"/carrier_list.txt"
carrier_list = open(carrier_fname, "r:UTF-8").readlines.map{|line| line.chomp}
carrier_reg = Regexp.union(carrier_list)
if (strInput =~ /AA|BB/ and strInput =~ /CC/)
intent = "enquire-payment_method"
elsif (strInput =~ /EE/) and ("#{$'}" =~ /QQ|RR/)
intent = "enquire-balance_amount"
elsif (strInput =~ /AA|EE/i) and (strInput =~ /TT/i)
intent = "enquire-balance_unit"
elsif (strInput =~ /DD|BB|/i) and (strInput =~ /FF|AA/i)
intent = "service-balance_amount"
end
Parse as follows:
str = 'AA:0.88:320:800|BB:0.82:1040:1330|CC:0.77:1330:1700 enquire-privilege_card'
str.split( /[:|]/ ).select.with_index {| code, i | i % 4 < 2 ; }.join( ' ' )
# => "AA 0.88 BB 0.82 CC 0.77"

Resources