PyMQI bad consuming performance with persistence - ibm-mq

I'm testing the performance of IBM MQ (running the latest version in a local docker container)
I use a persistent queue.
On the producer side, I can get higher throughput by running multiple producing applications in parallel.
However, on the consumer side, I cannot increase the throughput by parallelizing consumer processes. On the contrary, the throughput is even worse for multiple consumers than for one single consumer.
What could be the reason for the poor consuming performance?
It shouldn't be due to the hardware limit as I'm comparing the consumption with the production and I did only message consumption without any other processing.
Does the GET perform the commit for each message? I don't find any explicit commit method in PyMQI though.
put_demo.py
#!/usr/bin/env python3
"""Producer demo: put a batch of persistent messages and report throughput."""
import pymqi
import time

queue_manager = 'QM1'
channel = 'DEV.APP.SVRCONN'
host = '127.0.0.1'
port = '1414'
queue_name = 'DEV.QUEUE.1'
message = b'Hello from Python!'
conn_info = '%s(%s)' % (host, port)
nb_messages = 1000

t0 = time.time()
qmgr = pymqi.connect(queue_manager, channel, conn_info)
queue = pymqi.Queue(qmgr, queue_name)
for i in range(nb_messages):
    try:
        queue.put(message)
    except pymqi.MQMIError as e:
        # A put failure is fatal for this benchmark: report it and stop.
        # (The original printed "Fatal error" but kept looping on a
        # possibly-broken connection.)
        print(f"Fatal error: {str(e)}")
        break
queue.close()
qmgr.disconnect()
t1 = time.time()
# Note: the measured interval includes connect/disconnect overhead.
print(f"tps: {nb_messages/(t1-t0):.0f} nb_message_produced: {nb_messages}")
get_demo.py
#!/usr/bin/env python3
"""Consumer demo: drain messages from a queue and report throughput."""
import pymqi
import time
import os

queue_manager = 'QM1'
channel = 'DEV.APP.SVRCONN'
host = '127.0.0.1'
port = '1414'
queue_name = 'DEV.QUEUE.1'
conn_info = '%s(%s)' % (host, port)
nb_messages = 1000
nb_messages_consumed = 0

t0 = time.time()
qmgr = pymqi.connect(queue_manager, channel, conn_info)
queue = pymqi.Queue(qmgr, queue_name)
# Block up to WaitInterval ms per MQGET instead of busy-polling.
gmo = pymqi.GMO(Options=pymqi.CMQC.MQGMO_WAIT | pymqi.CMQC.MQGMO_FAIL_IF_QUIESCING)
gmo.WaitInterval = 1000  # milliseconds
while nb_messages_consumed < nb_messages:
    try:
        msg = queue.get(None, None, gmo)
        nb_messages_consumed += 1
    except pymqi.MQMIError as e:
        # 2033 (no message within WaitInterval) is expected: keep waiting.
        # Anything else was silently swallowed by the original, which could
        # spin forever on a real failure -- re-raise it instead.
        if e.reason != pymqi.CMQC.MQRC_NO_MSG_AVAILABLE:
            raise
queue.close()
qmgr.disconnect()
t1 = time.time()
print(f"tps: {nb_messages_consumed/(t1-t0):.0f} nb_messages_consumed: {nb_messages_consumed}")
run results
> for i in {1..10}; do ./put_demo.py & done
tps: 385 nb_message_produced: 1000
tps: 385 nb_message_produced: 1000
tps: 383 nb_message_produced: 1000
tps: 379 nb_message_produced: 1000
tps: 378 nb_message_produced: 1000
tps: 377 nb_message_produced: 1000
tps: 377 nb_message_produced: 1000
tps: 378 nb_message_produced: 1000
tps: 374 nb_message_produced: 1000
tps: 374 nb_message_produced: 1000
> for i in {1..10}; do ./get_demo.py & done
tps: 341 nb_messages_consumed: 1000
tps: 339 nb_messages_consumed: 1000
tps: 95 nb_messages_consumed: 1000
tps: 82 nb_messages_consumed: 1000
tps: 82 nb_messages_consumed: 1000
tps: 82 nb_messages_consumed: 1000
tps: 82 nb_messages_consumed: 1000
tps: 82 nb_messages_consumed: 1000
tps: 82 nb_messages_consumed: 1000
tps: 82 nb_messages_consumed: 1000
get_demo.py updated version using syncpoint and batch commit
#!/usr/bin/env python3
"""Consumer demo using MQGMO_SYNCPOINT with batched commits.

Getting under syncpoint and committing every `commit_batch` messages avoids
a log-forced commit per message, which is what limits parallel persistent
consumers.
"""
import pymqi
import time
import os

queue_manager = 'QM1'
channel = 'DEV.APP.SVRCONN'
host = '127.0.0.1'
port = '1414'
queue_name = 'DEV.QUEUE.1'
conn_info = '%s(%s)' % (host, port)
nb_messages = 1000
commit_batch = 10
nb_messages_consumed = 0

t0 = time.time()
qmgr = pymqi.connect(queue_manager, channel, conn_info)
queue = pymqi.Queue(qmgr, queue_name)
gmo = pymqi.GMO(Options=pymqi.CMQC.MQGMO_WAIT | pymqi.CMQC.MQGMO_FAIL_IF_QUIESCING | pymqi.CMQC.MQGMO_SYNCPOINT)
gmo.WaitInterval = 1000  # milliseconds
while nb_messages_consumed < nb_messages:
    try:
        msg = queue.get(None, None, gmo)
        nb_messages_consumed += 1
        if nb_messages_consumed % commit_batch == 0:
            qmgr.commit()  # make the last `commit_batch` gets permanent
    except pymqi.MQMIError as e:
        # 2033 (no message within WaitInterval) is expected: keep waiting.
        # The original swallowed every other reason code too -- re-raise.
        if e.reason != pymqi.CMQC.MQRC_NO_MSG_AVAILABLE:
            raise
# Commit any remainder of the final (partial) batch explicitly rather
# than relying on disconnect-time unit-of-work resolution.
qmgr.commit()
queue.close()
qmgr.disconnect()
t1 = time.time()
print(f"tps: {nb_messages_consumed/(t1-t0):.0f} nb_messages_consumed: {nb_messages_consumed}")
Thanks.

Related

ffmpeg takes too long to start

I have this command in python script, in a loop:
ffmpeg -i somefile.mp4 -ss 00:03:12 -t 00:00:35 piece.mp4 -loglevel error -stats
It cuts out pieces of input file (-i). Input filename, as well as start time (-ss) and length of the piece I cut out (-t) varies, so it reads number of mp4 files and cuts out number of pieces from each one. During execution of the script it might be called around 100 times. My problem is that each time before it starts, there is a delay of 6-15 seconds and it adds up to significant time. How can I get it to start immediately?
Initially I thought it was process priority problem, but I noticed that even during the "pause", all processors work at 100%, so apparently some work is being done.
The script (process_videos.py):
import subprocess
import sys
import math
import time
class TF:
    """TimeFormatter class (TF).

    Converts between short time strings (e.g. '1:33', '0:32', '23'),
    long 'hh:mm:ss' strings accepted by ffmpeg, and integer seconds.
    """

    def toLong(self, shrt):
        """Convert a short time string to its 'hh:mm:ss' long form."""
        sx = '00:00:00'
        ladd = 8 - len(shrt)
        # Keep the template's leading zeros/colons and splice in the short
        # form, e.g. '1:33' -> '00:0' + '1:33' == '00:01:33'.
        return sx[:ladd] + shrt

    def toShort(self, lng):
        """Convert a long time string to its shortest form."""
        # Recursively strip leading zeros and colons: '00:01:22' -> '1:22'.
        if lng[0] == '0' or lng[0] == ':':
            return self.toShort(lng[1:])
        return lng

    def toSeconds(self, any_time):
        """Convert a time string in any accepted form to integer seconds."""
        if len(any_time) < 3:  # bare seconds, e.g. '23'
            return int(any_time)
        tt = any_time.split(':')
        if len(any_time) < 6:  # 'm:ss' / 'mm:ss'
            return int(tt[0]) * 60 + int(tt[1])
        return int(tt[0]) * 3600 + int(tt[1]) * 60 + int(tt[2])

    def toTime(self, secsInt):
        """Convert non-negative integer seconds to an 'hh:mm:ss' string."""
        hrs, rem = divmod(secsInt, 3600)
        mins, secs = divmod(rem, 60)
        return str(hrs).zfill(2) + ':' + str(mins).zfill(2) + ':' + str(secs).zfill(2)

    def minus(self, t_start, t_end):
        """Return t_end - t_start formatted as 'hh:mm:ss'."""
        # Reuse toSeconds/toTime instead of re-implementing both
        # conversions inline, as the original did.
        return self.toTime(self.toSeconds(t_end) - self.toSeconds(t_start))
def go_main():
    """Cut each video listed in the argv[1] data file into pieces with
    ffmpeg, then concatenate the pieces of each video into one output file.
    """
    tf = TF()
    vid_n = 0
    arglen = len(sys.argv)
    if arglen == 2:
        with open(sys.argv[1], 'r') as f_in:
            lines = f_in.readlines()
        # Locate the BEGIN/END markers delimiting the video list.
        start = None
        end = None
        cnt = 0
        for line in lines:
            if line[:5] == 'BEGIN':
                start = cnt
            if line[:3] == 'END':
                end = cnt
            cnt += 1
        if start is None or end is None:  # fix: identity comparison with None
            print('Invalid file format. start = {}, end = {}'.format(start, end))
            return
        else:
            lines_r = lines[start+1:end]
            del lines
        print('videos to process: {}'.format(len(lines_r)))
        f_out_prefix = ""
        for vid in lines_r:
            vid_n += 1
            print('\nProcessing video {}/{}'.format(vid_n, len(lines_r)))
            f_out_prefix = 'v' + str(vid_n) + '-'
            # Line format: !<filename>!<start-end>,<start-end>,...!
            dat = vid.split('!')[1:3]
            title = dat[0]
            dat_t = dat[1].split(',')
            v_pieces = len(dat_t)
            piece_n = 0
            video_pieces = []
            cmd1 = "echo -n \"\" > tmpfile"
            subprocess.run(cmd1, shell=True)
            print(' new tmpfile created')
            for v_times in dat_t:
                piece_n += 1
                f_out = f_out_prefix + str(piece_n) + '.mp4'
                video_pieces.append(f_out)
                print(' piece filename {} added to video_pieces list'.format(f_out))
                v_times_spl = v_times.split('-')
                v_times_start = v_times_spl[0]
                v_times_end = v_times_spl[1]
                t_st = tf.toLong(v_times_start)
                t_dur = tf.toTime(tf.toSeconds(v_times_end) - tf.toSeconds(v_times_start))
                # Put -ss BEFORE -i: ffmpeg then seeks by keyframe instead of
                # decoding the input from the beginning up to the cut point,
                # which removes the 6-15 s of 100%-CPU "startup" per piece.
                cmd3 = ["ffmpeg", "-ss", t_st, "-i", title, "-t", t_dur, f_out, "-loglevel", "error", "-stats"]
                print(' cutting out piece {}/{} - {}'.format(piece_n, len(dat_t), t_dur))
                subprocess.run(cmd3)
            # Build the concat list file expected by ffmpeg's concat demuxer.
            for video_piece_name in video_pieces:
                cmd4 = "echo \"file " + video_piece_name + "\" >> tmpfile"
                subprocess.run(cmd4, shell=True)
                print(' filename {} added to tmpfile'.format(video_piece_name))
            vname = f_out_prefix[:-1] + ".mp4"
            print(' name of joined file: {}'.format(vname))
            cmd5 = "ffmpeg -f concat -safe 0 -i tmpfile -c copy joined.mp4 -loglevel error -stats"
            to_be_joined = " ".join(video_pieces)
            print(' joining...')
            join_cmd = subprocess.Popen(cmd5, shell=True)
            join_cmd.wait()
            print(' joined!')
            cmd6 = "mv joined.mp4 " + vname
            rename_cmd = subprocess.Popen(cmd6, shell=True)
            rename_cmd.wait()
            print(' File joined.mp4 renamed to {}'.format(vname))
            cmd7 = "rm " + to_be_joined
            rm_cmd = subprocess.Popen(cmd7, shell=True)
            rm_cmd.wait()
            print('rm command completed - pieces removed')
            cmd8 = "rm tmpfile"
            subprocess.run(cmd8, shell=True)
            print('tmpfile removed')
            print('All done')
    else:
        print('Incorrect number of arguments')

############################
if __name__ == '__main__':
    go_main()
process_videos.py is called from bash terminal like this:
$ python process_videos.py video_data
video_data file has the following format:
BEGIN
!first_video.mp4!3-23,55-1:34,2:01-3:15,3:34-3:44!
!second_video.mp4!2-7,12-44,1:03-1:33!
END
My system details:
System: Host: snowflake Kernel: 5.4.0-52-generic x86_64 bits: 64 Desktop: Gnome 3.28.4
Distro: Ubuntu 18.04.5 LTS
Machine: Device: desktop System: Gigabyte product: N/A serial: N/A
Mobo: Gigabyte model: Z77-D3H v: x.x serial: N/A BIOS: American Megatrends v: F14 date: 05/31/2012
CPU: Quad core Intel Core i5-3570 (-MCP-) cache: 6144 KB
clock speeds: max: 3800 MHz 1: 1601 MHz 2: 1601 MHz 3: 1601 MHz 4: 1602 MHz
Drives: HDD Total Size: 1060.2GB (55.2% used)
ID-1: /dev/sda model: ST31000524AS size: 1000.2GB
ID-2: /dev/sdb model: Corsair_Force_GT size: 60.0GB
Partition: ID-1: / size: 366G used: 282G (82%) fs: ext4 dev: /dev/sda1
ID-2: swap-1 size: 0.70GB used: 0.00GB (0%) fs: swap dev: /dev/sda5
Info: Processes: 313 Uptime: 16:37 Memory: 3421.4/15906.9MB Client: Shell (bash) inxi: 2.3.56
UPDATE:
Following Charles' advice, I used performance sampling:
# perf record -a -g sleep 180
...and here's the report:
Samples: 74K of event 'cycles', Event count (approx.): 1043554519767
Children Self Command Shared Object
- 50.56% 45.86% ffmpeg libavcodec.so.57.107.100
- 3.10% 0x4489480000002825
0.64% 0x7ffaf24b92f0
- 2.12% 0x5f7369007265646f
av_default_item_name
1.39% 0
- 44.48% 40.59% ffmpeg libx264.so.152
5.78% x264_add8x8_idct_avx2.skip_prologue
3.13% x264_add8x8_idct_avx2.skip_prologue
2.91% x264_add8x8_idct_avx2.skip_prologue
2.31% x264_add8x8_idct_avx.skip_prologue
2.03% 0
1.78% 0x1
1.26% x264_add8x8_idct_avx2.skip_prologue
1.09% x264_add8x8_idct_avx.skip_prologue
1.06% x264_me_search_ref
0.97% x264_add8x8_idct_avx.skip_prologue
0.60% x264_me_search_ref
- 38.01% 0.00% ffmpeg [unknown]
4.10% 0
- 3.49% 0x4489480000002825
0.70% 0x7ffaf24b92f0
0.56% 0x7f273ae822f0
0.50% 0x7f0c4768b2f0
- 2.29% 0x5f7369007265646f
av_default_item_name
1.99% 0x1
10.13% 10.12% ffmpeg [kernel.kallsyms]
- 3.14% 0.73% ffmpeg libavutil.so.55.78.100
2.34% av_default_item_name
- 1.73% 0.21% ffmpeg libpthread-2.27.so
- 0.70% pthread_cond_wait##GLIBC_2.3.2
- 0.62% entry_SYSCALL_64_after_hwframe
- 0.62% do_syscall_64
- 0.57% __x64_sys_futex
0.52% do_futex
0.93% 0.89% ffmpeg libc-2.27.so
- 0.64% 0.64% swapper [kernel.kallsyms]
0.63% secondary_startup_64
0.21% 0.18% ffmpeg libavfilter.so.6.107.100
0.20% 0.11% ffmpeg libavformat.so.57.83.100
0.12% 0.11% ffmpeg ffmpeg
0.11% 0.00% gnome-terminal- [unknown]
0.09% 0.07% ffmpeg libm-2.27.so
0.08% 0.07% ffmpeg ld-2.27.so
0.04% 0.04% gnome-terminal- libglib-2.0.so.0.5600.4
When you put -ss after -i, ffmpeg will not use keyframes to jump to the requested position; it decodes the video from the beginning of the file up to that point. That's where the 6-15 second delay with 100% CPU usage comes from.
You can put -ss before the -i e.g:
ffmpeg -ss 00:03:12 -i somefile.mp4 -t 00:00:35 piece.mp4 -loglevel error -stats
This will make ffmpeg use the keyframes and directly jumps to the starting time.

Scapy Ethernet packet from byte string loses information of top layers

I am trying to build scapy Ether packet from raw string.
packet = packets[4]  # packet taken from a pcap file (presumably scapy's rdpcap() result -- confirm)
# NOTE(review): on Python 3, str(packet) yields the *text* "b'...'", not the
# raw frame bytes, so the rebuilt packet cannot be dissected correctly --
# raw(packet) / bytes(packet) is what the answer below uses instead.
str_packet = str(packet)  # I get string form from here
packet2 = Ether(str_packet)  # rebuilt from lossy text, so upper layers end up as Raw payload
packet.show()
packet2.show()
Outputs here shows that Ether packet2 is not formed well.
Any idea, How can I get packet2 from raw string ?
###[ Ethernet ]###
dst = 80:e6:50:14:3d:52
src = 2a:74:02:9b:85:64
type = IPv4
###[ IP ]###
version = 4
ihl = 5
tos = 0x0
len = 52
id = 44178
flags =
frag = 0
ttl = 88
proto = tcp
chksum = 0x5503
src = 157.240.13.35
dst = 172.20.10.7
\options \
###[ TCP ]###
sport = https
dport = 60643
seq = 905248884
ack = 938762494
dataofs = 8
reserved = 0
flags = A
window = 113
chksum = 0x43e9
urgptr = 0
options = [('NOP', None), ('NOP', None), ('Timestamp', (2596765797, 886096700))]
###[ Raw ]###
load = "b'\\x80\\xe6P\\x14=R*t\\x02\\x9b\\x85d\\x08\\x00E\\x00\\x004\\xac\\x92\\x00\\x00X\\x06U\\x03\\x9d\\xf0\\r#\\xac\\x14\\n\\x07\\x01\\xbb\\xec\\xe35\\xf5\\x00t7\\xf4`\\xfe\\x80\\x10\\x00qC\\xe9\\x00\\x00\\x01\\x01\\x08\\n\\x9a\\xc7\\x80e4\\xd0\\xc3<'"
At line 2, use raw instead of str. I extracted some frame from a pcap file as you did and here is what I get:
>>> str(p)
WARNING: Calling str(pkt) on Python 3 makes no sense!
"b'\\xf4\\xca\\xe5Cu\\x10\\x00!\\xcc\\xd3px\\x08\\x00E\\x00\\x004\\xbe\\x05#\\x00#\\x06\\xfb\\xc4\\xc0\\xa8\\x01\\x0f6\\xbf\\x88\\x83\\x96N\\x01\\xbb5M\\x94r\\xd0\\xc7\\xd9\\xae\\x80\\x10\\x01\\xf5\\x81 \\x00\\x00\\x01\\x01\\x08\\np\\xaf\\xdf\\xc4a\\xcf=\\x84'"
>>> raw(p)
b'\xf4\xca\xe5Cu\x10\x00!\xcc\xd3px\x08\x00E\x00\x004\xbe\x05#\x00#\x06\xfb\xc4\xc0\xa8\x01\x0f6\xbf\x88\x83\x96N\x01\xbb5M\x94r\xd0\xc7\xd9\xae\x80\x10\x01\xf5\x81 \x00\x00\x01\x01\x08\np\xaf\xdf\xc4a\xcf=\x84'
>>> Ether(raw(p)).show()
###[ Ethernet ]###
dst= f4:ca:e5:43:75:10
src= 00:21:cc:d3:70:78
type= IPv4
###[ IP ]###
version= 4
ihl= 5
tos= 0x0
len= 52
id= 48645
flags= DF
frag= 0
ttl= 64
proto= tcp
chksum= 0xfbc4
src= 192.168.1.15
dst= 54.191.136.131
\options\
###[ TCP ]###
sport= 38478
dport= https
seq= 894276722
ack= 3502758318
dataofs= 8
reserved= 0
flags= A
window= 501
chksum= 0x8120
urgptr= 0
options= [('NOP', None), ('NOP', None), ('Timestamp', (1890574276, 1640971652))]

XGboost keeps failing in Latest H2O stable release

I downloaded the latest release of H2O (3.18.0.1) and XGboost keeps failing. I am not sure whether to post to the JIRA issues or here.
# NOTE(review): assumes `import h2o` already ran earlier in the session.
h2o.init()  # start or attach to a local H2O cluster
from h2o.estimators import H2OXGBoostEstimator
is_xgboost_available = H2OXGBoostEstimator.available()  # True if the XGBoost backend is usable
train_path = 'https://s3.amazonaws.com/h2o-public-test-data/bigdata/laptop/higgs_train_imbalance_100k.csv'
test_path = 'https://s3.amazonaws.com/h2o-public-test-data/bigdata/laptop/higgs_test_imbalance_100k.csv'
df_train = h2o.import_file(train_path)
df_test = h2o.import_file(test_path)
# Transform first feature into categorical feature
df_train[0] = df_train[0].asfactor()
df_test[0] = df_test[0].asfactor()
param = {
"ntrees" : 500
}
model = H2OXGBoostEstimator(**param)
# Train on all columns except column 0, which is used as the target (y=0).
# The ConnectionResetError below occurs mid-build, which suggests the H2O
# backend JVM died during XGBoost training -- check the cluster logs.
model.train(x = list(range(1, df_train.shape[1])), y = 0, training_frame = df_train)
I can run random forest, GBM without an issue but xgboost keeps failing.
I am running on Ubuntu 16.04. Java Version: java version "1.8.0_161"; Java(TM) SE Runtime Environment (build 1.8.0_161-b12); Java HotSpot(TM) 64-Bit Server VM (build 25.161-b12, mixed mode). Anaconda Python 3.6
I reinstalled Anaconda and reinstalled JRE, but am still having the same issue.
It keeps giving me the following error:
xgboost Model Build progress: |████████████████████████████████████████
---------------------------------------------------------------------------
ConnectionResetError Traceback (most recent call last)
~/anaconda3/lib/python3.6/site-packages/urllib3/connectionpool.py in urlopen(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, **response_kw)
600 body=body, headers=headers,
--> 601 chunked=chunked)
602
~/anaconda3/lib/python3.6/site-packages/urllib3/connectionpool.py in _make_request(self, conn, method, url, timeout, chunked, **httplib_request_kw)
386 # otherwise it looks like a programming error was the cause.
--> 387 six.raise_from(e, None)
388 except (SocketTimeout, BaseSSLError, SocketError) as e:
~/anaconda3/lib/python3.6/site-packages/urllib3/packages/six.py in raise_from(value, from_value)
~/anaconda3/lib/python3.6/site-packages/urllib3/connectionpool.py in _make_request(self, conn, method, url, timeout, chunked, **httplib_request_kw)
382 try:
--> 383 httplib_response = conn.getresponse()
384 except Exception as e:
~/anaconda3/lib/python3.6/http/client.py in getresponse(self)
1330 try:
-> 1331 response.begin()
1332 except ConnectionError:
~/anaconda3/lib/python3.6/http/client.py in begin(self)
296 while True:
--> 297 version, status, reason = self._read_status()
298 if status != CONTINUE:
~/anaconda3/lib/python3.6/http/client.py in _read_status(self)
257 def _read_status(self):
--> 258 line = str(self.fp.readline(_MAXLINE + 1), "iso-8859-1")
259 if len(line) > _MAXLINE:
~/anaconda3/lib/python3.6/socket.py in readinto(self, b)
585 try:
--> 586 return self._sock.recv_into(b)
587 except timeout:
ConnectionResetError: [Errno 104] Connection reset by peer
During handling of the above exception, another exception occurred:
ProtocolError Traceback (most recent call last)
~/anaconda3/lib/python3.6/site-packages/requests/adapters.py in send(self, request, stream, timeout, verify, cert, proxies)
439 retries=self.max_retries,
--> 440 timeout=timeout
441 )
~/anaconda3/lib/python3.6/site-packages/urllib3/connectionpool.py in urlopen(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, **response_kw)
638 retries = retries.increment(method, url, error=e, _pool=self,
--> 639 _stacktrace=sys.exc_info()[2])
640 retries.sleep()
~/anaconda3/lib/python3.6/site-packages/urllib3/util/retry.py in increment(self, method, url, response, error, _pool, _stacktrace)
356 if read is False or not self._is_method_retryable(method):
--> 357 raise six.reraise(type(error), error, _stacktrace)
358 elif read is not None:
~/anaconda3/lib/python3.6/site-packages/urllib3/packages/six.py in reraise(tp, value, tb)
684 if value.__traceback__ is not tb:
--> 685 raise value.with_traceback(tb)
686 raise value
~/anaconda3/lib/python3.6/site-packages/urllib3/connectionpool.py in urlopen(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, **response_kw)
600 body=body, headers=headers,
--> 601 chunked=chunked)
602
~/anaconda3/lib/python3.6/site-packages/urllib3/connectionpool.py in _make_request(self, conn, method, url, timeout, chunked, **httplib_request_kw)
386 # otherwise it looks like a programming error was the cause.
--> 387 six.raise_from(e, None)
388 except (SocketTimeout, BaseSSLError, SocketError) as e:
~/anaconda3/lib/python3.6/site-packages/urllib3/packages/six.py in raise_from(value, from_value)
~/anaconda3/lib/python3.6/site-packages/urllib3/connectionpool.py in _make_request(self, conn, method, url, timeout, chunked, **httplib_request_kw)
382 try:
--> 383 httplib_response = conn.getresponse()
384 except Exception as e:
~/anaconda3/lib/python3.6/http/client.py in getresponse(self)
1330 try:
-> 1331 response.begin()
1332 except ConnectionError:
~/anaconda3/lib/python3.6/http/client.py in begin(self)
296 while True:
--> 297 version, status, reason = self._read_status()
298 if status != CONTINUE:
~/anaconda3/lib/python3.6/http/client.py in _read_status(self)
257 def _read_status(self):
--> 258 line = str(self.fp.readline(_MAXLINE + 1), "iso-8859-1")
259 if len(line) > _MAXLINE:
~/anaconda3/lib/python3.6/socket.py in readinto(self, b)
585 try:
--> 586 return self._sock.recv_into(b)
587 except timeout:
ProtocolError: ('Connection aborted.', ConnectionResetError(104, 'Connection reset by peer'))
During handling of the above exception, another exception occurred:
ConnectionError Traceback (most recent call last)
~/anaconda3/lib/python3.6/site-packages/h2o/backend/connection.py in request(self, endpoint, data, json, filename, save_to)
399 headers=headers, timeout=self._timeout, stream=stream,
--> 400 auth=self._auth, verify=self._verify_ssl_cert, proxies=self._proxies)
401 self._log_end_transaction(start_time, resp)
~/anaconda3/lib/python3.6/site-packages/requests/api.py in request(method, url, **kwargs)
57 with sessions.Session() as session:
---> 58 return session.request(method=method, url=url, **kwargs)
59
~/anaconda3/lib/python3.6/site-packages/requests/sessions.py in request(self, method, url, params, data, headers, cookies, files, auth, timeout, allow_redirects, proxies, hooks, stream, verify, cert, json)
507 send_kwargs.update(settings)
--> 508 resp = self.send(prep, **send_kwargs)
509
~/anaconda3/lib/python3.6/site-packages/requests/sessions.py in send(self, request, **kwargs)
617 # Send the request
--> 618 r = adapter.send(request, **kwargs)
619
~/anaconda3/lib/python3.6/site-packages/requests/adapters.py in send(self, request, stream, timeout, verify, cert, proxies)
489 except (ProtocolError, socket.error) as err:
--> 490 raise ConnectionError(err, request=request)
491
ConnectionError: ('Connection aborted.', ConnectionResetError(104, 'Connection reset by peer'))
During handling of the above exception, another exception occurred:
H2OConnectionError Traceback (most recent call last)
<ipython-input-22-37b26d4dfbfd> in <module>()
1 start = time.time()
----> 2 model.train(x = list(range(1, df_train.shape[1])), y = 0, training_frame = df_train)
3 end = time.time()
4 print(end - start)
~/anaconda3/lib/python3.6/site-packages/h2o/estimators/estimator_base.py in train(self, x, y, training_frame, offset_column, fold_column, weights_column, validation_frame, max_runtime_secs, ignored_columns, model_id, verbose)
229 return
230
--> 231 model.poll(verbose_model_scoring_history=verbose)
232 model_json = h2o.api("GET /%d/Models/%s" % (rest_ver, model.dest_key))["models"][0]
233 self._resolve_model(model.dest_key, model_json)
~/anaconda3/lib/python3.6/site-packages/h2o/job.py in poll(self, verbose_model_scoring_history)
56 pb.execute(self._refresh_job_status, print_verbose_info=lambda x: self._print_verbose_info() if int(x * 10) % 5 == 0 else " ")
57 else:
---> 58 pb.execute(self._refresh_job_status)
59 except StopIteration as e:
60 if str(e) == "cancelled":
~/anaconda3/lib/python3.6/site-packages/h2o/utils/progressbar.py in execute(self, progress_fn, print_verbose_info)
167 # Query the progress level, but only if it's time already
168 if self._next_poll_time <= now:
--> 169 res = progress_fn() # may raise StopIteration
170 assert_is_type(res, (numeric, numeric), numeric)
171 if not isinstance(res, tuple):
~/anaconda3/lib/python3.6/site-packages/h2o/job.py in _refresh_job_status(self)
91 def _refresh_job_status(self):
92 if self._poll_count <= 0: raise StopIteration("")
---> 93 jobs = h2o.api("GET /3/Jobs/%s" % self.job_key)
94 self.job = jobs["jobs"][0] if "jobs" in jobs else jobs["job"][0]
95 self.status = self.job["status"]
~/anaconda3/lib/python3.6/site-packages/h2o/h2o.py in api(endpoint, data, json, filename, save_to)
101 # type checks are performed in H2OConnection class
102 _check_connection()
--> 103 return h2oconn.request(endpoint, data=data, json=json, filename=filename, save_to=save_to)
104
105
~/anaconda3/lib/python3.6/site-packages/h2o/backend/connection.py in request(self, endpoint, data, json, filename, save_to)
408 else:
409 self._log_end_exception(e)
--> 410 raise H2OConnectionError("Unexpected HTTP error: %s" % e)
411 except requests.exceptions.Timeout as e:
412 self._log_end_exception(e)
H2OConnectionError: Unexpected HTTP error: ('Connection aborted.', ConnectionResetError(104, 'Connection reset by peer'))

PyQt Copy folder with progress and transfer rate/speed

I'd like to expand my current code with the ability to show the transfer rate/speed of the files being copied. Im working on Windows 10 with py 3.6 and Qt 5.8. Here is my code:
import os
import shutil
from PyQt5.QtWidgets import QApplication, QWidget, QVBoxLayout, QLabel, QProgressBar, QFileDialog
class FileCopyProgress(QWidget):
    """Small window that copies *src* to *dest* while showing a progress bar.

    The copy runs on the GUI thread; app.processEvents() keeps the UI alive.
    """

    def __init__(self, parent=None, src=None, dest=None):
        super(FileCopyProgress, self).__init__()
        self.src = src
        self.dest = dest
        self.build_ui()

    def build_ui(self):
        """Create the widgets and schedule the copy to start shortly after show()."""
        # QTimer lives in QtCore; the module header only imports QtWidgets,
        # so the original raised NameError here.
        from PyQt5.QtCore import QTimer
        hbox = QVBoxLayout()
        lbl_src = QLabel('Source: ' + self.src)
        lbl_dest = QLabel('Destination: ' + self.dest)
        self.pb = QProgressBar()
        self.pb.setMinimum(0)
        self.pb.setMaximum(100)
        self.pb.setValue(0)
        hbox.addWidget(lbl_src)
        hbox.addWidget(lbl_dest)
        hbox.addWidget(self.pb)
        self.setLayout(hbox)
        self.setWindowTitle('File copy')
        # singleShot is a static method; the instance is kept only so the
        # attribute name matches the original code.
        self.auto_start_timer = QTimer()
        self.auto_start_timer.singleShot(2000, lambda: self.copyFilesWithProgress(self.src, self.dest, self.progress, self.copydone))
        self.show()

    def progress(self, done, total):
        """Progress callback: update the bar to done/total percent."""
        pct = int(round((done / float(total)) * 100))
        try:
            self.pb.setValue(pct)
        except RuntimeError:
            # Widget may already have been closed/destroyed; best-effort only.
            # (Original used a bare `except:` which also hides real bugs.)
            pass
        app.processEvents()  # keep the UI responsive during the copy

    def copydone(self):
        """Completion callback: show 100% and close the window."""
        self.pb.setValue(100)
        self.close()

    def countfiles(self, _dir):
        """Return the number of files below *_dir* (0 if it is not a directory)."""
        files = []
        if os.path.isdir(_dir):
            for path, dirs, filenames in os.walk(_dir):
                files.extend(filenames)
        return len(files)

    def makedirs(self, dest):
        """Create *dest* (and parents) if it does not already exist."""
        if not os.path.exists(dest):
            os.makedirs(dest)

    #pyqtSlot()
    def copyFilesWithProgress(self, src, dest, callback_progress, callback_copydone):
        """Recursively copy *src* into *dest*, reporting per-file progress."""
        numFiles = self.countfiles(src)
        if numFiles > 0:
            dest = os.path.join(dest, src.replace(BASE_DIR, '').replace('\\', ''))
            print(''.join(['Destination: ', dest]))
            self.makedirs(dest)
            numCopied = 0
            for path, dirs, filenames in os.walk(src):
                for directory in dirs:
                    destDir = path.replace(src, dest)
                    self.makedirs(os.path.join(destDir, directory))
                for sfile in filenames:
                    srcFile = os.path.join(path, sfile)
                    destFile = os.path.join(path.replace(src, dest), sfile)
                    shutil.copy(srcFile, destFile)
                    numCopied += 1
                    callback_progress(numCopied, numFiles)
            callback_copydone()
BASE_DIR = 'C:\\dev'
app = QApplication([])
# Keep a reference to the widget: an unbound PyQt widget can be garbage
# collected (and destroyed) before app.exec_() runs.  Raw strings avoid
# accidental backslash escape sequences in the Windows paths (same values).
copier = FileCopyProgress(src=r"C:\dev\pywin32-221", dest=r'C:\dev\copied')
# Run the app
app.exec_()
This code opens a GUI with a progress bar showing progress while copying files. A simple label with the current (approximate) transfer rate/speed would be really nice :)
Unfortunately I can't find any examples; can someone give me a hint or maybe a working example, please?
EDIT:
I did a remake and now I have transfer rate, time elapsed and time remaining. The data seems to be realistic. I have only one problem: Lets assume i have a folder/file that time remaining is 7 sec -> currently it starts with 7 sec and gets an update every 1 second. We expect that in the next step it would display 6 sec but instead it goes:
5 sec
3 sec
1.5 sec
1.4 sec
1.3 sec
1.2 sec
1.1 sec and so on
Where is the mistake?
class FileCopyProgress(QWidget):
    """Copy window that also reports transfer rate, elapsed and remaining time."""

    def __init__(self, parent=None, src=None, dest=None):
        super(FileCopyProgress, self).__init__()
        self.src = src
        self.dest = dest
        self.rate = "0"
        self.total_time = "0 s"
        self.time_elapsed = "0 s"
        self.time_remaining = "0 s"
        self.build_ui()

    def build_ui(self):
        """Create the widgets, schedule the copy, and start the 1 s stats timer."""
        hbox = QVBoxLayout()
        lbl_src = QLabel('Source: ' + self.src)
        lbl_dest = QLabel('Destination: ' + self.dest)
        self.pb = QProgressBar()
        self.lbl_rate = QLabel('Transfer rate: ' + self.rate)
        self.lbl_time_elapsed = QLabel('Time Elapsed: ' + self.time_elapsed)
        self.lbl_time_remaining = QLabel('Time Remaining: ' + self.time_remaining)
        self.pb.setMinimum(0)
        self.pb.setMaximum(100)
        self.pb.setValue(0)
        hbox.addWidget(lbl_src)
        hbox.addWidget(lbl_dest)
        hbox.addWidget(self.pb)
        hbox.addWidget(self.lbl_rate)
        hbox.addWidget(self.lbl_time_elapsed)
        hbox.addWidget(self.lbl_time_remaining)
        self.setLayout(hbox)
        self.setWindowTitle('File copy')
        self.auto_start_timer = QTimer()
        self.auto_start_timer.singleShot(100, lambda: self.copy_files_with_progress(self.src, self.dest, self.progress, self.copy_done))
        self.copy_timer = QTimer()
        self.copy_timer.timeout.connect(lambda: self.process_informations())
        self.copy_timer.start(1000)
        self.show()

    #pyqtSlot()
    def process_informations(self):
        """Refresh the rate / elapsed / remaining labels (fires once per second)."""
        # time.clock() was removed in Python 3.8; perf_counter() is the
        # documented monotonic replacement.
        time_elapsed_raw = time.perf_counter() - self.start_time
        self.time_elapsed = '{:.2f} s'.format(time_elapsed_raw)
        self.lbl_time_elapsed.setText('Time Elapsed: ' + self.time_elapsed)
        # Average transfer speed since the start, in bytes/s.
        speed = self._copied / time_elapsed_raw if time_elapsed_raw > 0 else 0
        # FIX: remaining time is (bytes left) / speed.  The original computed
        # totalSize / copied, which shrinks non-linearly as `copied` grows --
        # that is exactly the "7, 5, 3, 1.5 ..." countdown bug being asked about.
        if speed > 0:
            time_remaining_raw = (self._totalSize - self._copied) / speed
        else:
            time_remaining_raw = 0.0
        if time_remaining_raw < 60.:
            self.time_remaining = '{:.2f} s'.format(time_remaining_raw)
        else:
            # FIX: convert to minutes before labelling the value 'min'.
            self.time_remaining = '{:.2f} min'.format(time_remaining_raw / 60.)
        self.lbl_time_remaining.setText('Time Remaining: ' + self.time_remaining)
        # Instantaneous rate over the last tick (the timer fires every 1 s,
        # so the byte delta approximates bytes/s).
        rate_raw = (self._copied - self._copied_tmp) / 1024 / 1024
        self.rate = '{:.2f} MB/s'.format(rate_raw)
        self.lbl_rate.setText('Transfer rate: ' + self.rate)
        self._copied_tmp = self._copied

    def progress(self):
        """Update the progress bar from the copied/total byte counters."""
        self._progress = (self._copied / self._totalSize) * 100
        try:
            # setValue expects an int; newer PyQt5 raises TypeError on float.
            self.pb.setValue(int(self._progress))
        except RuntimeError:
            pass  # widget already destroyed; best-effort update only
        app.processEvents()

    def get_total_size(self, src):
        """Return the total size in bytes of all files below *src*."""
        return sum(os.path.getsize(os.path.join(dirpath, filename))
                   for dirpath, dirnames, filenames in os.walk(src)
                   for filename in filenames)

    def copy_done(self):
        """Completion callback: show 100% and close the window."""
        self.pb.setValue(100)
        print("done")
        self.close()

    def make_dirs(self, dest):
        """Create *dest* (and parents) if it does not already exist."""
        if not os.path.exists(dest):
            os.makedirs(dest)

    #pyqtSlot()
    def copy_files_with_progress(self, src, dst, callback_progress, callback_copydone, length=16*1024*1024):
        """Recursively copy *src* into *dst* in *length*-byte chunks,
        updating the byte counters that drive the progress/statistics labels.
        """
        self._copied = 0
        self._copied_tmp = 0
        self._totalSize = self.get_total_size(src)
        print(''.join(['Pre Dst: ', dst]))
        dst = os.path.join(dst, src.replace(BASE_DIR, '').replace('\\', ''))
        print(''.join(['Src: ', src]))
        print(''.join(['Dst: ', dst]))
        self.make_dirs(dst)
        self.start_time = time.perf_counter()
        for path, dirs, filenames in os.walk(src):
            for directory in dirs:
                destDir = path.replace(src, dst)
                self.make_dirs(os.path.join(destDir, directory))
            for sfile in filenames:
                srcFile = os.path.join(path, sfile)
                destFile = os.path.join(dst, sfile)
                # destFile = os.path.join(path.replace(src, dst), sfile)
                with open(srcFile, 'rb') as fsrc:
                    with open(destFile, 'wb') as fdst:
                        while 1:
                            buf = fsrc.read(length)
                            if not buf:
                                break
                            fdst.write(buf)
                            self._copied += len(buf)
                            callback_progress()
        try:
            self.copy_timer.stop()
        except RuntimeError:
            print('Error: could not stop QTimer')
        callback_copydone()
That's all about logic, think in the following way:
You have a TOTAL size of all files, let's say you have 100 dashes: [----- ... -----]
Now you get the starting time when you started transferring your files.
Choose some interval, let's have for example 2 secs.
So, after 2 secs see how much of the total you already transferred, in other words how much of files you already have in the new dir. Let's say you transferred 26 dashes.
The calc would be, 26 dashes/2secs = 13dashes per seconds.
Going deeper we have 26% of the content downloaded in 2 seconds since the total is 100 or 13% per second.
Even further we can calc also the prediction of time.
Total Time = 100%/13% = 7.6 secs
Time Elapsed = 2secs
Time Remaining = 7.6 - 2 = 5.6 secs
I think you got the idea...
Note: You just have to keep redoing all these calculations every 2 seconds (if you choose 2 secs of course) and updating the information shown to the user. If you want to be more precise, or show more frequently updated information, just reduce that interval to, say, 0.5 secs and do the math on top of milliseconds. It's up to you how often you update the information; that's just an overview of how to do the math.

Jupyter notebook Conda error running MoviePy code

On my macOS v 10.11.6, I got an error running moviepy on jupyter notebook
Python v 3.5.2
Conda v 4.3.8
Jupyter 4.2.1
I'm importing and running a simple cell:
from moviepy.editor import VideoFileClip
from IPython.display import HTML
# NOTE(review): cv2 is used below but never imported in this cell --
# presumably an earlier `import cv2` cell exists; confirm.
new_clip_output = 'test_output.mp4'
test_clip = VideoFileClip("test.mp4")
new_clip = test_clip.fl_image(lambda x: cv2.cvtColor(x, cv2.COLOR_RGB2YUV)) #NOTE: this function expects color images!!
%time new_clip.write_videofile(new_clip_output, audio=False)
The error is:
TypeError Traceback (most recent call last)
<ipython-input-8-27aee53c99d8> in <module>()
1 new_clip_output = 'test_output.mp4'
--> 2 test_clip = VideoFileClip("test.mp4")
3 new_clip = test_clip.fl_image(lambda x: cv2.cvtColor(x, cv2.COLOR_RGB2YUV)) #NOTE: this function expects color images!!
4 get_ipython().magic('time new_clip.write_videofile(new_clip_output, audio=False)')
/Users/<username>/anaconda3/envs/carnd-term1/lib/python3.5/site-packages/moviepy/video/io/VideoFileClip.py in __init__(self, filename, has_mask, audio, audio_buffersize, audio_fps, audio_nbytes, verbose)
80 buffersize= audio_buffersize,
81 fps = audio_fps,
--> 82 nbytes = audio_nbytes)
83
84 def __del__(self):
/Users/<username>/anaconda3/envs/carnd-term1/lib/python3.5/site-packages/moviepy/audio/io/AudioFileClip.py in __init__(self, filename, buffersize, nbytes, fps)
61 self.filename = filename
62 reader = FFMPEG_AudioReader(filename,fps=fps,nbytes=nbytes,
--> 63 buffersize=buffersize)
64
65 self.reader = reader
/Users/<username>/anaconda3/envs/carnd-term1/lib/python3.5/site-packages/moviepy/audio/io/readers.py in __init__(self, filename, buffersize, print_infos, fps, nbytes, nchannels)
68 self.buffer_startframe = 1
69 self.initialize()
--> 70 self.buffer_around(1)
71
72
/Users/<username>/anaconda3/envs/carnd-term1/lib/python3.5/site-packages/moviepy/audio/io/readers.py in buffer_around(self, framenumber)
232 else:
233 self.seek(new_bufferstart)
--> 234 self.buffer = self.read_chunk(self.buffersize)
235
236 self.buffer_startframe = new_bufferstart
/Users/<username>/anaconda3/envs/carnd-term1/lib/python3.5/site-packages/moviepy/audio/io/readers.py in read_chunk(self, chunksize)
121 result = (1.0*result / 2**(8*self.nbytes-1)).\
122 reshape((len(result)/self.nchannels,
--> 123 self.nchannels))
124 #self.proc.stdout.flush()
125 self.pos = self.pos+chunksize
TypeError: 'float' object cannot be interpreted as an integer
Is it because of some conflict in versions of various libraries?

Resources