I'm testing the performance of IBM MQ (running the latest version in a local Docker container).
I use a persistent queue.
On the producer side, I can get higher throughput by running multiple producing applications in parallel.
However, on the consumer side, I cannot increase the throughput by parallelizing consumer processes. On the contrary, the throughput is even worse for multiple consumers than for one single consumer.
What could be the reason for the poor consuming performance?
It shouldn't be due to a hardware limit, since I'm comparing consumption with production, and the consumer does nothing but get messages without any other processing.
Does GET perform a commit for each message? I can't find any explicit commit method in PyMQI, though.
put_demo.py
#!/usr/bin/env python3
"""Put a fixed number of messages on an IBM MQ queue and report the throughput.

The measured interval covers connecting, putting every message, and
disconnecting, so the reported tps includes connection setup/teardown.
"""
import pymqi
import time

queue_manager = 'QM1'
channel = 'DEV.APP.SVRCONN'
host = '127.0.0.1'
port = '1414'
queue_name = 'DEV.QUEUE.1'
message = b'Hello from Python!'
conn_info = '%s(%s)' % (host, port)
nb_messages = 1000

# Only successful puts are counted so the reported figures stay honest
# when a put fails part-way through the run.
nb_messages_produced = 0

t0 = time.time()
qmgr = pymqi.connect(queue_manager, channel, conn_info)
try:
    queue = pymqi.Queue(qmgr, queue_name)
    try:
        for _ in range(nb_messages):
            try:
                queue.put(message)
            except pymqi.MQMIError as e:
                # The error is reported as fatal, so stop instead of
                # retrying the same failing put for the remaining iterations.
                print(f"Fatal error: {str(e)}")
                break
            nb_messages_produced += 1
    finally:
        queue.close()
finally:
    # Always release the connection, even if opening the queue or a put raised.
    qmgr.disconnect()
t1 = time.time()

print(f"tps: {nb_messages_produced/(t1-t0):.0f} nb_message_produced: {nb_messages_produced}")
get_demo.py
#!/usr/bin/env python3
"""Get a fixed number of messages from an IBM MQ queue and report the throughput.

Each get waits up to ``gmo.WaitInterval`` for a message; an empty queue is
not an error, the loop simply tries again until ``nb_messages`` have been read.
"""
import pymqi
import time
import os

queue_manager = 'QM1'
channel = 'DEV.APP.SVRCONN'
host = '127.0.0.1'
port = '1414'
queue_name = 'DEV.QUEUE.1'
conn_info = '%s(%s)' % (host, port)
nb_messages = 1000
nb_messages_consumed = 0

t0 = time.time()
qmgr = pymqi.connect(queue_manager, channel, conn_info)
try:
    queue = pymqi.Queue(qmgr, queue_name)
    try:
        gmo = pymqi.GMO(Options=pymqi.CMQC.MQGMO_WAIT | pymqi.CMQC.MQGMO_FAIL_IF_QUIESCING)
        gmo.WaitInterval = 1000  # milliseconds to block waiting for a message

        while nb_messages_consumed < nb_messages:
            try:
                queue.get(None, None, gmo)
            except pymqi.MQMIError as e:
                if e.reason == pymqi.CMQC.MQRC_NO_MSG_AVAILABLE:
                    # Reason 2033: no messages, that's OK, just poll again.
                    continue
                # Any other reason (broken connection, quiescing queue
                # manager, ...) is a real failure; swallowing it would make
                # this loop spin forever.
                raise
            nb_messages_consumed += 1
    finally:
        queue.close()
finally:
    # Always release the connection, even when a get failed.
    qmgr.disconnect()
t1 = time.time()

print(f"tps: {nb_messages_consumed/(t1-t0):.0f} nb_messages_consumed: {nb_messages_consumed}")
run results
> for i in {1..10}; do ./put_demo.py & done
tps: 385 nb_message_produced: 1000
tps: 385 nb_message_produced: 1000
tps: 383 nb_message_produced: 1000
tps: 379 nb_message_produced: 1000
tps: 378 nb_message_produced: 1000
tps: 377 nb_message_produced: 1000
tps: 377 nb_message_produced: 1000
tps: 378 nb_message_produced: 1000
tps: 374 nb_message_produced: 1000
tps: 374 nb_message_produced: 1000
> for i in {1..10}; do ./get_demo.py & done
tps: 341 nb_messages_consumed: 1000
tps: 339 nb_messages_consumed: 1000
tps: 95 nb_messages_consumed: 1000
tps: 82 nb_messages_consumed: 1000
tps: 82 nb_messages_consumed: 1000
tps: 82 nb_messages_consumed: 1000
tps: 82 nb_messages_consumed: 1000
tps: 82 nb_messages_consumed: 1000
tps: 82 nb_messages_consumed: 1000
tps: 82 nb_messages_consumed: 1000
get_demo.py updated version using syncpoint and batch commit
#!/usr/bin/env python3
"""Get messages from an IBM MQ queue under syncpoint, committing in batches.

Messages are read with MQGMO_SYNCPOINT and the unit of work is committed
every ``commit_batch`` messages; a trailing partial batch is committed once
the loop has finished. Throughput is reported at the end.
"""
import pymqi
import time
import os

queue_manager = 'QM1'
channel = 'DEV.APP.SVRCONN'
host = '127.0.0.1'
port = '1414'
queue_name = 'DEV.QUEUE.1'
conn_info = '%s(%s)' % (host, port)
nb_messages = 1000
commit_batch = 10
nb_messages_consumed = 0

t0 = time.time()
qmgr = pymqi.connect(queue_manager, channel, conn_info)
try:
    queue = pymqi.Queue(qmgr, queue_name)
    try:
        gmo = pymqi.GMO(
            Options=pymqi.CMQC.MQGMO_WAIT
            | pymqi.CMQC.MQGMO_FAIL_IF_QUIESCING
            | pymqi.CMQC.MQGMO_SYNCPOINT
        )
        gmo.WaitInterval = 1000  # milliseconds to block waiting for a message

        while nb_messages_consumed < nb_messages:
            try:
                queue.get(None, None, gmo)
            except pymqi.MQMIError as e:
                if e.reason == pymqi.CMQC.MQRC_NO_MSG_AVAILABLE:
                    # Reason 2033: no messages, that's OK, just poll again.
                    continue
                # Anything else is a real failure; do not loop on it forever.
                raise
            nb_messages_consumed += 1
            # Commit outside the get's error handler so that a failing
            # commit is reported instead of being silently ignored.
            if nb_messages_consumed % commit_batch == 0:
                qmgr.commit()

        # Commit the remainder when nb_messages is not a multiple of
        # commit_batch, rather than leaving those gets uncommitted.
        if nb_messages_consumed % commit_batch != 0:
            qmgr.commit()
    finally:
        queue.close()
finally:
    # Always release the connection, even when a get or commit failed.
    qmgr.disconnect()
t1 = time.time()

print(f"tps: {nb_messages_consumed/(t1-t0):.0f} nb_messages_consumed: {nb_messages_consumed}")
Thanks.
I downloaded the latest release of H2O (3.18.0.1) and XGBoost keeps failing. I am not sure whether to post to the JIRA issues or here.
# Train an H2O XGBoost model on the public HIGGS sample, using column 0 as
# the (categorical) response and every other column as a predictor.
import h2o  # needed for h2o.init() / h2o.import_file() below
from h2o.estimators import H2OXGBoostEstimator

h2o.init()

# Fail early with a clear message instead of starting a build that cannot run.
is_xgboost_available = H2OXGBoostEstimator.available()
if not is_xgboost_available:
    raise RuntimeError("XGBoost is not available on this H2O cluster")

train_path = 'https://s3.amazonaws.com/h2o-public-test-data/bigdata/laptop/higgs_train_imbalance_100k.csv'
test_path = 'https://s3.amazonaws.com/h2o-public-test-data/bigdata/laptop/higgs_test_imbalance_100k.csv'
df_train = h2o.import_file(train_path)
df_test = h2o.import_file(test_path)

# Transform first feature into categorical feature
df_train[0] = df_train[0].asfactor()
df_test[0] = df_test[0].asfactor()

param = {
    "ntrees": 500,
}
model = H2OXGBoostEstimator(**param)
model.train(x=list(range(1, df_train.shape[1])), y=0, training_frame=df_train)
I can run random forest, GBM without an issue but xgboost keeps failing.
I am running on Ubuntu 16.04. Java Version: java version "1.8.0_161"; Java(TM) SE Runtime Environment (build 1.8.0_161-b12); Java HotSpot(TM) 64-Bit Server VM (build 25.161-b12, mixed mode). Anaconda Python 3.6
I reinstalled Anaconda and reinstalled JRE, but am still having the same issue.
It keeps giving me the following error:
xgboost Model Build progress: |████████████████████████████████████████
---------------------------------------------------------------------------
ConnectionResetError Traceback (most recent call last)
~/anaconda3/lib/python3.6/site-packages/urllib3/connectionpool.py in urlopen(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, **response_kw)
600 body=body, headers=headers,
--> 601 chunked=chunked)
602
~/anaconda3/lib/python3.6/site-packages/urllib3/connectionpool.py in _make_request(self, conn, method, url, timeout, chunked, **httplib_request_kw)
386 # otherwise it looks like a programming error was the cause.
--> 387 six.raise_from(e, None)
388 except (SocketTimeout, BaseSSLError, SocketError) as e:
~/anaconda3/lib/python3.6/site-packages/urllib3/packages/six.py in raise_from(value, from_value)
~/anaconda3/lib/python3.6/site-packages/urllib3/connectionpool.py in _make_request(self, conn, method, url, timeout, chunked, **httplib_request_kw)
382 try:
--> 383 httplib_response = conn.getresponse()
384 except Exception as e:
~/anaconda3/lib/python3.6/http/client.py in getresponse(self)
1330 try:
-> 1331 response.begin()
1332 except ConnectionError:
~/anaconda3/lib/python3.6/http/client.py in begin(self)
296 while True:
--> 297 version, status, reason = self._read_status()
298 if status != CONTINUE:
~/anaconda3/lib/python3.6/http/client.py in _read_status(self)
257 def _read_status(self):
--> 258 line = str(self.fp.readline(_MAXLINE + 1), "iso-8859-1")
259 if len(line) > _MAXLINE:
~/anaconda3/lib/python3.6/socket.py in readinto(self, b)
585 try:
--> 586 return self._sock.recv_into(b)
587 except timeout:
ConnectionResetError: [Errno 104] Connection reset by peer
During handling of the above exception, another exception occurred:
ProtocolError Traceback (most recent call last)
~/anaconda3/lib/python3.6/site-packages/requests/adapters.py in send(self, request, stream, timeout, verify, cert, proxies)
439 retries=self.max_retries,
--> 440 timeout=timeout
441 )
~/anaconda3/lib/python3.6/site-packages/urllib3/connectionpool.py in urlopen(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, **response_kw)
638 retries = retries.increment(method, url, error=e, _pool=self,
--> 639 _stacktrace=sys.exc_info()[2])
640 retries.sleep()
~/anaconda3/lib/python3.6/site-packages/urllib3/util/retry.py in increment(self, method, url, response, error, _pool, _stacktrace)
356 if read is False or not self._is_method_retryable(method):
--> 357 raise six.reraise(type(error), error, _stacktrace)
358 elif read is not None:
~/anaconda3/lib/python3.6/site-packages/urllib3/packages/six.py in reraise(tp, value, tb)
684 if value.__traceback__ is not tb:
--> 685 raise value.with_traceback(tb)
686 raise value
~/anaconda3/lib/python3.6/site-packages/urllib3/connectionpool.py in urlopen(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, **response_kw)
600 body=body, headers=headers,
--> 601 chunked=chunked)
602
~/anaconda3/lib/python3.6/site-packages/urllib3/connectionpool.py in _make_request(self, conn, method, url, timeout, chunked, **httplib_request_kw)
386 # otherwise it looks like a programming error was the cause.
--> 387 six.raise_from(e, None)
388 except (SocketTimeout, BaseSSLError, SocketError) as e:
~/anaconda3/lib/python3.6/site-packages/urllib3/packages/six.py in raise_from(value, from_value)
~/anaconda3/lib/python3.6/site-packages/urllib3/connectionpool.py in _make_request(self, conn, method, url, timeout, chunked, **httplib_request_kw)
382 try:
--> 383 httplib_response = conn.getresponse()
384 except Exception as e:
~/anaconda3/lib/python3.6/http/client.py in getresponse(self)
1330 try:
-> 1331 response.begin()
1332 except ConnectionError:
~/anaconda3/lib/python3.6/http/client.py in begin(self)
296 while True:
--> 297 version, status, reason = self._read_status()
298 if status != CONTINUE:
~/anaconda3/lib/python3.6/http/client.py in _read_status(self)
257 def _read_status(self):
--> 258 line = str(self.fp.readline(_MAXLINE + 1), "iso-8859-1")
259 if len(line) > _MAXLINE:
~/anaconda3/lib/python3.6/socket.py in readinto(self, b)
585 try:
--> 586 return self._sock.recv_into(b)
587 except timeout:
ProtocolError: ('Connection aborted.', ConnectionResetError(104, 'Connection reset by peer'))
During handling of the above exception, another exception occurred:
ConnectionError Traceback (most recent call last)
~/anaconda3/lib/python3.6/site-packages/h2o/backend/connection.py in request(self, endpoint, data, json, filename, save_to)
399 headers=headers, timeout=self._timeout, stream=stream,
--> 400 auth=self._auth, verify=self._verify_ssl_cert, proxies=self._proxies)
401 self._log_end_transaction(start_time, resp)
~/anaconda3/lib/python3.6/site-packages/requests/api.py in request(method, url, **kwargs)
57 with sessions.Session() as session:
---> 58 return session.request(method=method, url=url, **kwargs)
59
~/anaconda3/lib/python3.6/site-packages/requests/sessions.py in request(self, method, url, params, data, headers, cookies, files, auth, timeout, allow_redirects, proxies, hooks, stream, verify, cert, json)
507 send_kwargs.update(settings)
--> 508 resp = self.send(prep, **send_kwargs)
509
~/anaconda3/lib/python3.6/site-packages/requests/sessions.py in send(self, request, **kwargs)
617 # Send the request
--> 618 r = adapter.send(request, **kwargs)
619
~/anaconda3/lib/python3.6/site-packages/requests/adapters.py in send(self, request, stream, timeout, verify, cert, proxies)
489 except (ProtocolError, socket.error) as err:
--> 490 raise ConnectionError(err, request=request)
491
ConnectionError: ('Connection aborted.', ConnectionResetError(104, 'Connection reset by peer'))
During handling of the above exception, another exception occurred:
H2OConnectionError Traceback (most recent call last)
<ipython-input-22-37b26d4dfbfd> in <module>()
1 start = time.time()
----> 2 model.train(x = list(range(1, df_train.shape[1])), y = 0, training_frame = df_train)
3 end = time.time()
4 print(end - start)
~/anaconda3/lib/python3.6/site-packages/h2o/estimators/estimator_base.py in train(self, x, y, training_frame, offset_column, fold_column, weights_column, validation_frame, max_runtime_secs, ignored_columns, model_id, verbose)
229 return
230
--> 231 model.poll(verbose_model_scoring_history=verbose)
232 model_json = h2o.api("GET /%d/Models/%s" % (rest_ver, model.dest_key))["models"][0]
233 self._resolve_model(model.dest_key, model_json)
~/anaconda3/lib/python3.6/site-packages/h2o/job.py in poll(self, verbose_model_scoring_history)
56 pb.execute(self._refresh_job_status, print_verbose_info=lambda x: self._print_verbose_info() if int(x * 10) % 5 == 0 else " ")
57 else:
---> 58 pb.execute(self._refresh_job_status)
59 except StopIteration as e:
60 if str(e) == "cancelled":
~/anaconda3/lib/python3.6/site-packages/h2o/utils/progressbar.py in execute(self, progress_fn, print_verbose_info)
167 # Query the progress level, but only if it's time already
168 if self._next_poll_time <= now:
--> 169 res = progress_fn() # may raise StopIteration
170 assert_is_type(res, (numeric, numeric), numeric)
171 if not isinstance(res, tuple):
~/anaconda3/lib/python3.6/site-packages/h2o/job.py in _refresh_job_status(self)
91 def _refresh_job_status(self):
92 if self._poll_count <= 0: raise StopIteration("")
---> 93 jobs = h2o.api("GET /3/Jobs/%s" % self.job_key)
94 self.job = jobs["jobs"][0] if "jobs" in jobs else jobs["job"][0]
95 self.status = self.job["status"]
~/anaconda3/lib/python3.6/site-packages/h2o/h2o.py in api(endpoint, data, json, filename, save_to)
101 # type checks are performed in H2OConnection class
102 _check_connection()
--> 103 return h2oconn.request(endpoint, data=data, json=json, filename=filename, save_to=save_to)
104
105
~/anaconda3/lib/python3.6/site-packages/h2o/backend/connection.py in request(self, endpoint, data, json, filename, save_to)
408 else:
409 self._log_end_exception(e)
--> 410 raise H2OConnectionError("Unexpected HTTP error: %s" % e)
411 except requests.exceptions.Timeout as e:
412 self._log_end_exception(e)
H2OConnectionError: Unexpected HTTP error: ('Connection aborted.', ConnectionResetError(104, 'Connection reset by peer'))
I have set up a Spring Quartz timer with the following expression:
@Scheduled(cron = "${quartz.expire.data.cron:0 0 0 * * ?}")
But it starts a little bit ahead as shown in our logs:
2017-10-22 23:59:59.899 scheduler-4
Why?
This might come from the calculation for the next execution time.
In the schedule method of the ReschedulingRunnable, the time is taken from the nextExecutionTime at line 68. The actual delay until execution gets calculated at line 72.
66 public ScheduledFuture<?> schedule() {
67 synchronized (this.triggerContextMonitor) {
68 this.scheduledExecutionTime = this.trigger.nextExecutionTime(this.triggerContext);
69 if (this.scheduledExecutionTime == null) {
70 return null;
71 }
72 long initialDelay = this.scheduledExecutionTime.getTime() - System.currentTimeMillis();
73 this.currentFuture = this.executor.schedule(this, initialDelay, TimeUnit.MILLISECONDS);
74 return this;
75 }
76 }
Now let's see what happens in the nextExecutionTime method of the CronTrigger:
72 @Override
73 public Date nextExecutionTime(TriggerContext triggerContext) {
74 Date date = triggerContext.lastCompletionTime();
75 if (date != null) {
76 Date scheduled = triggerContext.lastScheduledExecutionTime();
77 if (scheduled != null && date.before(scheduled)) {
78 // Previous task apparently executed too early...
79 // Let's simply use the last calculated execution time then,
80 // in order to prevent accidental re-fires in the same second.
81 date = scheduled;
82 }
83 }
84 else {
85 date = new Date();
86 }
87 return this.sequenceGenerator.next(date);
88 }
At line 85, the current time is taken. That moment is the base for calculating the next execution time for the cron definition, which happens in sequenceGenerator.next:
134 Calendar calendar = new GregorianCalendar();
135 calendar.setTimeZone(this.timeZone);
136 calendar.setTime(date);
137
138 // First, just reset the milliseconds and try to calculate from there...
139 calendar.set(Calendar.MILLISECOND, 0);
140 long originalTimestamp = calendar.getTimeInMillis();
141 doNext(calendar, calendar.get(Calendar.YEAR));
142
143 if (calendar.getTimeInMillis() == originalTimestamp) {
144 // We arrived at the original timestamp - round up to the next whole second and try again...
145 calendar.add(Calendar.SECOND, 1);
146 doNext(calendar, calendar.get(Calendar.YEAR));
147 }
148
149 return calendar.getTime();
This certainly takes a few milliseconds, which will be missing within the initialDelay.
Proof
A small test to prove this looks like the following. I create a regular CronTrigger and a manipulated CronTrigger:
#Test
public void test() {
CronTrigger normalTrigger= new CronTrigger("0 0 0 * * ?");
Date d2 = normalTrigger.nextExecutionTime(new SimpleTriggerContext());
long initialDelay2 = d2.getTime() - System.currentTimeMillis();
System.out.println("Normal trigger:"+ initialDelay2);
//create a manipulated trigger, which takes longer to return the nextExecutionTime
CronTrigger manipulated = new CronTrigger("0 0 0 * * ?") {
#Override
public Date nextExecutionTime(TriggerContext triggerContext) {
Date nextExecutionTime = super.nextExecutionTime(triggerContext);
try {
Thread.sleep(5000);
} catch (InterruptedException ex) {
//ignore
}
return nextExecutionTime;
}
};
Date d = manipulated.nextExecutionTime(new SimpleTriggerContext());
long initialDelay = d.getTime() - System.currentTimeMillis();
System.out.println("Manipulated trigger:" +initialDelay);
}
From the result, you see that the manipulated trigger will trigger 5 seconds earlier than the non-manipulated, because it took 5 seconds longer to return the nextExecutionTime.