I am trying to connect to Elasticsearch using the elasticsearch-dbapi library. I want to be able to execute SQL queries on my Elasticsearch cluster.
However, I keep getting this error when I attempt to run a query:
---------------------------------------------------------------------------
gaierror Traceback (most recent call last)
File ~\Anaconda3\envs\elasticsearch\lib\site-packages\urllib3\connection.py:174, in HTTPConnection._new_conn(self)
173 try:
--> 174 conn = connection.create_connection(
175 (self._dns_host, self.port), self.timeout, **extra_kw
176 )
178 except SocketTimeout:
File ~\Anaconda3\envs\elasticsearch\lib\site-packages\urllib3\util\connection.py:72, in create_connection(address, timeout, source_address, socket_options)
68 return six.raise_from(
69 LocationParseError(u"'%s', label empty or too long" % host), None
70 )
---> 72 for res in socket.getaddrinfo(host, port, family, socket.SOCK_STREAM):
73 af, socktype, proto, canonname, sa = res
File ~\Anaconda3\envs\elasticsearch\lib\socket.py:955, in getaddrinfo(host, port, family, type, proto, flags)
954 addrlist = []
--> 955 for res in _socket.getaddrinfo(host, port, family, type, proto, flags):
956 af, socktype, proto, canonname, sa = res
gaierror: [Errno 11001] getaddrinfo failed
During handling of the above exception, another exception occurred:
NewConnectionError Traceback (most recent call last)
File ~\Anaconda3\envs\elasticsearch\lib\site-packages\elasticsearch\connection\http_urllib3.py:251, in Urllib3HttpConnection.perform_request(self, method, url, params, body, timeout, ignore, headers)
249 request_headers["content-encoding"] = "gzip"
--> 251 response = self.pool.urlopen(
252 method, url, body, retries=Retry(False), headers=request_headers, **kw
253 )
254 duration = time.time() - start
File ~\Anaconda3\envs\elasticsearch\lib\site-packages\urllib3\connectionpool.py:787, in HTTPConnectionPool.urlopen(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, **response_kw)
785 e = ProtocolError("Connection aborted.", e)
--> 787 retries = retries.increment(
788 method, url, error=e, _pool=self, _stacktrace=sys.exc_info()[2]
789 )
790 retries.sleep()
File ~\Anaconda3\envs\elasticsearch\lib\site-packages\urllib3\util\retry.py:525, in Retry.increment(self, method, url, response, error, _pool, _stacktrace)
523 if self.total is False and error:
524 # Disabled, indicate to re-raise the error.
--> 525 raise six.reraise(type(error), error, _stacktrace)
527 total = self.total
File ~\Anaconda3\envs\elasticsearch\lib\site-packages\urllib3\packages\six.py:770, in reraise(tp, value, tb)
769 raise value.with_traceback(tb)
--> 770 raise value
771 finally:
File ~\Anaconda3\envs\elasticsearch\lib\site-packages\urllib3\connectionpool.py:703, in HTTPConnectionPool.urlopen(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, **response_kw)
702 # Make the request on the httplib connection object.
--> 703 httplib_response = self._make_request(
704 conn,
705 method,
706 url,
707 timeout=timeout_obj,
708 body=body,
709 headers=headers,
710 chunked=chunked,
711 )
713 # If we're going to release the connection in ``finally:``, then
714 # the response doesn't need to know about the connection. Otherwise
715 # it will also try to release it and we'll have a double-release
716 # mess.
File ~\Anaconda3\envs\elasticsearch\lib\site-packages\urllib3\connectionpool.py:386, in HTTPConnectionPool._make_request(self, conn, method, url, timeout, chunked, **httplib_request_kw)
385 try:
--> 386 self._validate_conn(conn)
387 except (SocketTimeout, BaseSSLError) as e:
388 # Py2 raises this as a BaseSSLError, Py3 raises it as socket timeout.
File ~\Anaconda3\envs\elasticsearch\lib\site-packages\urllib3\connectionpool.py:1042, in HTTPSConnectionPool._validate_conn(self, conn)
1041 if not getattr(conn, "sock", None): # AppEngine might not have `.sock`
-> 1042 conn.connect()
1044 if not conn.is_verified:
File ~\Anaconda3\envs\elasticsearch\lib\site-packages\urllib3\connection.py:358, in HTTPSConnection.connect(self)
356 def connect(self):
357 # Add certificate verification
--> 358 self.sock = conn = self._new_conn()
359 hostname = self.host
File ~\Anaconda3\envs\elasticsearch\lib\site-packages\urllib3\connection.py:186, in HTTPConnection._new_conn(self)
185 except SocketError as e:
--> 186 raise NewConnectionError(
187 self, "Failed to establish a new connection: %s" % e
188 )
190 return conn
NewConnectionError: <urllib3.connection.HTTPSConnection object at 0x00000236CF7A3D60>: Failed to establish a new connection: [Errno 11001] getaddrinfo failed
During handling of the above exception, another exception occurred:
ConnectionError Traceback (most recent call last)
File ~\Anaconda3\envs\elasticsearch\lib\site-packages\es\baseapi.py:318, in BaseCursor.elastic_query(self, query)
317 try:
--> 318 response = self.es.transport.perform_request("POST", path, body=payload)
319 except es_exceptions.ConnectionError:
File ~\Anaconda3\envs\elasticsearch\lib\site-packages\elasticsearch\transport.py:413, in Transport.perform_request(self, method, url, headers, params, body)
412 if attempt == self.max_retries:
--> 413 raise e
414 else:
File ~\Anaconda3\envs\elasticsearch\lib\site-packages\elasticsearch\transport.py:381, in Transport.perform_request(self, method, url, headers, params, body)
380 try:
--> 381 status, headers_response, data = connection.perform_request(
382 method,
383 url,
384 params,
385 body,
386 headers=headers,
387 ignore=ignore,
388 timeout=timeout,
389 )
391 except TransportError as e:
File ~\Anaconda3\envs\elasticsearch\lib\site-packages\elasticsearch\connection\http_urllib3.py:266, in Urllib3HttpConnection.perform_request(self, method, url, params, body, timeout, ignore, headers)
265 raise ConnectionTimeout("TIMEOUT", str(e), e)
--> 266 raise ConnectionError("N/A", str(e), e)
268 # raise warnings if any from the 'Warnings' header.
ConnectionError: ConnectionError(<urllib3.connection.HTTPSConnection object at 0x00000236CF7A3D60>: Failed to establish a new connection: [Errno 11001] getaddrinfo failed) caused by: NewConnectionError(<urllib3.connection.HTTPSConnection object at 0x00000236CF7A3D60>: Failed to establish a new connection: [Errno 11001] getaddrinfo failed)
During handling of the above exception, another exception occurred:
OperationalError Traceback (most recent call last)
Input In [16], in <cell line: 2>()
1 curs = conn.cursor()
----> 2 curs.execute(
3 "select * from kibana_sample_data_ecommerce LIMIT 10"
4 )
File ~\Anaconda3\envs\elasticsearch\lib\site-packages\es\baseapi.py:36, in check_closed.<locals>.wrap(self, *args, **kwargs)
32 if self.closed:
33 raise exceptions.Error(
34 "{klass} already closed".format(klass=self.__class__.__name__)
35 )
---> 36 return f(self, *args, **kwargs)
File ~\Anaconda3\envs\elasticsearch\lib\site-packages\es\elastic\api.py:158, in Cursor.execute(self, operation, parameters)
155 return self.get_array_type_columns(re_table_name[1])
157 query = apply_parameters(operation, parameters)
--> 158 results = self.elastic_query(query)
159 # We need a list of tuples
160 rows = [tuple(row) for row in results.get("rows", [])]
File ~\Anaconda3\envs\elasticsearch\lib\site-packages\es\baseapi.py:320, in BaseCursor.elastic_query(self, query)
318 response = self.es.transport.perform_request("POST", path, body=payload)
319 except es_exceptions.ConnectionError:
--> 320 raise exceptions.OperationalError("Error connecting to Elasticsearch")
321 except es_exceptions.RequestError as ex:
322 raise exceptions.ProgrammingError(f"Error ({ex.error}): {ex.info}")
OperationalError: Error connecting to Elasticsearch
This is what my code looks like:
from es.elastic.api import connect
conn = connect(
    host='https://<endpoint-alias>.es.eastus2.azure.elastic-cloud.com',
    port=9200,
    scheme="https",
    user='elastic',
    password='<password>'
)
curs = conn.cursor()
curs.execute(
    "select * from kibana_sample_data_ecommerce LIMIT 10"
)
What exactly is the problem here?
Note: I am using the elasticsearch-dbapi package because, although it is possible to execute SQL queries with the elasticsearch client, there does not seem to be an option to scan through all of the results. I want to access all the hits returned by a query.
However, when I tried connecting with the elasticsearch client, it worked without any problem; there, though, I used the Cloud ID to establish the connection. Is there some issue with the host I have used?
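For reference, this is roughly how the working connection looked, and how all hits of a SQL query can be paged through with the plain elasticsearch client (a minimal sketch against the 7.x Python client; the cloud_id and password are placeholders, and fetch_size is just an illustrative value):
from elasticsearch import Elasticsearch

# Connect via Cloud ID instead of a host URL (placeholders, not real credentials)
es = Elasticsearch(
    cloud_id="<deployment-name>:<base64-cloud-id>",
    http_auth=("elastic", "<password>"),
)

# The SQL endpoint pages results with a cursor: each response carries a
# "cursor" token that is posted back until the server stops returning one,
# which yields every row of the result set.
resp = es.sql.query(body={
    "query": "select * from kibana_sample_data_ecommerce",
    "fetch_size": 1000,
})
rows = resp["rows"]
while "cursor" in resp:
    resp = es.sql.query(body={"cursor": resp["cursor"]})
    rows.extend(resp["rows"])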
I have a dataset named "haberman.csv" in my current directory, but when I try to load it with the load_dataset function below, it throws HTTPError: HTTP Error 404: Not Found.
When I load a different dataset named "tips.csv" from the same directory, it works fine.
Why does one dataset load while the other doesn't?
import seaborn as sns
sns.set()
haberman_plot = sns.load_dataset("haberman")
Error:
<ipython-input-58-fb9fa2439b0a> in <module>()
4
5 sns.set()
----> 6 haberman_plot = sns.load_dataset("haberman")
~/.local/lib/python3.6/site-packages/seaborn/utils.py in load_dataset(name, cache, data_home, **kws)
426 os.path.basename(full_path))
427 if not os.path.exists(cache_path):
--> 428 urlretrieve(full_path, cache_path)
429 full_path = cache_path
430
~/anaconda3/lib/python3.6/urllib/request.py in urlretrieve(url, filename, reporthook, data)
246 url_type, path = splittype(url)
247
--> 248 with contextlib.closing(urlopen(url, data)) as fp:
249 headers = fp.info()
250
~/anaconda3/lib/python3.6/urllib/request.py in urlopen(url, data, timeout, cafile, capath, cadefault, context)
221 else:
222 opener = _opener
--> 223 return opener.open(url, data, timeout)
224
225 def install_opener(opener):
~/anaconda3/lib/python3.6/urllib/request.py in open(self, fullurl, data, timeout)
530 for processor in self.process_response.get(protocol, []):
531 meth = getattr(processor, meth_name)
--> 532 response = meth(req, response)
533
534 return response
~/anaconda3/lib/python3.6/urllib/request.py in http_response(self, request, response)
640 if not (200 <= code < 300):
641 response = self.parent.error(
--> 642 'http', request, response, code, msg, hdrs)
643
644 return response
~/anaconda3/lib/python3.6/urllib/request.py in error(self, proto, *args)
568 if http_err:
569 args = (dict, 'default', 'http_error_default') + orig_args
--> 570 return self._call_chain(*args)
571
572 # XXX probably also want an abstract factory that knows when it makes
~/anaconda3/lib/python3.6/urllib/request.py in _call_chain(self, chain, kind, meth_name, *args)
502 for handler in handlers:
503 func = getattr(handler, meth_name)
--> 504 result = func(*args)
505 if result is not None:
506 return result
~/anaconda3/lib/python3.6/urllib/request.py in http_error_default(self, req, fp, code, msg, hdrs)
648 class HTTPDefaultErrorHandler(BaseHandler):
649 def http_error_default(self, req, fp, code, msg, hdrs):
--> 650 raise HTTPError(req.full_url, code, msg, hdrs, fp)
651
652 class HTTPRedirectHandler(BaseHandler):
HTTPError: HTTP Error 404: Not Found
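For what it's worth, seaborn's load_dataset does not read from the current directory at all: it builds a URL into the online seaborn-data repository and downloads the named CSV, so only datasets published there (like tips) resolve. A local file can instead be read directly (a minimal sketch, assuming haberman.csv sits in the working directory and has a header row):
import pandas as pd

# Read the local file directly; load_dataset only fetches the named
# datasets published in the seaborn-data GitHub repository.
haberman = pd.read_csv("haberman.csv")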
I downloaded the latest release of H2O (3.18.0.1), and XGBoost keeps failing. I am not sure whether to post this to the JIRA issues or here.
import h2o

h2o.init()
from h2o.estimators import H2OXGBoostEstimator
is_xgboost_available = H2OXGBoostEstimator.available()
train_path = 'https://s3.amazonaws.com/h2o-public-test-data/bigdata/laptop/higgs_train_imbalance_100k.csv'
test_path = 'https://s3.amazonaws.com/h2o-public-test-data/bigdata/laptop/higgs_test_imbalance_100k.csv'
df_train = h2o.import_file(train_path)
df_test = h2o.import_file(test_path)
# Transform first feature into categorical feature
df_train[0] = df_train[0].asfactor()
df_test[0] = df_test[0].asfactor()
param = {
    "ntrees": 500
}
model = H2OXGBoostEstimator(**param)
model.train(x = list(range(1, df_train.shape[1])), y = 0, training_frame = df_train)
I can run Random Forest and GBM without an issue, but XGBoost keeps failing.
I am running on Ubuntu 16.04 with Anaconda Python 3.6. Java version: java version "1.8.0_161"; Java(TM) SE Runtime Environment (build 1.8.0_161-b12); Java HotSpot(TM) 64-Bit Server VM (build 25.161-b12, mixed mode).
I reinstalled Anaconda and reinstalled JRE, but am still having the same issue.
It keeps giving me the following error:
xgboost Model Build progress: |████████████████████████████████████████
---------------------------------------------------------------------------
ConnectionResetError Traceback (most recent call last)
~/anaconda3/lib/python3.6/site-packages/urllib3/connectionpool.py in urlopen(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, **response_kw)
600 body=body, headers=headers,
--> 601 chunked=chunked)
602
~/anaconda3/lib/python3.6/site-packages/urllib3/connectionpool.py in _make_request(self, conn, method, url, timeout, chunked, **httplib_request_kw)
386 # otherwise it looks like a programming error was the cause.
--> 387 six.raise_from(e, None)
388 except (SocketTimeout, BaseSSLError, SocketError) as e:
~/anaconda3/lib/python3.6/site-packages/urllib3/packages/six.py in raise_from(value, from_value)
~/anaconda3/lib/python3.6/site-packages/urllib3/connectionpool.py in _make_request(self, conn, method, url, timeout, chunked, **httplib_request_kw)
382 try:
--> 383 httplib_response = conn.getresponse()
384 except Exception as e:
~/anaconda3/lib/python3.6/http/client.py in getresponse(self)
1330 try:
-> 1331 response.begin()
1332 except ConnectionError:
~/anaconda3/lib/python3.6/http/client.py in begin(self)
296 while True:
--> 297 version, status, reason = self._read_status()
298 if status != CONTINUE:
~/anaconda3/lib/python3.6/http/client.py in _read_status(self)
257 def _read_status(self):
--> 258 line = str(self.fp.readline(_MAXLINE + 1), "iso-8859-1")
259 if len(line) > _MAXLINE:
~/anaconda3/lib/python3.6/socket.py in readinto(self, b)
585 try:
--> 586 return self._sock.recv_into(b)
587 except timeout:
ConnectionResetError: [Errno 104] Connection reset by peer
During handling of the above exception, another exception occurred:
ProtocolError Traceback (most recent call last)
~/anaconda3/lib/python3.6/site-packages/requests/adapters.py in send(self, request, stream, timeout, verify, cert, proxies)
439 retries=self.max_retries,
--> 440 timeout=timeout
441 )
~/anaconda3/lib/python3.6/site-packages/urllib3/connectionpool.py in urlopen(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, **response_kw)
638 retries = retries.increment(method, url, error=e, _pool=self,
--> 639 _stacktrace=sys.exc_info()[2])
640 retries.sleep()
~/anaconda3/lib/python3.6/site-packages/urllib3/util/retry.py in increment(self, method, url, response, error, _pool, _stacktrace)
356 if read is False or not self._is_method_retryable(method):
--> 357 raise six.reraise(type(error), error, _stacktrace)
358 elif read is not None:
~/anaconda3/lib/python3.6/site-packages/urllib3/packages/six.py in reraise(tp, value, tb)
684 if value.__traceback__ is not tb:
--> 685 raise value.with_traceback(tb)
686 raise value
~/anaconda3/lib/python3.6/site-packages/urllib3/connectionpool.py in urlopen(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, **response_kw)
600 body=body, headers=headers,
--> 601 chunked=chunked)
602
~/anaconda3/lib/python3.6/site-packages/urllib3/connectionpool.py in _make_request(self, conn, method, url, timeout, chunked, **httplib_request_kw)
386 # otherwise it looks like a programming error was the cause.
--> 387 six.raise_from(e, None)
388 except (SocketTimeout, BaseSSLError, SocketError) as e:
~/anaconda3/lib/python3.6/site-packages/urllib3/packages/six.py in raise_from(value, from_value)
~/anaconda3/lib/python3.6/site-packages/urllib3/connectionpool.py in _make_request(self, conn, method, url, timeout, chunked, **httplib_request_kw)
382 try:
--> 383 httplib_response = conn.getresponse()
384 except Exception as e:
~/anaconda3/lib/python3.6/http/client.py in getresponse(self)
1330 try:
-> 1331 response.begin()
1332 except ConnectionError:
~/anaconda3/lib/python3.6/http/client.py in begin(self)
296 while True:
--> 297 version, status, reason = self._read_status()
298 if status != CONTINUE:
~/anaconda3/lib/python3.6/http/client.py in _read_status(self)
257 def _read_status(self):
--> 258 line = str(self.fp.readline(_MAXLINE + 1), "iso-8859-1")
259 if len(line) > _MAXLINE:
~/anaconda3/lib/python3.6/socket.py in readinto(self, b)
585 try:
--> 586 return self._sock.recv_into(b)
587 except timeout:
ProtocolError: ('Connection aborted.', ConnectionResetError(104, 'Connection reset by peer'))
During handling of the above exception, another exception occurred:
ConnectionError Traceback (most recent call last)
~/anaconda3/lib/python3.6/site-packages/h2o/backend/connection.py in request(self, endpoint, data, json, filename, save_to)
399 headers=headers, timeout=self._timeout, stream=stream,
--> 400 auth=self._auth, verify=self._verify_ssl_cert, proxies=self._proxies)
401 self._log_end_transaction(start_time, resp)
~/anaconda3/lib/python3.6/site-packages/requests/api.py in request(method, url, **kwargs)
57 with sessions.Session() as session:
---> 58 return session.request(method=method, url=url, **kwargs)
59
~/anaconda3/lib/python3.6/site-packages/requests/sessions.py in request(self, method, url, params, data, headers, cookies, files, auth, timeout, allow_redirects, proxies, hooks, stream, verify, cert, json)
507 send_kwargs.update(settings)
--> 508 resp = self.send(prep, **send_kwargs)
509
~/anaconda3/lib/python3.6/site-packages/requests/sessions.py in send(self, request, **kwargs)
617 # Send the request
--> 618 r = adapter.send(request, **kwargs)
619
~/anaconda3/lib/python3.6/site-packages/requests/adapters.py in send(self, request, stream, timeout, verify, cert, proxies)
489 except (ProtocolError, socket.error) as err:
--> 490 raise ConnectionError(err, request=request)
491
ConnectionError: ('Connection aborted.', ConnectionResetError(104, 'Connection reset by peer'))
During handling of the above exception, another exception occurred:
H2OConnectionError Traceback (most recent call last)
<ipython-input-22-37b26d4dfbfd> in <module>()
1 start = time.time()
----> 2 model.train(x = list(range(1, df_train.shape[1])), y = 0, training_frame = df_train)
3 end = time.time()
4 print(end - start)
~/anaconda3/lib/python3.6/site-packages/h2o/estimators/estimator_base.py in train(self, x, y, training_frame, offset_column, fold_column, weights_column, validation_frame, max_runtime_secs, ignored_columns, model_id, verbose)
229 return
230
--> 231 model.poll(verbose_model_scoring_history=verbose)
232 model_json = h2o.api("GET /%d/Models/%s" % (rest_ver, model.dest_key))["models"][0]
233 self._resolve_model(model.dest_key, model_json)
~/anaconda3/lib/python3.6/site-packages/h2o/job.py in poll(self, verbose_model_scoring_history)
56 pb.execute(self._refresh_job_status, print_verbose_info=lambda x: self._print_verbose_info() if int(x * 10) % 5 == 0 else " ")
57 else:
---> 58 pb.execute(self._refresh_job_status)
59 except StopIteration as e:
60 if str(e) == "cancelled":
~/anaconda3/lib/python3.6/site-packages/h2o/utils/progressbar.py in execute(self, progress_fn, print_verbose_info)
167 # Query the progress level, but only if it's time already
168 if self._next_poll_time <= now:
--> 169 res = progress_fn() # may raise StopIteration
170 assert_is_type(res, (numeric, numeric), numeric)
171 if not isinstance(res, tuple):
~/anaconda3/lib/python3.6/site-packages/h2o/job.py in _refresh_job_status(self)
91 def _refresh_job_status(self):
92 if self._poll_count <= 0: raise StopIteration("")
---> 93 jobs = h2o.api("GET /3/Jobs/%s" % self.job_key)
94 self.job = jobs["jobs"][0] if "jobs" in jobs else jobs["job"][0]
95 self.status = self.job["status"]
~/anaconda3/lib/python3.6/site-packages/h2o/h2o.py in api(endpoint, data, json, filename, save_to)
101 # type checks are performed in H2OConnection class
102 _check_connection()
--> 103 return h2oconn.request(endpoint, data=data, json=json, filename=filename, save_to=save_to)
104
105
~/anaconda3/lib/python3.6/site-packages/h2o/backend/connection.py in request(self, endpoint, data, json, filename, save_to)
408 else:
409 self._log_end_exception(e)
--> 410 raise H2OConnectionError("Unexpected HTTP error: %s" % e)
411 except requests.exceptions.Timeout as e:
412 self._log_end_exception(e)
H2OConnectionError: Unexpected HTTP error: ('Connection aborted.', ConnectionResetError(104, 'Connection reset by peer'))
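One thing that may be worth ruling out first: if the H2O JVM dies mid-build (for example from memory pressure), the Python client sees exactly this kind of "Connection reset by peer" rather than a Java error. A minimal sketch of restarting the cluster with an explicit heap before retraining (the 8G figure is only an illustration; pick a value that fits the machine):
import h2o

# Shut down the current cluster, if one is attached, and restart it with
# an explicit memory cap; h2o.init() alone will not resize a running JVM.
if h2o.cluster():
    h2o.cluster().shutdown()
h2o.init(max_mem_size="8G", nthreads=-1)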
On macOS 10.11.6, I got an error running moviepy in a Jupyter notebook.
Python v 3.5.2
Conda v 4.3.8
Jupyter 4.2.1
I'm importing and running a simple cell:
from moviepy.editor import VideoFileClip
from IPython.display import HTML
import cv2  # used by the color-conversion lambda below
new_clip_output = 'test_output.mp4'
test_clip = VideoFileClip("test.mp4")
new_clip = test_clip.fl_image(lambda x: cv2.cvtColor(x, cv2.COLOR_RGB2YUV)) #NOTE: this function expects color images!!
%time new_clip.write_videofile(new_clip_output, audio=False)
The error is:
TypeError Traceback (most recent call last)
<ipython-input-8-27aee53c99d8> in <module>()
1 new_clip_output = 'test_output.mp4'
--> 2 test_clip = VideoFileClip("test.mp4")
3 new_clip = test_clip.fl_image(lambda x: cv2.cvtColor(x, cv2.COLOR_RGB2YUV)) #NOTE: this function expects color images!!
4 get_ipython().magic('time new_clip.write_videofile(new_clip_output, audio=False)')
/Users/<username>/anaconda3/envs/carnd-term1/lib/python3.5/site-packages/moviepy/video/io/VideoFileClip.py in __init__(self, filename, has_mask, audio, audio_buffersize, audio_fps, audio_nbytes, verbose)
80 buffersize= audio_buffersize,
81 fps = audio_fps,
--> 82 nbytes = audio_nbytes)
83
84 def __del__(self):
/Users/<username>/anaconda3/envs/carnd-term1/lib/python3.5/site-packages/moviepy/audio/io/AudioFileClip.py in __init__(self, filename, buffersize, nbytes, fps)
61 self.filename = filename
62 reader = FFMPEG_AudioReader(filename,fps=fps,nbytes=nbytes,
--> 63 buffersize=buffersize)
64
65 self.reader = reader
/Users/<username>/anaconda3/envs/carnd-term1/lib/python3.5/site-packages/moviepy/audio/io/readers.py in __init__(self, filename, buffersize, print_infos, fps, nbytes, nchannels)
68 self.buffer_startframe = 1
69 self.initialize()
--> 70 self.buffer_around(1)
71
72
/Users/<username>/anaconda3/envs/carnd-term1/lib/python3.5/site-packages/moviepy/audio/io/readers.py in buffer_around(self, framenumber)
232 else:
233 self.seek(new_bufferstart)
--> 234 self.buffer = self.read_chunk(self.buffersize)
235
236 self.buffer_startframe = new_bufferstart
/Users/<username>/anaconda3/envs/carnd-term1/lib/python3.5/site-packages/moviepy/audio/io/readers.py in read_chunk(self, chunksize)
121 result = (1.0*result / 2**(8*self.nbytes-1)).\
122 reshape((len(result)/self.nchannels,
--> 123 self.nchannels))
124 #self.proc.stdout.flush()
125 self.pos = self.pos+chunksize
TypeError: 'float' object cannot be interpreted as an integer
Is it because of some conflict in versions of various libraries?
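For what it's worth, the line the traceback ends on divides with /, which in Python 3 always produces a float, and numpy refuses float dimensions in reshape. A standalone sketch of the same failure and of the floor-division form that avoids it (plain numpy, just to show the mechanism):
import numpy as np

result = np.zeros(8)
nchannels = 2

# In Python 3, len(result) / nchannels is a float, so this reshape raises
# TypeError: 'float' object cannot be interpreted as an integer
# result.reshape((len(result) / nchannels, nchannels))

# Floor division keeps the dimension an int, so this succeeds:
chunk = result.reshape((len(result) // nchannels, nchannels))
If that is the cause, it points at the installed moviepy release still using Python-2-style division rather than at a conflict between libraries.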
I have been successfully visualizing dask objects for a few days now, but I just restarted my Jupyter notebook kernel and am running into a KeyError that I can't seem to debug.
Before I restarted the kernel, the following code worked fine:
import numpy as np
import dask.array as da

def sigmoid(x):
    '''Sigmoid function of x.'''
    return 1/(1+da.exp(-x))
X = da.from_array(np.random.random((4,2)), chunks=2)
beta = np.ones((2,1))
##
p = (X.dot(beta)).map_blocks(sigmoid)
W = da.diag((p*(1-p))[:,0])
hessian = da.dot(X.T.dot(W),X)
hessian.visualize()
But now when I run it, I get the following traceback:
KeyError Traceback (most recent call last)
<ipython-input-62-660a47cb4654> in <module>()
7 W = da.diag((p*(1-p))[:,0])
8 hessian = dot(X.T.dot(W),X)
----> 9 hessian.visualize()
.../anaconda3/anaconda/lib/python3.5/site-packages/dask/base.py in visualize(self, filename, format, optimize_graph, **kwargs)
59 """
60 return visualize(self, filename=filename, format=format,
---> 61 optimize_graph=optimize_graph, **kwargs)
62
63 def compute(self, **kwargs):
.../anaconda3/anaconda/lib/python3.5/site-packages/dask/base.py in visualize(*args, **kwargs)
234 dsk = merge(dsks)
235
--> 236 return dot_graph(dsk, filename=filename, **kwargs)
237
238
.../anaconda3/anaconda/lib/python3.5/site-packages/dask/dot.py in dot_graph(dsk, filename, format, **kwargs)
217 dask.dot.to_graphviz
218 """
--> 219 g = to_graphviz(dsk, **kwargs)
220
221 fmts = ['.png', '.pdf', '.dot', '.svg', '.jpeg', '.jpg']
.../anaconda3/anaconda/lib/python3.5/site-packages/dask/dot.py in to_graphviz(dsk, data_attributes, function_attributes, rankdir, graph_attr, node_attr, edge_attr, **kwargs)
130 if func_name not in seen:
131 seen.add(func_name)
--> 132 g.node(func_name, label=task_label(v), shape='circle',
133 **function_attributes.get(k, {}))
134 g.edge(func_name, k_name)
.../anaconda3/anaconda/lib/python3.5/site-packages/dask/dot.py in task_label(task)
29 if hasattr(func, 'funcs'):
30 if len(func.funcs) > 1:
---> 31 return '{0}(...)'.format(funcname(func.funcs[0]))
32 else:
33 head = funcname(func.funcs[0])
KeyError: 0
It appears there is some label missing for one of the tasks or something like that?
EDIT: Actually, to reproduce the error you need to modify the sigmoid function as follows:
from multipledispatch import dispatch

@dispatch(da.Array)
def sigmoid(x):
    '''Sigmoid function of x.'''
    return 1/(1+da.exp(-x))

@dispatch(np.ndarray)
def sigmoid(x):
    '''Sigmoid function of x.'''
    return 1/(1+np.exp(-x))
Thanks for the reproducible example.
Unfortunately, this runs fine for me:
In [1]: import numpy as np
In [2]: import dask.array as da
In [3]: from multipledispatch import dispatch
...:
...: @dispatch(da.Array)
...: def sigmoid(x):
...:     '''Sigmoid function of x.'''
...:     return 1/(1+da.exp(-x))
...:
...: @dispatch(np.ndarray)
...: def sigmoid(x):
...:     '''Sigmoid function of x.'''
...:     return 1/(1+np.exp(-x))
...:
In [4]: pdb
Automatic pdb calling has been turned ON
In [5]: def sigmoid(x):
...:     '''Sigmoid function of x.'''
...:     return 1/(1+da.exp(-x))
...:
...: X = da.from_array(np.random.random((4,2)), chunks=2)
...: beta = np.ones((2,1))
...:
...: ##
...: p = (X.dot(beta)).map_blocks(sigmoid)
...: W = da.diag((p*(1-p))[:,0])
...: hessian = da.dot(X.T.dot(W),X)
...: hessian.visualize()
...:
Out[5]: <IPython.core.display.Image object>
The fact that it worked until you restarted something is a bit odd. Perhaps something is up with your environment?
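If it is the environment, a quick way to compare the failing setup with a working one is to print the versions involved (a minimal sketch; note that visualize additionally needs the graphviz Python package and the graphviz system binary on the PATH):
import sys
import dask
import numpy
import multipledispatch
import graphviz

# Compare these between the environment that fails and one that works.
print(sys.version)
print("dask", dask.__version__)
print("numpy", numpy.__version__)
print("multipledispatch", multipledispatch.__version__)
print("graphviz", graphviz.__version__)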