asyncio_redis and aioredis error on getting a list of keys - python-asyncio

When I try to get values for a list of keys using asyncio_redis or aioredis, I get the following error. I know it has something to do with Python sockets, but I have been unable to resolve it. I have attached both the code and the error log below. Here keys is a list of large byte arrays. get_params_redis is called by multiple processes. Any help would be appreciated, thanks!
async def multi_get_key_redis(keys):
    redis = await aioredis.create_redis_pool(
        'redis://localhost')
    result = []
    for key in keys:
        result.append(await redis.get(key))
    # assert result == await asyncio.gather(*keys)
    # return result
    redis.close()
    await redis.wait_closed()
    print(result)
    return result

def get_params_redis(shapes):
    i = -1
    params = []
    keys = []
    for s in range(len(shapes)):
        keys.append(s)
    values = asyncio.get_event_loop().run_until_complete(multi_get_key_redis(keys))
    for shape in shapes:
        i = i + 1
        param_np = pc._loads(values[i]).reshape(shape)
        param_tensor = torch.nn.Parameter(torch.from_numpy(param_np))
        params.append(param_tensor)
    return params
Error Log:
Process Process-1:
Traceback (most recent call last):
File "/usr/local/Cellar/python/3.6.4_4/Frameworks/Python.framework/Versions/3.6/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
self.run()
File "/usr/local/Cellar/python/3.6.4_4/Frameworks/Python.framework/Versions/3.6/lib/python3.6/multiprocessing/process.py", line 93, in run
self._target(*self._args, **self._kwargs)
File "/Users/srujithpoondla/largescaleml_project/train_redis.py", line 33, in train_redis
train_redis_epoch(epoch, args, model, train_loader, optimizer,shapes_len, loop)
File "/Users/srujithpoondla/largescaleml_project/train_redis.py", line 43, in train_redis_epoch
params = get_params_redis(shapes_len,loop)
File "/Users/srujithpoondla/largescaleml_project/common_functions.py", line 76, in get_params_redis
params = loop.run_until_complete(multi_get_key_redis(keys))
File "/usr/local/Cellar/python/3.6.4_4/Frameworks/Python.framework/Versions/3.6/lib/python3.6/asyncio/base_events.py", line 454, in run_until_complete
self.run_forever()
File "/usr/local/Cellar/python/3.6.4_4/Frameworks/Python.framework/Versions/3.6/lib/python3.6/asyncio/base_events.py", line 421, in run_forever
self._run_once()
File "/usr/local/Cellar/python/3.6.4_4/Frameworks/Python.framework/Versions/3.6/lib/python3.6/asyncio/base_events.py", line 1395, in _run_once
event_list = self._selector.select(timeout)
File "/usr/local/Cellar/python/3.6.4_4/Frameworks/Python.framework/Versions/3.6/lib/python3.6/selectors.py", line 577, in select
kev_list = self._kqueue.control(None, max_ev, timeout)
OSError: [Errno 9] Bad file descriptor
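A common cause of this exact failure is that asyncio.get_event_loop() in a forked child process returns the loop created in the parent, whose kqueue file descriptor is no longer valid after fork(). A minimal sketch of a workaround, assuming that is what happens here (untested against the original setup): give each process its own loop.

import asyncio

def get_params_redis(shapes):
    # Fresh loop owned by this process, never inherited across fork()
    loop = asyncio.new_event_loop()
    asyncio.set_event_loop(loop)
    try:
        keys = list(range(len(shapes)))
        values = loop.run_until_complete(multi_get_key_redis(keys))
    finally:
        loop.close()
    ...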

Related

DataprocCreateClusterOperator fails due to TypeError

EDIT 1: The problem is related to the field "initialization_actions". Originally I'd put a string there; now I give it the object it asks for:
from google.cloud.dataproc_v1beta2 import NodeInitializationAction

CLUSTER_CONFIG = {
    ...
    "initialization_actions": [NodeInitializationAction({
        "executable_file": <string>})]
}
Unfortunately it's still complaining:
ERROR - Parameter to MergeFrom() must be instance of same class: expected google.cloud.dataproc.v1beta2.NodeInitializationAction got NodeInitializationAction.
I am trying to deploy a Dataproc cluster with airflow.providers.google.cloud.operators.dataproc.DataprocCreateClusterOperator, but I get a cryptic TypeError.
Here is the task definition:
CLUSTER_CONFIG = {
    "config_bucket": <my_bucket>,
    "temp_bucket": <my_bucket>,
    "master_config": {
        "num_instances": 1,
        "machine_type_uri": "c2-standard-8",
        "disk_config": {"boot_disk_type": "pd-standard", "boot_disk_size_gb": 1024},
    },
    "initialization_actions": [<string>],
}

create_cluster = DataprocCreateClusterOperator(
    task_id="create_cluster",
    project_id=PROJECT_ID,
    cluster_config=CLUSTER_CONFIG,
    region=REGION,
    cluster_name=CLUSTER_NAME,
    metadata=[("ENV", ENV)],
    dag=dag)
Traceback:
Traceback (most recent call last):
  File "/usr/local/lib/airflow/airflow/models/taskinstance.py", line 985, in _run_raw_task
    result = task_copy.execute(context=context)
  File "/usr/local/lib/airflow/airflow/providers/google/cloud/operators/dataproc.py", line 603, in execute
    cluster = self._create_cluster(hook)
  File "/usr/local/lib/airflow/airflow/providers/google/cloud/operators/dataproc.py", line 540, in _create_cluster
    metadata=self.metadata,
  File "/usr/local/lib/airflow/airflow/providers/google/common/hooks/base_google.py", line 425, in inner_wrapper
    return func(self, *args, **kwargs)
  File "/usr/local/lib/airflow/airflow/providers/google/cloud/hooks/dataproc.py", line 304, in create_cluster
    metadata=metadata,
  File "/opt/python3.6/lib/python3.6/site-packages/google/cloud/dataproc_v1beta2/services/cluster_controller/client.py", line 412, in create_cluster
    request = clusters.CreateClusterRequest(request)
  File "/opt/python3.6/lib/python3.6/site-packages/proto/message.py", line 506, in __init__
    pb_value = marshal.to_proto(pb_type, value)
  File "/opt/python3.6/lib/python3.6/site-packages/proto/marshal/marshal.py", line 208, in to_proto
    pb_value = rule.to_proto(value)
  File "/opt/python3.6/lib/python3.6/site-packages/proto/marshal/rules/message.py", line 32, in to_proto
    return self._descriptor(**value)
TypeError: Parameter to MergeFrom() must be instance of same class: expected google.cloud.dataproc.v1beta2.NodeInitializationAction got str
The field "initialization_actions" is not a list of strings, but a list of dicts:
"initialization_actions": [{"executable_file": <string>}]

How to call async function inside a celery task

I have a web chat using websockets (AsyncWebsocketConsumer, django-channels). I'm using Celery to parse a request, but it halts with errors I cannot debug every time I try to send the response back to the consumer.
This attempt gives me the following error:
@shared_task
def execute(command, parameter, room_group_name):
    if command == '/stock':
        loop = asyncio.get_event_loop()
        loop.run_until_complete(sendData(stock(parameter), "BOT", room_group_name))
        return True
    loop = asyncio.get_event_loop()
    loop.run_until_complete(sendData("I do not understand that parameter", "BOT", room_group_name))
    return True
from channels.layers import get_channel_layer

async def sendData(message, from_, room_group_name):
    channel_layer = get_channel_layer()
    import datetime
    currentDT = datetime.datetime.now()
    datetime = currentDT.strftime("%Y-%m-%d %H:%M:%S")
    await channel_layer.group_send(
        room_group_name,
        {
            'type': 'chat_message',
            'username': from_,
            'datetime': datetime,
            'message': message
        }
    )
    await asyncio.sleep(5)
Error:
[2019-05-12 18:01:15,491: ERROR/ForkPoolWorker-1] Task chat.tasks.execute[8a69afca-8173-46d0-84bc-4ee5ce7782ca] raised unexpected: OSError(9, 'Bad file descriptor')
Traceback (most recent call last):
File "/Users/juan/Documents/manu/dev/python_challenge/venv/lib/python3.6/site-packages/celery/app/trace.py", line 385, in trace_task
R = retval = fun(*args, **kwargs)
File "/Users/juan/Documents/manu/dev/python_challenge/venv/lib/python3.6/site-packages/celery/app/trace.py", line 648, in __protected_call__
return self.run(*args, **kwargs)
File "/Users/juan/Documents/manu/dev/python_challenge/chat/tasks.py", line 14, in execute
loop.run_until_complete(sendData(stock(parameter), "BOT", room_group_name))
File "/usr/local/Cellar/python/3.6.5/Frameworks/Python.framework/Versions/3.6/lib/python3.6/asyncio/base_events.py", line 455, in run_until_complete
self.run_forever()
File "/usr/local/Cellar/python/3.6.5/Frameworks/Python.framework/Versions/3.6/lib/python3.6/asyncio/base_events.py", line 422, in run_forever
self._run_once()
File "/usr/local/Cellar/python/3.6.5/Frameworks/Python.framework/Versions/3.6/lib/python3.6/asyncio/base_events.py", line 1396, in _run_once
event_list = self._selector.select(timeout)
File "/usr/local/Cellar/python/3.6.5/Frameworks/Python.framework/Versions/3.6/lib/python3.6/selectors.py", line 577, in select
kev_list = self._kqueue.control(None, max_ev, timeout)
OSError: [Errno 9] Bad file descriptor
The task raises OSError: [Errno 9] Bad file descriptor, but I cannot find where it is coming from.
celery==4.3.0
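A sketch of one workaround, under the assumption that Celery's prefork worker inherits an event loop whose selector file descriptor died with the fork: create and close a loop inside the task instead of reusing asyncio.get_event_loop().

import asyncio
from celery import shared_task

@shared_task
def execute(command, parameter, room_group_name):
    loop = asyncio.new_event_loop()   # fresh loop with a fresh kqueue fd
    asyncio.set_event_loop(loop)
    try:
        if command == '/stock':
            loop.run_until_complete(
                sendData(stock(parameter), "BOT", room_group_name))
        else:
            loop.run_until_complete(
                sendData("I do not understand that parameter",
                         "BOT", room_group_name))
    finally:
        loop.close()
    return True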

pyspark.sql.utils.IllegalArgumentException: u'java.net.UnknownHostException: user'

I am new to PySpark and I am trying to do a simple count. However, it gives me this error. The text file is in HDFS.
CODE:
>>> mydata = sc.textFile("hdfs://user/poem.txt")
>>> mydata.count()
ERROR:
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "/usr/local/lib/spark-2.0.1-bin-hadoop2.7/python/pyspark/rdd.py", line 1008, in count
return self.mapPartitions(lambda i: [sum(1 for _ in i)]).sum()
File "/usr/local/lib/spark-2.0.1-bin-hadoop2.7/python/pyspark/rdd.py", line 999, in sum
return self.mapPartitions(lambda x: [sum(x)]).fold(0, operator.add)
File "/usr/local/lib/spark-2.0.1-bin-hadoop2.7/python/pyspark/rdd.py", line 873, in fold
vals = self.mapPartitions(func).collect()
File "/usr/local/lib/spark-2.0.1-bin-hadoop2.7/python/pyspark/rdd.py", line 776, in collect
port = self.ctx._jvm.PythonRDD.collectAndServe(self._jrdd.rdd())
File "/usr/local/lib/spark-2.0.1-bin-hadoop2.7/python/lib/py4j-0.10.3-src.zip/py4j/java_gateway.py", line 1133, in __call__
File "/usr/local/lib/spark-2.0.1-bin-hadoop2.7/python/pyspark/sql/utils.py", line 79, in deco
raise IllegalArgumentException(s.split(': ', 1)[1], stackTrace)
pyspark.sql.utils.IllegalArgumentException: u'java.net.UnknownHostException: user'
You are missing a "/".
r = sc.textFile("hdfs://user/myFile")
r.count()
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "/opt/cloudera/parcels/CDH-5.7.0-1.cdh5.7.0.p1464.1349/lib/spark/python/pyspark/rdd.py", line 1004, in count
return self.mapPartitions(lambda i: [sum(1 for _ in i)]).sum()
File "/opt/cloudera/parcels/CDH-5.7.0-1.cdh5.7.0.p1464.1349/lib/spark/python/pyspark/rdd.py", line 995, in sum
return self.mapPartitions(lambda x: [sum(x)]).fold(0, operator.add)
File "/opt/cloudera/parcels/CDH-5.7.0-1.cdh5.7.0.p1464.1349/lib/spark/python/pyspark/rdd.py", line 869, in fold
vals = self.mapPartitions(func).collect()
File "/opt/cloudera/parcels/CDH-5.7.0-1.cdh5.7.0.p1464.1349/lib/spark/python/pyspark/rdd.py", line 771, in collect
port = self.ctx._jvm.PythonRDD.collectAndServe(self._jrdd.rdd())
File "/opt/cloudera/parcels/CDH-5.7.0-1.cdh5.7.0.p1464.1349/lib/spark/python/lib/py4j-0.9-src.zip/py4j/java_gateway.py", line 813, in __call__
File "/opt/cloudera/parcels/CDH-5.7.0-1.cdh5.7.0.p1464.1349/lib/spark/python/pyspark/sql/utils.py", line 53, in deco
raise IllegalArgumentException(s.split(': ', 1)[1], stackTrace)
pyspark.sql.utils.IllegalArgumentException: u'java.net.UnknownHostException: user'
However, if you do
>>> r = sc.textFile("hdfs:///user/myFile")
>>> r.count()
318199
This is because hdfs:// is only the URI scheme; in fully qualified syntax it should be hdfs:///. Without the third slash, Spark treats the token "user" as the NameNode host.
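In other words, either use the triple-slash form (the NameNode is taken from the cluster's core-site.xml) or spell the NameNode host out explicitly. The host and port below are placeholders, not values from the question:

>>> mydata = sc.textFile("hdfs:///user/poem.txt")                    # default NameNode
>>> mydata = sc.textFile("hdfs://namenode-host:8020/user/poem.txt")  # explicit NameNode
>>> mydata.count()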

Error when I Update Module List in OpenERP

I am trying to update my module list using the Update Module List menu item, but I get the following error:
OpenERP Server Error
Client Traceback (most recent call last):
File "/opt/bitnami/apps/openerp/lib/openerp-7.0_20140330_231328-py2.7.egg/openerp/addons/web/http.py", line 204, in dispatch
response["result"] = method(self, **self.params)
File "/opt/bitnami/apps/openerp/lib/openerp-7.0_20140330_231328-py2.7.egg/openerp/addons/web/controllers/main.py", line 1132, in call_button
action = self._call_kw(req, model, method, args, {})
File "/opt/bitnami/apps/openerp/lib/openerp-7.0_20140330_231328-py2.7.egg/openerp/addons/web/controllers/main.py", line 1120, in _call_kw
return getattr(req.session.model(model), method)(*args, **kwargs)
File "/opt/bitnami/apps/openerp/lib/openerp-7.0_20140330_231328-py2.7.egg/openerp/addons/web/session.py", line 42, in proxy
result = self.proxy.execute_kw(self.session._db, self.session._uid, self.session._password, self.model, method, args, kw)
File "/opt/bitnami/apps/openerp/lib/openerp-7.0_20140330_231328-py2.7.egg/openerp/addons/web/session.py", line 30, in proxy_method
result = self.session.send(self.service_name, method, *args)
File "/opt/bitnami/apps/openerp/lib/openerp-7.0_20140330_231328-py2.7.egg/openerp/addons/web/session.py", line 103, in send
raise xmlrpclib.Fault(openerp.tools.ustr(e), formatted_info)
Server Traceback (most recent call last):
File "/opt/bitnami/apps/openerp/lib/openerp-7.0_20140330_231328-py2.7.egg/openerp/addons/web/session.py", line 89, in send
return openerp.netsvc.dispatch_rpc(service_name, method, args)
File "/opt/bitnami/apps/openerp/lib/openerp-7.0_20140330_231328-py2.7.egg/openerp/netsvc.py", line 296, in dispatch_rpc
result = ExportService.getService(service_name).dispatch(method, params)
File "/opt/bitnami/apps/openerp/lib/openerp-7.0_20140330_231328-py2.7.egg/openerp/service/web_services.py", line 626, in dispatch
res = fn(db, uid, *params)
File "/opt/bitnami/apps/openerp/lib/openerp-7.0_20140330_231328-py2.7.egg/openerp/osv/osv.py", line 190, in execute_kw
return self.execute(db, uid, obj, method, *args, **kw or {})
File "/opt/bitnami/apps/openerp/lib/openerp-7.0_20140330_231328-py2.7.egg/openerp/osv/osv.py", line 132, in wrapper
return f(self, dbname, *args, **kwargs)
File "/opt/bitnami/apps/openerp/lib/openerp-7.0_20140330_231328-py2.7.egg/openerp/osv/osv.py", line 199, in execute
res = self.execute_cr(cr, uid, obj, method, *args, **kw)
File "/opt/bitnami/apps/openerp/lib/openerp-7.0_20140330_231328-py2.7.egg/openerp/osv/osv.py", line 187, in execute_cr
return getattr(object, method)(cr, uid, *args, **kw)
File "/opt/bitnami/apps/openerp/lib/openerp-7.0_20140330_231328-py2.7.egg/openerp/addons/base/module/wizard/base_module_update.py", line 42, in update_module
update, add = module_obj.update_list(cr, uid,)
File "/opt/bitnami/apps/openerp/lib/openerp-7.0_20140330_231328-py2.7.egg/openerp/addons/base/module/module.py", line 617, in update_list
handler.load_addons()
File "/opt/bitnami/apps/openerp/lib/openerp-7.0_20140330_231328-py2.7.egg/openerp/addons/web/http.py", line 580, in load_addons
m = __import__('openerp.addons.' + module)
File "/opt/bitnami/apps/openerp/lib/openerp-7.0_20140330_231328-py2.7.egg/openerp/modules/module.py", line 133, in load_module
mod = imp.load_module('openerp.addons.' + module_part, f, path, descr)
File "/opt/bitnami/apps/openerp/lib/openerp-7.0_20140330_231328-py2.7.egg/openerp/addons/magento_integration-develop/__init__.py", line 9, in <module>
import magento_
File "/opt/bitnami/apps/openerp/lib/openerp-7.0_20140330_231328-py2.7.egg/openerp/addons/magento_integration-develop/magento_.py", line 17, in <module>
import magento
ImportError: No module named magento
I am trying to install a Magento OpenERP connector, but in order to do that I must locate it in the Installed Module list.
Thanks
Please check your file magento_.py and see whether the class has been called. If it has been called correctly, check whether it has been correctly specified in the import line in your __init__.py file.
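The ImportError itself says the magento Python library is not importable by the interpreter running the OpenERP server. A quick diagnostic sketch (the pip package name is an assumption, not something the traceback confirms):

import sys
try:
    import magento
    print("magento found at %s" % magento.__file__)
except ImportError:
    # Install it into the same interpreter that runs the OpenERP server
    print("magento missing; try: %s -m pip install magento" % sys.executable)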

gdata.docs.client.DocsClient

I have the following code, which reads an OAuth2 token from a file and then tries to perform a docs list query to find a specific spreadsheet that I want to copy. However, no matter what I try, the code either errors out or returns an object containing no document data.
I am using gdata.docs.client.DocsClient, which as far as I can tell is version 3 of the API.
def CreateClient():
    """Create a Documents List Client."""
    client = gdata.docs.client.DocsClient(source=config.APP_NAME)
    client.http_client.debug = config.DEBUG
    # Authenticate the user with ClientLogin, OAuth, or AuthSub.
    if os.path.exists(config.CONFIG_FILE):
        f = open(config.CONFIG_FILE)
        tok = pickle.load(f)
        f.close()
        client.auth_token = tok.auth_token
    return client
First query attempt:
def get_doc():
    new_api_query = gdata.docs.client.DocsQuery(title='RichSheet', title_exact=True, show_collections=True)
    d = client.GetResources(q=new_api_query)
This fails with the following stack trace:
Traceback (most recent call last):
File "/Users/richard/PycharmProjects/reportone/make_my_report.py", line 83, in <module>
get_doc()
File "/Users/richard/PycharmProjects/reportone/make_my_report.py", line 57, in get_doc
d = client.GetResources(q = new_api_query)
File "/Users/richard/PycharmProjects/reportone/gdata/docs/client.py", line 151, in get_resources
**kwargs)
File "/Users/richard/PycharmProjects/reportone/gdata/client.py", line 640, in get_feed
**kwargs)
File "/Users/richard/PycharmProjects/reportone/gdata/docs/client.py", line 66, in request
return super(DocsClient, self).request(method=method, uri=uri, **kwargs)
File "/Users/richard/PycharmProjects/reportone/gdata/client.py", line 267, in request
uri=uri, auth_token=auth_token, http_request=http_request, **kwargs)
File "/Users/richard/PycharmProjects/reportone/atom/client.py", line 115, in request
self.auth_token.modify_request(http_request)
File "/Users/richard/PycharmProjects/reportone/gdata/gauth.py", line 1047, in modify_request
token_secret=self.token_secret, verifier=self.verifier)
File "/Users/richard/PycharmProjects/reportone/gdata/gauth.py", line 668, in generate_hmac_signature
next, token, verifier=verifier)
File "/Users/richard/PycharmProjects/reportone/gdata/gauth.py", line 629, in build_oauth_base_string
urllib.quote(params[key], safe='~')))
File "/opt/local/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/urllib.py", line 1266, in quote
if not s.rstrip(safe):
AttributeError: 'bool' object has no attribute 'rstrip'
Process finished with exit code 1
Then my second attempt:
def get_doc():
    other = gdata.docs.service.DocumentQuery(text_query='RichSheet')
    d = client.GetResources(q=other)
This returns a ResourceFeed object, but it has no content. I have been through the source code for these functions, but nothing is obvious.
Have I missed something, or should I go back to version 2 of the API?
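For what it's worth, the first stack trace dies in urllib.quote() on a bool, which suggests the True values in DocsQuery end up in the OAuth parameter list unconverted. A sketch of a workaround, assuming that reading is right: pass the flags as strings.

def get_doc():
    new_api_query = gdata.docs.client.DocsQuery(
        title='RichSheet',
        title_exact='true',          # string instead of Python True
        show_collections='true')     # string instead of Python True
    return client.GetResources(q=new_api_query)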
