PicklingError attribute lookup do_something on __main__ failed - multiprocessing

How to fix the below PicklingError? I have the same code as belowed youtube link: https://youtu.be/fKl2JW_qrso?t=1064
However, I have come up this error while running.
import concurrent.futures
import time
############## multiprocessing ##############
start = time.time()
def do_something(seconds):
print(f'sleeping {seconds} second(s)...')
time.sleep(seconds)
return 'Done Sleeping...'
with concurrent.futures.ProcessPoolExecutor() as executor:
f1 = executor.submit(do_something, 1)
print(f1.result())
finish = time.time()
print(f'Finished in {round(finish - start, 2)} seconds')
runfile('/Trading/test2.py', wdir='/Trading')
PyDev console: using IPython 7.31.0
Python 3.8.12 (default, Jan 5 2022, 12:00:26)
[GCC 4.8.5 20150623 (Red Hat 4.8.5-44)] on linux
concurrent.futures.process._RemoteTraceback:
"""
Traceback (most recent call last):
File "/usr/local/lib/python3.8/multiprocessing/queues.py", line 239, in _feed
obj = _ForkingPickler.dumps(obj)
File "/usr/local/lib/python3.8/multiprocessing/reduction.py", line 51, in dumps
cls(buf, protocol).dump(obj)
_pickle.PicklingError: Can't pickle <function do_something at 0x7f884a1f5160>: attribute lookup do_something on __main__ failed

Related

How to solve ValueError('Invalid async_mode specified') for flask-socketio?

I'm testing a flask-socketio server in bitbucket pipeline. It failed with the following messages:
Traceback (most recent call last):
File "/root/.local/share/virtualenvs/build-3vGKWv3F/lib/python3.7/site-packages/flask_failsafe.py", line 29, in wrapper
return func(*args, **kwargs)
File "/opt/atlassian/pipelines/agent/build/main.py", line 89, in create_app
return cell_data_api.create_app()
File "/opt/atlassian/pipelines/agent/build/cell_data_api/__init__.py", line 30, in create_app
socketio.init_app(app)
File "/root/.local/share/virtualenvs/build-3vGKWv3F/lib/python3.7/site-packages/flask_socketio/__init__.py", line 243, in init_app
self.server = socketio.Server(**self.server_options)
File "/root/.local/share/virtualenvs/build-3vGKWv3F/lib/python3.7/site-packages/socketio/server.py", line 127, in __init__
self.eio = self._engineio_server_class()(**engineio_options)
File "/root/.local/share/virtualenvs/build-3vGKWv3F/lib/python3.7/site-packages/engineio/server.py", line 145, in __init__
raise ValueError('Invalid async_mode specified')
ValueError: Invalid async_mode specified
Traceback (most recent call last):
File "/.pyenv/versions/3.7.4/lib/python3.7/runpy.py", line 193, in _run_module_as_main
"__main__", mod_spec)
File "/.pyenv/versions/3.7.4/lib/python3.7/runpy.py", line 85, in _run_code
exec(code, run_globals)
File "/opt/atlassian/pipelines/agent/build/main.py", line 117, in <module>
socketio.run(APP, host='0.0.0.0', port=8080, debug=True, use_reloader=True)
File "/root/.local/share/virtualenvs/build-3vGKWv3F/lib/python3.7/site-packages/flask_socketio/__init__.py", line 564, in run
if app.debug and self.server.eio.async_mode != 'threading':
AttributeError: 'NoneType' object has no attribute 'eio'
My main.py file looks like:
import os
from cell_data_api import socketio
# Detect if we are running in App Engine
# Make sure this does NOT start if we are running a Cloud Function
if os.getenv('APP_ENGINE', '') == 'TRUE':
import cell_data_api
APP = cell_data_api.create_app()
if __name__ == '__main__':
from flask_failsafe import failsafe
#failsafe
def create_app():
# note that the import is *inside* this function so that we can catch
# errors that happen at import time
import cell_data_api
# If `entrypoint` is not defined in app.yaml, App Engine will look for an app
# called `app` in `main.py`.
return cell_data_api.create_app()
APP = create_app()
# This is used when running locally only. When deploying to Google App
# Engine, a webserver process such as Gunicorn will serve the app. This
# can be configured by adding an `entrypoint` to app.yaml.
socketio.run(APP, host='0.0.0.0', port=8080, debug=True, use_reloader=True)
Main.py imports from cell_data_api.py, which looks like:
import os
from flask import Flask
from flask_cors import CORS
# import eventlet
from engineio.async_drivers import eventlet
from flask_socketio import SocketIO
socketio = SocketIO(
always_connect=True,
logger=True,
async_mode=eventlet,
cookie=...,
ping_timeout=...
)
def create_app():
# create and configure the app
app = Flask(__name__)
CORS(app)
......
socketio.init_app(app)
# ensure the instance folder exists
try:
os.makedirs(app.instance_path)
except OSError:
pass
return app
My environment is Python 3.7 with installation packages:
[dev-packages]
alembic = "*"
flask_failsafe = "*"
wcwidth = "*"
[packages]
flask = "*"
absl-py = "*"
flask-cors = "*"
grpcio = "*"
transitions = "*"
sqlalchemy-json = "*"
sqlalchemy = "1.3.0"
flask_socketio='*'
eventlet='*'
Unlike the other two questions I found about the same error, I'm not using pyinstaller or cx_Freeze.
The async_mode parameter takes a string as an argument.
Instead of this:
async_mode=eventlet,
Do this:
async_mode='eventlet',

Using per-column compression codec in Parquet.write_table

I have pyarrow 2.0.0 installed. The docs for pyarrow.parquet.write_table state
compression (str or dict) – Specify the compression codec, either on a general basis or per-column. Valid values: {‘NONE’, ‘SNAPPY’, ‘GZIP’, ‘LZO’, ‘BROTLI’, ‘LZ4’, ‘ZSTD’}.
Works fine if compression is a string, but when I try using a dict for per-column specification, I get the following error. What am I doing wrong? I can use a similar dict for compression_level on a per-column basis without error.
(py3) C:\tmp\python>python
Python 3.8.5 (default, Sep 3 2020, 21:29:08) [MSC v.1916 64 bit (AMD64)] :: Anaconda, Inc. on win32
Type "help", "copyright", "credits" or "license" for more information.
>>> import pyarrow as pa
>>> import pyarrow.parquet as pq
>>> import pandas as pd
>>>
>>> df = pd.DataFrame([[1,2,3],[4,5,6]],columns=['foo','bar','baz'])
>>> t = pa.Table.from_pandas(df)
>>> pq.write_table(t,'test1.pq',compression=dict(foo='zstd',bar='snappy',baz='brotli'))
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "c:\app\python\anaconda\3\envs\py3\lib\site-packages\pyarrow\parquet.py", line 1717, in write_table
with ParquetWriter(
File "c:\app\python\anaconda\3\envs\py3\lib\site-packages\pyarrow\parquet.py", line 554, in __init__
self.writer = _parquet.ParquetWriter(
File "pyarrow\_parquet.pyx", line 1390, in pyarrow._parquet.ParquetWriter.__cinit__
File "pyarrow\_parquet.pyx", line 1236, in pyarrow._parquet._create_writer_properties
File "stringsource", line 15, in string.from_py.__pyx_convert_string_from_py_std__in_string
TypeError: expected bytes, str found

BrokenPipeError: [WinError 109] The pipe has been ended during data extraction

I am new to multiprocessing in python.I am extracting some features from a list of 70,000 URLs. I have them from 2 different files. After the feature extraction process I pass the result to a list and then to a CSV file.
The code runs but then stops with the error.I tried to catch the error but it produced another one.
Python version = 3.5
from feature_extractor import Feature_extraction
import pandas as pd
from pandas.core.frame import DataFrame
import sys
from multiprocessing.dummy import Pool as ThreadPool
import threading as thread
from multiprocessing import Process,Manager,Array
import time
class main():
lst = None
def __init__(self):
manager = Manager()
self.lst = manager.list()
self.dostuff()
self.read_lst()
def feature_extraction(self,url):
if self.lst is None:
self.lst = []
features = Feature_extraction(url)
self.lst.append(features.get_features())
print(len(self.lst))
def Pool(self,url):
pool = ThreadPool(8)
results = pool.map(self.feature_extraction, url)
def dostuff(self):
df = pd.read_csv('verified_online.csv',encoding='latin-1')
df['label'] = df['phish_id'] * 0
mal_urls = df['url']
df2 = pd.read_csv('new.csv')
df2['label'] = df['phish_id']/df['phish_id']
ben_urls = df2['urls']
t = Process(target=self.Pool,args=(mal_urls,))
t2 = Process(target=self.Pool,args=(ben_urls,))
t.start()
t2.start()
t.join()
t2.join
def read_lst(self):
nw_df = DataFrame(list(self.lst))
nw_df.columns = ['Redirect count','ssl_classification','url_length','hostname_length','subdomain_count','at_sign_in_url','exe_extension_in_request_url','exe_extension_in_landing_url',
'ip_as_domain_name','no_of_slashes_in requst_url','no_of_slashes_in_landing_url','no_of_dots_in_request_url','no_of_dots_in_landing_url','tld_value','age_of_domain',
'age_of_last_modified','content_length','same_landing_and_request_ip','same_landing_and_request_url']
frames = [df['label'],df2['label']]
new_df = pd.concat(frames)
new_df = new_df.reset_index()
nw_df['label'] = new_df['label']
nw_df.to_csv('dataset.csv', sep=',', encoding='latin-1')
if __name__ == '__main__':
start_time = time.clock()
try:
main()
except BrokenPipeError:
print("broken pipe....")
pass
print (time.clock() - start_time, "seconds")
Error Traceback
Process Process-3:
Traceback (most recent call last):
File "F:\Continuum\Anaconda3\lib\multiprocessing\connection.py", line 312, in _recv_bytes
nread, err = ov.GetOverlappedResult(True)
BrokenPipeError: [WinError 109] The pipe has been ended
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "F:\Continuum\Anaconda3\lib\multiprocessing\process.py", line 249, in _bootstrap
self.run()
File "F:\Continuum\Anaconda3\lib\multiprocessing\process.py", line 93, in run
self._target(*self._args, **self._kwargs)
File "H:\Projects\newoproject\src\main.py", line 33, in Pool
results = pool.map(self.feature_extraction, url)
File "F:\Continuum\Anaconda3\lib\multiprocessing\pool.py", line 260, in map
return self._map_async(func, iterable, mapstar, chunksize).get()
File "F:\Continuum\Anaconda3\lib\multiprocessing\pool.py", line 608, in get
raise self._value
File "F:\Continuum\Anaconda3\lib\multiprocessing\pool.py", line 119, in worker
result = (True, func(*args, **kwds))
File "F:\Continuum\Anaconda3\lib\multiprocessing\pool.py", line 44, in mapstar
return list(map(*args))
File "H:\Projects\newoproject\src\main.py", line 26, in feature_extraction
self.lst.append(features.get_features())
File "<string>", line 2, in append
File "F:\Continuum\Anaconda3\lib\multiprocessing\managers.py", line 717, in _callmethod
kind, result = conn.recv()
File "F:\Continuum\Anaconda3\lib\multiprocessing\connection.py", line 250, in recv
buf = self._recv_bytes()
File "F:\Continuum\Anaconda3\lib\multiprocessing\connection.py", line 321, in _recv_bytes
raise EOFError
EOFError
My response is late and does not address the posted problem directly; but hopefully will provide a clue to others who encounter similar errors.
Errors that I encountered:
BrokenPipeError
WinError 109 The pipe has been ended &
WinError 232 The pipe is being closed
Observed with Python 36 on Windows 7, when:
(1) the same async function was submitted multiple times, each time with a different instance of a multiprocessing data store, a Queue in my case (multiprocessing.Manager().Queue())
AND
(2) the references to the Queues were saved in short-life local variables in the enveloping function.
The errors were occurring despite the fact that the Queues, shared with the successfully spawned and executing async-functions, had items and would still be in active use (put() & get()) at the time of exception.
The error consistently occurred when the same async_func was called the 2nd time with a 2nd instance of the Queue. Immediately after apply_async() of the function, the connection to the 1st Queue supplied to the async_func the 1st time, would get broken.
The issue got resolved when the references to the Queues were saved in non-overlapping (like a Queue-list) & longer-life variables (like variables returned to functions higher in the call-stack) in the enveloping function.

Replace pickle with dill and pymongo call

I finally understood example how to replace pickle with dill from the following discussion: pickle-dill.
For example, the following code worked for me
import os
import dill
import multiprocessing
def run_dill_encoded(what):
fun, args = dill.loads(what)
return fun(*args)
def apply_async(pool, fun, args):
return pool.apply_async(run_dill_encoded, (dill.dumps((fun, args)),))
if __name__ == '__main__':
pool = multiprocessing.Pool(5)
results = [apply_async(pool, lambda x: x*x, args=(x,)) for x in range(1,7)]
output = [p.get() for p in results]
print(output)
I tried to apply the same philosophy to pymongo. The following code
import os
import dill
import multiprocessing
import pymongo
def run_dill_encoded(what):
fun, args = dill.loads(what)
return fun(*args)
def apply_async(pool, fun, args):
return pool.apply_async(run_dill_encoded, (dill.dumps((fun, args)),))
def write_to_db(value_to_insert):
client = pymongo.MongoClient('localhost', 27017)
db = client['somedb']
collection = db['somecollection']
result = collection.insert_one({"filed1": value_to_insert})
client.close()
if __name__ == '__main__':
pool = multiprocessing.Pool(5)
results = [apply_async(pool, write_to_db, args=(x,)) for x in ['one', 'two', 'three']]
output = [p.get() for p in results]
print(output)
produces error:
multiprocessing.pool.RemoteTraceback:
"""
Traceback (most recent call last):
File "C:\Python34\lib\multiprocessing\pool.py", line 119, in worker
result = (True, func(*args, **kwds))
File "C:\...\temp2.py", line 10, in run_dill_encoded
return fun(*args)
File "C:\...\temp2.py", line 21, in write_to_db
client = pymongo.MongoClient('localhost', 27017)
NameError: name 'pymongo' is not defined
"""
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "C:/.../temp2.py", line 32, in <module>
output = [p.get() for p in results]
File "C:/.../temp2.py", line 32, in <listcomp>
output = [p.get() for p in results]
File "C:\Python34\lib\multiprocessing\pool.py", line 599, in get
raise self._value
NameError: name 'pymongo' is not defined
Process finished with exit code 1
What is wrong?
As I mentioned in the comments, you need to put an import pymongo inside the function write_to_db. This is because when the function is serialized, it does not take along any of the global references with it when it is shipped to the other process space.

Error 1053 When Starting Window Service Written In Python

I have already looked at and tried the resolutions to this question that others have posted. One user said that to try and change my setup.py file from:
from distutils.core import setup
import py2exe
setup(console=["dev.py"])
to
from distutils.core import setup
import py2exe
setup(service=["dev.py"])
I got the following results:
running py2exe
*** searching for required modules ***
Traceback (most recent call last):
File "C:\Python27\Scripts\distutils-setup.py", line 5, in <module>
setup(service=["C:\Python27\Scripts\dev.py"])
File "C:\Python27\lib\distutils\core.py", line 152, in setup
dist.run_commands()
File "C:\Python27\lib\distutils\dist.py", line 953, in run_commands
self.run_command(cmd)
File "C:\Python27\lib\distutils\dist.py", line 972, in run_command
cmd_obj.run()
File "C:\Python27\lib\site-packages\py2exe\build_exe.py", line 243, in run
self._run()
File "C:\Python27\lib\site-packages\py2exe\build_exe.py", line 296, in _run
self.find_needed_modules(mf, required_files, required_modules)
File "C:\Python27\lib\site-packages\py2exe\build_exe.py", line 1274, in
find_needed_modules
mf.import_hook(mod)
File "C:\Python27\lib\site-packages\py2exe\mf.py", line 719, in import_hook
return Base.import_hook(self,name,caller,fromlist,level)
File "C:\Python27\lib\site-packages\py2exe\mf.py", line 136, in import_hook
q, tail = self.find_head_package(parent, name)
File "C:\Python27\lib\site-packages\py2exe\mf.py", line 204, in find_head_package
raise ImportError, "No module named " + qname
ImportError: No module named dev
Now, when I run py2exe with "console" in my setup script it works fine, but the service doesn't start and I get the error. When I run py2exe with "service" in my setup script py2exe doesn't run and tells me it can't find my module.
I have tried to re-install py2exe to no resolution. I have also tried to change:
def SvcDoRun(self):
servicemanager.LogMsg(servicemanager.EVENTLOG_INFORMATION_TYPE,
servicemanager.PYS_SERVICE_STARTED,
(self._svc_name_,''))
to
def SvcDoRun(self):
self.ReportServiceStatus(win32service.SERVICE_RUNNING)
win32event.WaitForSingleObject(self.hWaitStop, win32event.INFINITE)
Didn't make a difference either. CAN ANYONE HELP ME PLEASE? Here is what I am working on. It monitors a server and spits back a text file every 60 seconds which I use to monitor my servers at any given minute. Any help you guys and gals can give would be great.
import win32serviceutil
import win32service
import win32event
import servicemanager
import socket
import wmi
import _winreg
from time import sleep
import os
class SrvMonSvc (win32serviceutil.ServiceFramework):
_svc_name_ = "SrvMonSvc"
_svc_display_name_ = "Server Monitor"
def __init__(self,args):
win32serviceutil.ServiceFramework.__init__(self,args)
self.hWaitStop = win32event.CreateEvent(None,0,0,None)
socket.setdefaulttimeout(60)
def SvcStop(self):
self.ReportServiceStatus(win32service.SERVICE_STOP_PENDING)
win32event.SetEvent(self.hWaitStop)
def SvcDoRun(self):
servicemanager.LogMsg(servicemanager.EVENTLOG_INFORMATION_TYPE,
servicemanager.PYS_SERVICE_STARTED,
(self._svc_name_,''))
self.main()
def main(self):
host = wmi.WMI(namespace="root/default").StdRegProv
try:
result, api = host.GetStringValue(
hDefKey = _winreg.HKEY_LOCAL_MACHINE,
sSubKeyName = "SOFTWARE\Server Monitor",
sValueName = "API")
if api == None:
raise Exception
else:
pass
except:
exit()
while 1 == 1:
with open("C:/test.txt", "wb") as b:
computer = wmi.WMI(computer="exsan100")
for disk in computer.Win32_LogicalDisk (DriveType=3):
name = disk.caption
size = round(float(disk.Size)/1073741824, 2)
free = round(float(disk.FreeSpace)/1073741824, 2)
used = round(float(size), 2) - round(float(free), 2)
for mem in computer.Win32_OperatingSystem():
a_mem = (int(mem.FreePhysicalMemory)/1024)
for me in computer.Win32_ComputerSystem():
t_mem = (int(me.TotalPhysicalMemory)/1048576)
u_mem = t_mem - a_mem
for cpu in computer.Win32_Processor():
load = cpu.LoadPercentage
print >>b, api
print >>b, name
print >>b, size
print >>b, used
print >>b, t_mem
print >>b, u_mem
print >>b, load
b.close()
date_list = []
stamp = time.strftime("%c",time.localtime(time.time()))
date_list.append(stamp)
name = re.sub(r"[^\w\s]", "",date_list[0])
os.rename("C:/test.txt", ("C:/%s.txt" % name))
try:
sleep(60.00)
except:
exit()
if __name__ == '__main__':
win32serviceutil.HandleCommandLine(SrvMonSvc)
Have you progressed from your original problem. I had similar problem with a python service and found out that it was missing DLLs since the 'System Path' (not the user path) was not complete.
Running pythonservice.exe with -debug from the command prompt was not a problem because it used correct PATH environment variable, but if your service is installed as a System service it's worth checking out if the System Path variable has all the paths for the required DLLs (MSVC, Python, System32). For me it was missing the python DLLs path, after that it worked again.

Resources