Using cython functions with multiprocessing failed

I have the following code in my Jupyter notebook on macOS to compute a similarity measure between list pairs. When running distSeq on a single pair of lists, distSeq(list1, list2, len1, len2), the similarity measure is computed successfully. However, when I use multiprocessing in another cell to compute the measure across many list pairs, a ModuleNotFoundError is raised, and I'm not sure what the problem is, as I can't find any answer online.
Define functions in a cython cell
%%cython --annotate
cimport numpy as np
import numpy as np
import osmnx as ox
from scipy.spatial.distance import cdist
import cython
from __main__ import G
def simPnt(p1_n,p2_m):
    *details cleared*

cpdef list lcs(list r1,list r2): # longest common subsequence btw route 1 and route 2
    cdef int N = len(r1)
    cdef int M = len(r2) # route is list of node ID
    L_arr = np.empty((N+1,M+1),dtype=np.single)
    cdef float[:,:] L = L_arr # declare L -> sum of similarity score
    cdef list LCS = []
    sp_arr = cdist(np.array(r1).reshape((N,1)),np.array(r2).reshape((M,1)),simPnt)
    cdef double[:,:] sp = sp_arr
    cdef int n, m
    for n in range(N+1):
        for m in range(M+1):
            if n == 0 or m == 0:
                L[n,m] = 0
            else:
                L[n,m] = max([L[n-1,m-1]+sp[n-1,m-1],L[n,m-1],L[n-1,m]])
    # backtrack
    n,m = N,M
    cdef float tmp
    while n > 0 and m > 0:
        tmp = L[n-1,m-1]+sp[n-1,m-1]
        if tmp > L[n,m-1] and tmp > L[n-1,m]:
            LCS.append((n-1,m-1))
            n -= 1
            m -= 1
        elif L[n-1,m] > L[n,m-1]:
            n -= 1
        else:
            m -= 1
    # print(LCS)
    return LCS[::-1] # matched points solely from either route 1 or 2

cpdef float distSeq(list r1,list r2,float lenseq_r1,float lenseq_r2):
    *details cleared*
    if min(lenseq_LCS1,lenseq_LCS2) < gamma:
        return 1
    elif lenseq_r1 < lenseq_r2:
        return 1 - (lenseq_LCS1/lenseq_r1)
    else:
        return 1 - (lenseq_LCS2/lenseq_r2)
Run the function with multiprocessing
import multiprocessing as mp
pool = mp.Pool(mp.cpu_count())
results = [pool.apply(distSeq, args=(r1,r2,lenseq[i],lenseq[j])) for ((i,r1),(j,r2)) in itertools.combinations(enumerate(routes), 2)]
df2 = pd.DataFrame()
df2['r1_r2'] = list(itertools.combinations(list(range(len(routes))),2))
df2['distSeq'] = results
pool.close()
Error log
Process SpawnPoolWorker-28:
Traceback (most recent call last):
File "/Users/timmyhsu/miniconda3/envs/py3.8/lib/python3.8/multiprocessing/process.py", line 315, in _bootstrap
self.run()
File "/Users/timmyhsu/miniconda3/envs/py3.8/lib/python3.8/multiprocessing/process.py", line 108, in run
self._target(*self._args, **self._kwargs)
File "/Users/timmyhsu/miniconda3/envs/py3.8/lib/python3.8/multiprocessing/pool.py", line 114, in worker
task = get()
File "/Users/timmyhsu/miniconda3/envs/py3.8/lib/python3.8/multiprocessing/queues.py", line 358, in get
return _ForkingPickler.loads(res)
ModuleNotFoundError: No module named '_cython_magic_86f410b10c6f86b67ee02703c26cace8'
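For context on the error itself: the worker process (started with the "spawn" method, the default on macOS under Python 3.8) tries to unpickle a reference to distSeq, but that function lives in a temporary _cython_magic_... module that only exists inside the notebook kernel, so the import fails in the child. Below is a minimal sketch of one possible workaround, under the assumption that the Cython code is moved out of the notebook into its own module (hypothetically named route_sim here) and built so that workers can import it; the 'fork' start method is another commonly suggested option on macOS, though it can be fragile with some libraries. The routes and lenseq variables are those from the question.

import itertools
import multiprocessing as mp

from route_sim import distSeq   # hypothetical compiled Cython module

if __name__ == '__main__':
    ctx = mp.get_context('fork')            # or 'spawn' once distSeq is importable
    with ctx.Pool(ctx.cpu_count()) as pool:
        pairs = list(itertools.combinations(enumerate(routes), 2))
        args = [(r1, r2, lenseq[i], lenseq[j]) for (i, r1), (j, r2) in pairs]
        results = pool.starmap(distSeq, args)   # runs tasks in parallel, unlike pool.apply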

Related

Generating a series of eCDFs of estimated wind speeds for each weather type classification

Trying to read in and write out the WT_matrix, and then read the output into a separate file in Python or Excel. The WT_matrix output then needs to be represented as a series of eCDF functions (estimated daily mean wind speeds) for each individual weather type (WT).
import numpy as np
import netCDF4 as nc
import os
from collections import Counter
os.chdir('C:/Users/peter/OneDrive/Documents/MSc Dissertation/IWT')
IWT_full = np.loadtxt('ERA-5_version2_classifications_19790101_20190930.csv', delimiter=',')
os.chdir('C:/Users/peter/OneDrive/Documents/MSc Dissertation/Wind/ERA-5_stats/ws_conversion')
WIND = nc.Dataset('ERA-5_daily_mean_final_uv.nc')
ws = WIND['ws'][:]
start_index = 1
end_index = 14611
IWT = IWT_full[start_index : end_index][:, 3]
IWT = list(map(int, IWT)) #turn into integers
IWT = np.array(IWT) #turn from list to numpy array
most_comm_WT = np.max(list(Counter(IWT).values()))
WT_matrix = np.empty((30, most_comm_WT, 2, 129, 121))
WT_matrix[:] = np.nan
for WT in range(30):
    index = np.where(IWT == (WT+1))
    WT_matrix[WT, 0:len(index[0]), :, :, :] = ws[index, :, :, :]
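As a rough illustration of the stated goal (a series of eCDFs, one per weather type), here is a minimal sketch, assuming WT_matrix has been filled as above and that the NaN entries mark unused slots; the helper name ecdf is my own, not from the original post.

import numpy as np
import matplotlib.pyplot as plt

def ecdf(values):
    # sorted values and their empirical cumulative probabilities, NaNs dropped
    x = np.sort(values[~np.isnan(values)].ravel())
    y = np.arange(1, x.size + 1) / float(x.size)
    return x, y

for WT in range(30):
    x, y = ecdf(WT_matrix[WT])    # all wind speeds assigned to this weather type
    plt.step(x, y, where='post')
plt.xlabel('daily mean wind speed')
plt.ylabel('cumulative probability')
plt.show()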

MVGC F value and P value calculation

I am currently working to compute the Multivariate Granger Causality F value and p value via this code.
def demean(x, axis=0):
    "Return x minus its mean along the specified axis"
    x = np.asarray(x)
    if axis == 0 or axis is None or x.ndim <= 1:
        return x - x.mean(axis)
    ind = [slice(None)] * x.ndim
    ind[axis] = np.newaxis
    return x - x.mean(axis)[ind]

#------------------------------

def tsdata_to_autocov(X, q):
    import numpy as np
    from matplotlib import pylab
    if len(X.shape) == 2:
        X = np.expand_dims(X, axis=2)
        [n, m, N] = np.shape(X)
    else:
        [n, m, N] = np.shape(X)
    X = demean(X, axis=1)
    G = np.zeros((n, n, (q+1)))
    for k in range(q+1):
        M = N * (m-k)
        G[:,:,k] = np.dot(np.reshape(X[:,k:m,:], (n, M)), np.reshape(X[:,0:m-k,:], (n, M)).conj().T) / M-1
    return G

#-------------------------

def autocov_to_mvgc(G, x, y):
    import numpy as np
    from mvgc import autocov_to_var
    n = G.shape[0]
    z = np.arange(n)
    z = np.delete(z, [np.array(np.hstack((x, y)))])
    # indices of other variables (to condition out)
    xz = np.array(np.hstack((x, z)))
    xzy = np.array(np.hstack((xz, y)))
    F = 0
    # full regression
    ixgrid1 = np.ix_(xzy, xzy)
    [AF, SIG] = autocov_to_var(G[ixgrid1])
    # reduced regression
    ixgrid2 = np.ix_(xz, xz)
    [AF, SIGR] = autocov_to_var(G[ixgrid2])
    ixgrid3 = np.ix_(x, x)
    F = np.log(np.linalg.det(SIGR[ixgrid3])) - np.log(np.linalg.det(SIG[ixgrid3]))
    return F
Can anyone show me an example for how they got to solving for F and p?
It would also help a lot to see what your timeseries data looks like.
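The posted code stops at the log-determinant-ratio statistic F. As a hedged sketch only (my own assumption, following the standard large-sample likelihood-ratio theory used by the MVGC literature, so please verify against your own setup): scaling F by the effective number of observations gives a statistic that is asymptotically chi-squared with p * len(x) * len(y) degrees of freedom under the null of no causality, where p is the VAR model order.

from scipy import stats

def mvgc_pvalue(F, nobs, p, nx=1, ny=1):
    # nobs: effective number of observations, p: VAR model order,
    # nx/ny: sizes of the source and target variable groups
    dof = p * nx * ny
    return stats.chi2.sf(nobs * F, dof)

# hypothetical usage: p_value = mvgc_pvalue(F, nobs=..., p=q)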

Optimizing simple CPU-bound loops using Cython and replacing a list

I am trying to evaluate some approaches, and I'm hitting a stumbling block with performance.
Why is my Cython code so slow? My expectation is that the code would run quite a bit faster (maybe nanoseconds for a 2D loop with only 256 ** 2 entries) rather than milliseconds.
Here are my test results:
$ python setup.py build_ext --inplace; python test.py
running build_ext
counter: 0.00236220359802 sec
pycounter: 0.00323309898376 sec
percentage: 73.1 %
My initial code looks something like this:
#!/usr/bin/env python
# encoding: utf-8
# filename: loop_testing.py
def generate_coords(dim, length):
    """Generates a list of coordinates from dimensions and size
    provided.

    Parameters:
        dim -- dimension
        length -- size of each dimension

    Returns:
        A list of coordinates based on dim and length
    """
    values = []
    if dim == 2:
        for x in xrange(length):
            for y in xrange(length):
                values.append((x, y))
    if dim == 3:
        for x in xrange(length):
            for y in xrange(length):
                for z in xrange(length):
                    values.append((x, y, z))
    return values
This works for what I need, but is slow. For a given (dim, length) = (2, 256), I see a timing in IPython of approximately 2.3 ms.
In an attempt to speed this up, I developed a cython equivalent (I think it's an equivalent).
#!/usr/bin/env python
# encoding: utf-8
# filename: loop_testing.pyx
# cython: boundscheck=False
# cython: wraparound=False
cimport cython
from cython.parallel cimport prange
import numpy as np
cimport numpy as np
ctypedef int DTYPE
# 2D point updater
cpdef inline void _counter_2d(DTYPE[:, :] narr, int val) nogil:
    cdef:
        DTYPE count = 0
        DTYPE index = 0
        DTYPE x, y
    for x in range(val):
        for y in range(val):
            narr[index][0] = x
            narr[index][1] = y
            index += 1

cpdef DTYPE[:, :] counter(dim=2, val=256):
    narr = np.zeros((val**dim, dim), dtype=np.dtype('i4'))
    _counter_2d(narr, val)
    return narr

def pycounter(dim=2, val=256):
    vals = []
    for x in xrange(val):
        for y in xrange(val):
            vals.append((x, y))
    return vals
And the invocation of the timing:
#!/usr/bin/env python
# filename: test.py
"""
Usage:
    test.py [options]
    test.py [options] <val>
    test.py [options] <dim> <val>

Options:
    -h --help   This Message
    -n          Number of loops [default: 10]
"""

if __name__ == "__main__":
    from docopt import docopt
    from timeit import Timer

    args = docopt(__doc__)
    dim = args.get("<dim>") or 2
    val = args.get("<val>") or 256
    n = args.get("-n") or 10
    dim = int(dim)
    val = int(val)
    n = int(n)

    tests = ['counter', 'pycounter']
    timing = {}
    for test in tests:
        code = "{}(dim=dim, val=val)".format(test)
        variables = "dim, val = ({}, {})".format(dim, val)
        setup = "from loop_testing import {}; {}".format(test, variables)
        t = Timer(code, setup=setup)
        timing[test] = t.timeit(n) / n

    for test, val in timing.iteritems():
        print "{:>20}: {} sec".format(test, val)
    print "{:>20}: {:>.3} %".format("percentage", timing['counter'] / timing['pycounter'] * 100)
And for reference, the setup.py to build the cython code:
from distutils.core import setup
from Cython.Build import cythonize
import numpy
include_path = [numpy.get_include()]
setup(
    name="looping",
    ext_modules=cythonize('loop_testing.pyx'),  # accepts a glob pattern
    include_dirs=include_path,
)
EDIT:
Link to working version: https://github.com/brianbruggeman/cython_experimentation
This Cython code was slow because of the narr[index][0] = x assignment, which relies heavily on the Python C-API. Using narr[index, 0] = x instead is translated to pure C and solves this issue.
As pointed out by @perimosocordiae, using cythonize with annotations is definitely the way to go to debug such issues.
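Concretely, only the two assignments in the inner loop need to change; a short sketch of the fixed _counter_2d body:

for x in range(val):
    for y in range(val):
        narr[index, 0] = x   # single multi-dimensional index, stays in C
        narr[index, 1] = y
        index += 1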
In some cases it can also be worth explicitly specifying compilation flags in setup.py for gcc,
setup(
    [...]
    extra_compile_args=['-O2', '-march=native'],
    extra_link_args=['-O2', '-march=native'])
This should not be necessary, assuming reasonable default compilation flags. However, on my Linux system, for instance, the defaults appear to be no optimization at all, and adding the above flags results in a significant performance improvement.
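For reference, a sketch of where those flags usually go with distutils and Cython (my own assumption; the abbreviated setup() call above omits the Extension object):

from distutils.core import setup
from distutils.extension import Extension
from Cython.Build import cythonize
import numpy

ext = Extension(
    "loop_testing", ["loop_testing.pyx"],
    include_dirs=[numpy.get_include()],
    extra_compile_args=['-O2', '-march=native'],
    extra_link_args=['-O2', '-march=native'],
)
setup(name="looping", ext_modules=cythonize([ext]))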
It looks like your Cython code is doing some strange things with numpy arrays, and isn't really taking advantage of the C compilation. To check the generated code, run
cython -a loop_testing.pyx
What happens if you avoid the numpy parts and do a straightforward Cython translation of the Python function?
EDIT: It looks like you can avoid Cython entirely for a pretty decent speedup. (~30x on my machine)
import numpy as np

def npcounter(dim=2, val=256):
    return np.indices((val,)*dim).reshape((dim, -1)).T
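For a small example of what npcounter returns (my own illustration): np.indices builds the coordinate grids, and the reshape/transpose flattens them into one row per coordinate.

>>> npcounter(dim=2, val=3)
array([[0, 0],
       [0, 1],
       [0, 2],
       [1, 0],
       [1, 1],
       [1, 2],
       [2, 0],
       [2, 1],
       [2, 2]])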

Fitting a capped Poisson process with a variable rate

I'm trying to estimate the rate of a Poisson process where the rate varies over time, using the maximum a posteriori estimate. Here's a simplified example with a rate varying linearly (λ = ax + b):
import numpy as np
import pymc
# Observation
a_actual = 1.3
b_actual = 2.0
t = np.arange(10)
obs = np.random.poisson(a_actual * t + b_actual)
# Model
a = pymc.Uniform(name='a', value=1., lower=0, upper=10)
b = pymc.Uniform(name='b', value=1., lower=0, upper=10)
@pymc.deterministic
def linear(a=a, b=b):
    return a * t + b
r = pymc.Poisson(mu=linear, name='r', value=obs, observed=True)
model = pymc.Model([a, b, r])
map = pymc.MAP(model)
map.fit()
map.revert_to_max()
print "a :", a._value
print "b :", b._value
This is working fine. But my actual Poisson process is capped by a deterministic value. As I can't associate my observed values with a Deterministic function, I'm adding a Normal Stochastic with a small variance for my observations:
import numpy as np
import pymc
# Observation
a_actual = 1.3
b_actual = 2.0
t = np.arange(10)
obs = np.random.poisson(a_actual * t + b_actual).clip(0, 10)
# Model
a = pymc.Uniform(name='a', value=1., lower=0, upper=10)
b = pymc.Uniform(name='b', value=1., lower=0, upper=10)
@pymc.deterministic
def linear(a=a, b=b):
    return a * t + b
r = pymc.Poisson(mu=linear, name='r')
@pymc.deterministic
def clip(r=r):
    return r.clip(0, 10)
rc = pymc.Normal(mu=r, tau=0.001, name='rc', value=obs, observed=True)
model = pymc.Model([a, b, r, rc])
map = pymc.MAP(model)
map.fit()
map.revert_to_max()
print "a :", a._value
print "b :", b._value
This code produces the following error:
Traceback (most recent call last):
File "pymc-bug-2.py", line 59, in <module>
map.revert_to_max()
File "pymc/NormalApproximation.py", line 486, in revert_to_max
self._set_stochastics([self.mu[s] for s in self.stochastics])
File "pymc/NormalApproximation.py", line 58, in __getitem__
tot_len += self.owner.stochastic_len[p]
KeyError: 0
Any idea what I am doing wrong?
By "Capped" do you mean that it is a truncated Poisson? It appears thats what you are saying. If it were a left truncation (which is more common), you could use the TruncatedPoisson distribution, but since you are doing a right truncation, you cannot (we should have made this more general!). What you are trying will not work -- the Poisson object has no clip() method. What you can do is use a factor potential. It would look like this:
@pymc.potential
def clip(r=r):
    if np.any(r > 10):
        return -np.inf
    return 0
This will constrain the values of r to be less than 10. Refer to the pymc docs for information on the Potential class.
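A brief sketch of how that potential could slot into the questioner's second model (my own assumption about the wiring, not part of the original answer): the clip deterministic is dropped, the potential constrains the latent r, and the observed Normal takes r directly as its mean.

r = pymc.Poisson(mu=linear, name='r')

@pymc.potential
def clip(r=r):
    return -np.inf if np.any(r > 10) else 0

rc = pymc.Normal(mu=r, tau=0.001, name='rc', value=obs, observed=True)
model = pymc.Model([a, b, r, rc, clip])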

Fastest way to get a hash from a list?

I have a long list of integers that I want to turn into an MD5 hash. What's the quickest way to do this? I have tried two options, both similar. Just wondering if I'm missing an obviously quicker method.
import random
import hashlib
import cPickle as pickle
r = [random.randrange(1, 1000) for _ in range(0, 1000000)]
def method1(r):
    p = pickle.dumps(r, -1)
    return hashlib.md5(p).hexdigest()

def method2(r):
    p = str(r)
    return hashlib.md5(p).hexdigest()

def method3(r):
    p = ','.join(map(str, r))
    return hashlib.md5(p).hexdigest()
Then time it in IPython:
timeit method1(r)
timeit method2(r)
timeit method3(r)
Gives me this:
In [8]: timeit method1(r)
10 loops, best of 3: 68.7 ms per loop
In [9]: timeit method2(r)
10 loops, best of 3: 176 ms per loop
In [10]: timeit method3(r)
1 loops, best of 3: 270 ms per loop
So, option 1 is the best I've got. But I have to do it a lot and it's currently the rate determining step in my code.
Any tips or tricks to get a unique hash from a big list quicker than what's here, using Python 2.7?
You may find this useful. It uses my own custom benchmarking framework (based on timeit) to gather and print the results. Since the variations in speed are primarily due to the need to convert the r list into something that hashlib.md5() can work with, I've updated the suite of test cases to show how storing the values in an array.array instead, as @DSM suggested in a comment, would dramatically speed things up. Note that since the integers in the list are all relatively small, I've stored them in an array of short (2-byte) values.
from __future__ import print_function
import sys
import timeit
setup = """
import array
import random
import hashlib
import marshal
import cPickle as pickle
import struct
r = [random.randrange(1, 1000) for _ in range(0, 1000000)]
ra = array.array('h', r) # create an array of shorts equivalent
def method1(r):
    p = pickle.dumps(r, -1)
    return hashlib.md5(p).hexdigest()

def method2(r):
    p = str(r)
    return hashlib.md5(p).hexdigest()

def method3(r):
    p = ','.join(map(str, r))
    return hashlib.md5(p).hexdigest()

def method4(r):
    fmt = '%dh' % len(r)
    buf = struct.pack(fmt, *r)
    return hashlib.md5(buf).hexdigest()

def method5(r):
    a = array.array('h', r)
    return hashlib.md5(a).hexdigest()

def method6(r):
    m = marshal.dumps(r)
    return hashlib.md5(m).hexdigest()

# using pre-built array...

def pb_method1(ra):
    p = pickle.dumps(ra, -1)
    return hashlib.md5(p).hexdigest()

def pb_method2(ra):
    p = str(ra)
    return hashlib.md5(p).hexdigest()

def pb_method3(ra):
    p = ','.join(map(str, ra))
    return hashlib.md5(p).hexdigest()

def pb_method4(ra):
    fmt = '%dh' % len(ra)
    buf = struct.pack(fmt, *ra)
    return hashlib.md5(buf).hexdigest()

def pb_method5(ra):
    return hashlib.md5(ra).hexdigest()

def pb_method6(ra):
    m = marshal.dumps(ra)
    return hashlib.md5(m).hexdigest()
"""
statements = {
    "pickle.dumps(r, -1)": """
method1(r)
""",
    "str(r)": """
method2(r)
""",
    "','.join(map(str, r))": """
method3(r)
""",
    "struct.pack(fmt, *r)": """
method4(r)
""",
    "array.array('h', r)": """
method5(r)
""",
    "marshal.dumps(r)": """
method6(r)
""",
    # versions using pre-built array...
    "pickle.dumps(ra, -1)": """
pb_method1(ra)
""",
    "str(ra)": """
pb_method2(ra)
""",
    "','.join(map(str, ra))": """
pb_method3(ra)
""",
    "struct.pack(fmt, *ra)": """
pb_method4(ra)
""",
    "ra (pre-built)": """
pb_method5(ra)
""",
    "marshal.dumps(ra)": """
pb_method6(ra)
""",
}
N = 10
R = 3
timings = [(
    idea,
    min(timeit.repeat(statements[idea], setup=setup, repeat=R, number=N)),
) for idea in statements]

longest = max(len(t[0]) for t in timings)  # length of longest name

print('fastest to slowest timings (Python {}.{}.{})\n'.format(*sys.version_info[:3]),
      ' ({:,d} calls, best of {:d})\n'.format(N, R))

ranked = sorted(timings, key=lambda t: t[1])  # sort by speed (fastest first)

for timing in ranked:
    print("{:>{width}} : {:.6f} secs, rel speed {rel:>8.6f}x".format(
        timing[0], timing[1], rel=timing[1]/ranked[0][1], width=longest))
Results:
fastest to slowest timings (Python 2.7.6)
(10 calls, best of 3)
ra (pre-built) : 0.037906 secs, rel speed 1.000000x
marshal.dumps(ra) : 0.177953 secs, rel speed 4.694626x
marshal.dumps(r) : 0.695606 secs, rel speed 18.350932x
pickle.dumps(r, -1) : 1.266096 secs, rel speed 33.401179x
array.array('h', r) : 1.287884 secs, rel speed 33.975950x
pickle.dumps(ra, -1) : 1.955048 secs, rel speed 51.576558x
struct.pack(fmt, *r) : 2.085602 secs, rel speed 55.020743x
struct.pack(fmt, *ra) : 2.357887 secs, rel speed 62.203962x
str(r) : 2.918623 secs, rel speed 76.996860x
str(ra) : 3.686666 secs, rel speed 97.258777x
','.join(map(str, r)) : 4.701531 secs, rel speed 124.032173x
','.join(map(str, ra)) : 4.968734 secs, rel speed 131.081303x
You can improve performance slightly, simplify your code, and remove an import by using Python's builtin hash function instead of md5 from hashlib:
import random
import cPickle as pickle
r = [random.randrange(1, 1000) for _ in range(0, 1000000)]
def method1(r):
    p = pickle.dumps(r, -1)
    return hash(p)

def method2(r):
    p = str(r)
    return hash(p)

def method3(r):
    p = ','.join(map(str, r))
    return hash(p)
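One usage caveat with this approach (my own note, not from the original answer): hash() returns a plain integer rather than a hex digest, and it is not a cryptographic hash, so it is only appropriate where a cheap in-process fingerprint is acceptable.

fingerprint = method1(r)                     # an int, not a 32-character hex string
print isinstance(fingerprint, (int, long))   # True on Python 2.7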
