Errror message for py tesseract - filenotfoundexception

im a noobie at python. Im trying to automate the "chimp test" on human benchmark. I decided to use tesseract.
import pyautogui as p
import pytesseract
from PIL import Image
import PIL.ImageOps
import time as t
a,b = 0,0
while a==0 and b==0:
try:
a,b = p.locateCenterOnScreen("Start.png",confidence=0.7)
except TypeError:
pass
p.click(a,b)
columns = 8
rows = 5
squareSize = 88
numbers = 4
while True:
t.sleep(1)
p.moveTo(100,100)
image = p.screenshot(region = (320, 96, columns * squareSize, rows * squareSize))
image = image.convert("L")
image = PIL.ImageOps.invert(image)
pixels = image.load()
for x in range(image.size[0]):
for y in range(image.size[1]):
if pixels[x,y] > 10:
pixels[x,y] = 255
else:
pixels[x,y] = 0
coords = [(0,0)] * numbers
grid = []
for y in range(rows):
row = []
for x in range(columns):
digit = pytesseract.image_to_string(image.crop((x * squareSize, y * squareSize, (x+1) * squareSize, (y+1) * squareSize)), config="--psm 10 --oem 2 -c tessedit_char_whitelist=0123456789 classify_max_slope=20 classify_min_slope=0.2")
digit = digit[:-2]
if digit.isdigit():
coords[int(digit)-1] = ((x+0.5)*squareSize,(y+0.5)*squareSize)
row.append(digit)
print("At " + str(x) + "," + str(y) + " pytesseract saw: " + digit)
grid.append(row)
print(grid)
for z in range(numbers):
p.click(coords[z][0]+320,coords[z][1]+96)
a,b = 0,0
while a==0 and b==0:
try:
a,b = p.locateCenterOnScreen("Continue.JPG",confidence=0.7)
except TypeError:
pass
p.click(a,b)
numbers += 1
and i get the error message
Traceback (most recent call last):
File "C:\Users\franc\AppData\Local\Programs\Python\Python310\lib\site-packages\pytesseract\pytesseract.py", line 254, in run_tesseract
proc = subprocess.Popen(cmd_args, **subprocess_args())
File "C:\Users\franc\AppData\Local\Programs\Python\Python310\lib\subprocess.py", line 966, in __init__
self._execute_child(args, executable, preexec_fn, close_fds,
File "C:\Users\franc\AppData\Local\Programs\Python\Python310\lib\subprocess.py", line 1435, in _execute_child
hp, ht, pid, tid = _winapi.CreateProcess(executable, args,
FileNotFoundError: [WinError 2] The system cannot find the file specified
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "C:\Users\franc\Desktop\New folder\New Text Document.py", line 40, in <module>
digit = pytesseract.image_to_string(image.crop((x * squareSize, y * squareSize, (x+1) * squareSize, (y+1) * squareSize)), config="--psm 10 --oem 2 -c tessedit_char_whitelist=0123456789 classify_max_slope=20 classify_min_slope=0.2")
File "C:\Users\franc\AppData\Local\Programs\Python\Python310\lib\site-packages\pytesseract\pytesseract.py", line 416, in image_to_string
return {
File "C:\Users\franc\AppData\Local\Programs\Python\Python310\lib\site-packages\pytesseract\pytesseract.py", line 419, in <lambda>
Output.STRING: lambda: run_and_get_output(*args),
File "C:\Users\franc\AppData\Local\Programs\Python\Python310\lib\site-packages\pytesseract\pytesseract.py", line 286, in run_and_get_output
run_tesseract(**kwargs)
File "C:\Users\franc\AppData\Local\Programs\Python\Python310\lib\site-packages\pytesseract\pytesseract.py", line 258, in run_tesseract
raise TesseractNotFoundError()
pytesseract.pytesseract.TesseractNotFoundError: tesseract is not installed or it's not in your PATH. See README file for more information.
ive installed tesseract correctly, i have uninstalled it, reinstalled it and used it previously too. if i could get an explanation or a fix for this that would be great. Im very new to python so im still learning the ropes

Related

How do I solve the following error message: TypeError: fit() missing 1 required positional argument: 'y'

My code is:
#Drop the irrelevant variables from train2 dataset
#Create the independant variable X train and dependant variable y train
X_train = train2.drop(['Item_Outlet_Sales', 'Outlet_Identifier', 'Item_Identifier'], axis=1)
y_train = train2.Item_Outlet_Sales
#Drop those irrelevant variables from test2 dataset
X_test = test2.drop(['Outlet_Identifier', 'Item_Identifier'], axis=1)
#Lets 1st import sklearn liobrary for model selection
from sklearn import model_selection
from sklearn.linear_model import LinearRegression
#Create a train and test split. Use X-train and y_train for linear regression.
xtrain, xtest, ytrain, ytest = model_selection.train_test_split(X_train, y_train, test_size=0.3, random_state=42)
#Fit the linear regression to the training dataset
lin = LinearRegression()
LinearRegression.fit(xtrain, ytrain)
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
/var/folders/mh/_vtvlkm54rn8_9pqdq1_7g9m0000gn/T/ipykernel_1637/3652998115.py in <module>
----> 1 LinearRegression.fit(xtrain, ytrain)
TypeError: fit() missing 1 required positional argument: 'y'
I first tried:
lin.fit(xtrain, ytrain)
Output:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
/var/folders/mh/_vtvlkm54rn8_9pqdq1_7g9m0000gn/T/ipykernel_1637/2886984673.py in <module>
----> 1 lin.fit(xtrain, ytrain)
~/opt/anaconda3/lib/python3.9/site-packages/sklearn/linear_model/_base.py in fit(self, X, y, sample_weight)
660 accept_sparse = False if self.positive else ["csr", "csc", "coo"]
661
--> 662 X, y = self._validate_data(
663 X, y, accept_sparse=accept_sparse, y_numeric=True, multi_output=True
664 )
~/opt/anaconda3/lib/python3.9/site-packages/sklearn/base.py in _validate_data(self, X, y, reset, validate_separately, **check_params)
579 y = check_array(y, **check_y_params)
580 else:
--> 581 X, y = check_X_y(X, y, **check_params)
582 out = X, y
583
~/opt/anaconda3/lib/python3.9/site-packages/sklearn/utils/validation.py in check_X_y(X, y, accept_sparse, accept_large_sparse, dtype, order, copy, force_all_finite, ensure_2d, allow_nd, multi_output, ensure_min_samples, ensure_min_features, y_numeric, estimator)
962 raise ValueError("y cannot be None")
963
--> 964 X = check_array(
965 X,
966 accept_sparse=accept_sparse,
~/opt/anaconda3/lib/python3.9/site-packages/sklearn/utils/validation.py in check_array(array, accept_sparse, accept_large_sparse, dtype, order, copy, force_all_finite, ensure_2d, allow_nd, ensure_min_samples, ensure_min_features, estimator)
744 array = array.astype(dtype, casting="unsafe", copy=False)
745 else:
--> 746 array = np.asarray(array, order=order, dtype=dtype)
747 except ComplexWarning as complex_warning:
748 raise ValueError(
~/opt/anaconda3/lib/python3.9/site-packages/pandas/core/generic.py in __array__(self, dtype)
2062
2063 def __array__(self, dtype: npt.DTypeLike | None = None) -> np.ndarray:
-> 2064 return np.asarray(self._values, dtype=dtype)
2065
2066 def __array_wrap__(
ValueError: could not convert string to float: 'Grocery Store'

Using cython functions with multiprocessing failed

I have the following code in my jupyter notebook on my MacOS to compute similarity measure between list pairs. When running distSeq on a pair of lists distSeq(list1,list2,len1,len2), the similarity measure can be successfully computed. However, when I use multiprocessing in another cell, trying to compute the measure among many list pairs, a ModuleNotFound error is raised, and I'm wondering the problem is as I can't find any answer online.
Define functions in a cython cell
%%cython --annotate
cimport numpy as np
import numpy as np
import osmnx as ox
from scipy.spatial.distance import cdist
import cython
from __main__ import G
def simPnt(p1_n,p2_m):
*details cleared*
cpdef list lcs(list r1,list r2): # longest common subsequence btw route 1 and route 2
cdef int N = len(r1)
cdef int M = len(r2) # route is list of node ID
L_arr = np.empty((N+1,M+1),dtype=np.single)
cdef float[:,:] L = L_arr #declare L -> sum of similarity score
cdef list LCS=[]
sp_arr = cdist(np.array(r1).reshape((N,1)),np.array(r2).reshape((M,1)),simPnt)
cdef double[:,:] sp = sp_arr
cdef int n, m
for n in range(N+1):
for m in range(M+1):
if n == 0 or m == 0 :
L[n,m] = 0
else:
L[n,m] = max([L[n-1,m-1]+sp[n-1,m-1],L[n,m-1],L[n-1,m]])
# backtrack
n,m = N,M
cdef float tmp
while n > 0 and m > 0:
tmp = L[n-1,m-1]+sp[n-1,m-1]
if tmp > L[n,m-1] and tmp > L[n-1,m]:
LCS.append((n-1,m-1))
n -= 1
m -= 1
elif L[n-1,m] > L[n,m-1]:
n-=1
else:
m-=1
#print(LCS)
return LCS[::-1] # matched points solely from either route 1 or 2
cpdef float distSeq(list r1,list r2,float lenseq_r1,float lenseq_r2):
*details cleared*
if min(lenseq_LCS1,lenseq_LCS2) < gamma:
return 1
elif lenseq_r1<lenseq_r2:
return 1- (lenseq_LCS1/lenseq_r1)
else:
return 1- (lenseq_LCS2/lenseq_r2)
Run the function with multiprocessing
import multiprocessing as mp
pool = mp.Pool(mp.cpu_count())
results = [pool.apply(distSeq, args=(r1,r2,lenseq[i],lenseq[j])) for ((i,r1),(j,r2)) in itertools.combinations(enumerate(routes), 2)]
df2 = pd.DataFrame()
df2['r1_r2'] = list(itertools.combinations(list(range(len(routes))),2))
df2['distSeq'] = results
pool.close()
Error log
Process SpawnPoolWorker-28:
Traceback (most recent call last):
File "/Users/timmyhsu/miniconda3/envs/py3.8/lib/python3.8/multiprocessing/process.py", line 315, in _bootstrap
self.run()
File "/Users/timmyhsu/miniconda3/envs/py3.8/lib/python3.8/multiprocessing/process.py", line 108, in run
self._target(*self._args, **self._kwargs)
File "/Users/timmyhsu/miniconda3/envs/py3.8/lib/python3.8/multiprocessing/pool.py", line 114, in worker
task = get()
File "/Users/timmyhsu/miniconda3/envs/py3.8/lib/python3.8/multiprocessing/queues.py", line 358, in get
return _ForkingPickler.loads(res)
ModuleNotFoundError: No module named '_cython_magic_86f410b10c6f86b67ee02703c26cace8'

wxPython FloatCanvas event Binding

I am using Python 3.6 with wxPython 4.1.0 gtk3 (phoenix) wxWidgets 3.1.4. There is a delay in the Bind function on every 4th line that i try to plot and after about 12 lines the program crashs. Normally my code uses a grid to input the points for the lines but the attached code generates the data once the Draw button is clicked with exactly the same effect. The delay is about 2.5 seconds on the 4th line normally it takes about .1 secs to plot a line. I have tried two functions for the Bind call one uses the event the other calls the object.
If anyone knows a work around can you please let me know.
import time
import string
import wx
from wx.lib.floatcanvas import NavCanvas, FloatCanvas
import wx.lib.colourdb
class InputForm(wx.Frame):
'''set up the form and draw axis'''
def __init__(self):
super(InputForm, self).__init__(None, wx.ID_ANY, title='Plot Lines', size=(1300, 830))
# set dictionary of points; key node letter, value tuple of point,
self.pts = {}
# create the form level sizer
Main_Sizer = wx.BoxSizer(wx.HORIZONTAL)
# add the sizer for the left side widgets
sizerL = wx.BoxSizer(wx.VERTICAL)
# add the grid and then set it ot he left panel
btnsizer = wx.BoxSizer(wx.HORIZONTAL)
drw = wx.Button(self, -1, "Draw\nLines")
btnsizer.Add(drw, 0, wx.ALL|wx.ALIGN_CENTER, 5)
# bind the button events to handlers
self.Bind(wx.EVT_BUTTON, self.OnDraw, drw)
sizerL.Add((10, 20))
sizerL.Add(btnsizer, 1, wx.ALIGN_CENTER)
# add the draw panel
rght = NavCanvas.NavCanvas(self,
ProjectionFun=None,
Debug=0,
BackgroundColor="LIGHT GREY",
)
self.Canvas = rght.Canvas
self.InitCanvas()
Main_Sizer.Add(sizerL, 0, wx.EXPAND)
Main_Sizer.Add((10, 10))
Main_Sizer.Add(rght, 1, wx.EXPAND)
self.SetSizer(Main_Sizer)
def InitCanvas(self):
# add the x & y axis
self.Canvas.AddLine([(0, 0), (0, 5)], LineWidth=2, LineColor='Yellow')
self.Canvas.AddLine([(0, 0), (5, 0)], LineWidth=2, LineColor='Green')
origin = self.Canvas.AddScaledTextBox('origin', (0, 0),
Color='blue',
Size=.5,
PadSize=0,
Width=None,
LineColor=None,
Family=wx.MODERN,
Position='tr',
Alignment='bottom',
InForeground=True)
# first Bind of node to EvtLeftDown
origin.Bind(FloatCanvas.EVT_FC_LEFT_DOWN,
lambda evt, selctEnd='Origin':
self.EvtLeftDown(evt, 'Origin'))
wx.CallAfter(self.Canvas.ZoomToBB)
def OnDraw(self, evt):
pts1 = (0, 0)
x = [i for i in range(5, 30, 2)]
y = x[::-1]
pts2 = [(x[i], y[i]) for i in range(0, len(x))]
alph = string.ascii_uppercase
LnLbls = [alph[i] for i in range(0, len(x))]
New_EndPt = True
n = 0
for pt in pts2:
points = []
points.append(pts1)
points.append(pt)
LnLbl = LnLbls[n]
New_EndPt = True
n += 1
self.DrawLine(points, LnLbl, New_EndPt)
def DrawLine(self, points, LnLbl, New_EndPt):
'''Draws the line object as specified in the VarifyData() function'''
# label the end point of the line in lower case
if New_EndPt is True:
new_end = self.Canvas.AddScaledTextBox(LnLbl.lower(), tuple(points[1]),
Color='black',
Size=.5,
PadSize=.2,
Width=None,
LineColor=None,
Family=wx.MODERN,
Position='cc',
Alignment='bottom',
InForeground=True)
new_end.Bind(FloatCanvas.EVT_FC_LEFT_DOWN,
lambda evt, selctEnd=LnLbl.lower():
self.EvtLeftDown(evt, selctEnd))
# define the new line
self.Canvas.AddLine(points, LineWidth=2, LineColor='red')
# add the new line to the list of lines
self.Canvas.AddPoint(tuple(points[1]), 'black', 8)
# locate the center of the new line for the label location
lncntr = ((int(points[0][0])+int(points[1][0]))//2,
(int(points[0][1])+int(points[1][1]))//2)
# place the new line lable
new_line = self.Canvas.AddScaledTextBox(LnLbl, lncntr,
Color='red',
Size=.5,
PadSize=None,
Width=None,
LineColor=None,
Family=wx.MODERN,
Position='tc',
Alignment='bottom',
InForeground=True)
new_line.Name = LnLbl
tic = time.perf_counter()
new_line.Bind(FloatCanvas.EVT_FC_LEFT_DOWN, self.ObjLeftDown)
toc = time.perf_counter()
print(f'time to execute BIND function for DrawLine line 136 = {toc-tic:0.2f}')
wx.CallAfter(self.Canvas.ZoomToBB)
def ObjLeftDown(self, object):
lbl = object.Name
if lbl == 'Origin':
self.Node(lbl)
elif 65 <= ord(lbl) <= 90:
print('you have selected line ', lbl)
elif 97 <= ord(lbl) <= 122:
print('you have selected node ', lbl)
def EvtLeftDown(self, evt, lbl):
if lbl == 'Origin':
print('you have selected the origin')
elif 97 <= ord(lbl) <= 122:
print('you have selected node ', lbl)
# Run the program
if __name__ == "__main__":
app = wx.App(False)
frame = InputForm()
frame.Center()
frame.Show()
app.MainLoop()
With minor adjustments to your code, I am unable to replicate the delays that you refer to.
No matter how many times I re-draw the lines, the timings, always fall within roughly the same time frame.
The bindings are to different objects, and it's just binding an event to an object, so I doubt that that is the issue.
import time
import string
import wx
from wx.lib.floatcanvas import NavCanvas, FloatCanvas
import wx.lib.colourdb
class InputForm(wx.Frame):
'''set up the form and draw axis'''
def __init__(self):
super(InputForm, self).__init__(None, wx.ID_ANY, title='Plot Lines', size=(1300, 830))
# set dictionary of points; key node letter, value tuple of point,
self.pts = {}
self.draw_repetitions = 0
# create the form level sizer
Main_Sizer = wx.BoxSizer(wx.HORIZONTAL)
# add the sizer for the left side widgets
sizerL = wx.BoxSizer(wx.VERTICAL)
# add the grid and then set it ot he left panel
btnsizer = wx.BoxSizer(wx.HORIZONTAL)
drw = wx.Button(self, -1, "Draw\nLines")
btnsizer.Add(drw, 0, wx.ALL|wx.ALIGN_CENTER, 5)
# bind the button events to handlers
self.Bind(wx.EVT_BUTTON, self.OnDraw, drw)
sizerL.Add((10, 20))
sizerL.Add(btnsizer, 1, wx.ALIGN_CENTER)
# add the draw panel
self.rght = NavCanvas.NavCanvas(self,
ProjectionFun=None,
Debug=0,
BackgroundColor="LIGHT GREY",
)
#self.Canvas = self.rght.Canvas
self.InitCanvas()
Main_Sizer.Add(sizerL, 0, wx.EXPAND)
Main_Sizer.Add((10, 10))
Main_Sizer.Add(self.rght, 1, wx.EXPAND)
self.SetSizer(Main_Sizer)
def InitCanvas(self):
# add the x & y axis
self.Canvas = self.rght.Canvas
self.Canvas.ClearAll()
self.Canvas.AddLine([(0, 0), (0, 5)], LineWidth=2, LineColor='Yellow')
self.Canvas.AddLine([(0, 0), (5, 0)], LineWidth=2, LineColor='Green')
origin = self.Canvas.AddScaledTextBox('origin', (0, 0),
Color='blue',
Size=.5,
PadSize=0,
Width=None,
LineColor=None,
Family=wx.MODERN,
Position='tr',
Alignment='bottom',
InForeground=True)
# first Bind of node to EvtLeftDown
origin.Bind(FloatCanvas.EVT_FC_LEFT_DOWN,
lambda evt, selctEnd='Origin':
self.EvtLeftDown(evt, 'Origin'))
wx.CallAfter(self.Canvas.ZoomToBB)
def OnDraw(self, evt):
self.InitCanvas()
pts1 = (0, 0)
x = [i for i in range(5, 30, 2)]
y = x[::-1]
pts2 = [(x[i], y[i]) for i in range(0, len(x))]
alph = string.ascii_uppercase
LnLbls = [alph[i] for i in range(0, len(x))]
New_EndPt = True
n = 0
for pt in pts2:
points = []
points.append(pts1)
points.append(pt)
LnLbl = LnLbls[n]
New_EndPt = True
n += 1
self.DrawLine(points, LnLbl, New_EndPt)
def DrawLine(self, points, LnLbl, New_EndPt):
'''Draws the line object as specified in the VarifyData() function'''
self.draw_repetitions += 1
# label the end point of the line in lower case
if New_EndPt is True:
new_end = self.Canvas.AddScaledTextBox(LnLbl.lower(), tuple(points[1]),
Color='black',
Size=.5,
PadSize=.2,
Width=None,
LineColor=None,
Family=wx.MODERN,
Position='cc',
Alignment='bottom',
InForeground=True)
new_end.Bind(FloatCanvas.EVT_FC_LEFT_DOWN,
lambda evt, selctEnd=LnLbl.lower():
self.EvtLeftDown(evt, selctEnd))
# define the new line
self.Canvas.AddLine(points, LineWidth=2, LineColor='red')
# add the new line to the list of lines
self.Canvas.AddPoint(tuple(points[1]), 'black', 8)
# locate the center of the new line for the label location
lncntr = ((int(points[0][0])+int(points[1][0]))//2,
(int(points[0][1])+int(points[1][1]))//2)
# place the new line lable
new_line = self.Canvas.AddScaledTextBox(LnLbl, lncntr,
Color='red',
Size=.5,
PadSize=None,
Width=None,
LineColor=None,
Family=wx.MODERN,
Position='tc',
Alignment='bottom',
InForeground=True)
new_line.Name = LnLbl
tic = time.perf_counter()
new_line.Bind(FloatCanvas.EVT_FC_LEFT_DOWN, self.ObjLeftDown)
toc = time.perf_counter()
print(f'time to execute BIND function for DrawLine line ',LnLbl, toc-tic)
print(f'Draw repetitions ',self.draw_repetitions)
# wx.CallAfter(self.Canvas.ZoomToBB)
self.Canvas.ZoomToBB()
def ObjLeftDown(self, object):
lbl = object.Name
if lbl == 'Origin':
self.Node(lbl)
print(dir(self.Node))
elif 65 <= ord(lbl) <= 90:
print('you have selected line ', lbl)
elif 97 <= ord(lbl) <= 122:
print('you have selected node ', lbl)
def EvtLeftDown(self, evt, lbl):
if lbl == 'Origin':
print('you have selected the origin')
elif 97 <= ord(lbl) <= 122:
print('you have selected node ', lbl)
# try:
# evt.Skip()
# except:
# pass
# Run the program
if __name__ == "__main__":
app = wx.App(False)
frame = InputForm()
frame.Center()
frame.Show()
app.MainLoop()
TL;DR: As a workaround don't install wxpython via pip inside a venv, install the ubuntu package python3-wxgtk4.0.
I was facing the same issue you ran into - at least I think it is the same issue - and was, in a way, able to solve it. Running the code provided in the answer by Rolf of Saxony, the program would freeze for minutes when trying to draw the 12th or so line. After some minutes however, I at least got an exception out of it:
Traceback (most recent call last):
File "/bug_test.py", line 92, in OnDraw
self.DrawLine(points, LnLbl, New_EndPt)
File "/bug_test.py", line 138, in DrawLine
new_line.Bind(FloatCanvas.EVT_FC_LEFT_DOWN, self.ObjLeftDown)
File "/env/lib/python3.8/site-packages/wx/lib/floatcanvas/FCObjects.py", line 236, in Bind
self.HitColor = next(self._Canvas.HitColorGenerator)
StopIteration
Which to me looks like some problem with the generation of the hit color. I guess the iterator generating it is for some reason not able to come up with an adequate hit color and thus runs for almost ever.
Although interesting, this did not help in solving the problem. What helped was leaving my virtual environment. Initially I was running everything inside an venv and installed wxpython via pip. When I left the venv and installed wxpython via the ubuntu package python3-wxgtk4.0, everything worked as expected. So this is the workaround I found. Nevertheless the problem inside the venv persists. I suspect it might have something to do with the installation via pip which builds wxpython from source.

user-defined kernels on multiple gpu in cupy

I'm trying to launch raw kernels in cupy on multiple gpu.
Example 1 (RawKernel):
import numpy as np
import cupy
gpu_indices = (0,1)
n_gpu = len(gpu_indices)
source = """
extern "C" {
__global__
void my_add(const float *x1, const float *x2, float *y, const int n)
{
const int tid = threadIdx.x + blockDim.x * blockIdx.x;
if (tid < n) {
y[tid] = x1[tid] + x2[tid];
}
}
}
"""
add_kernel = [None] * n_gpu
for gpu_id in gpu_indices:
with cupy.cuda.Device(gpu_id):
add_kernel[gpu_id] = cupy.RawKernel(source, 'my_add')
x1 = [None] * n_gpu
x2 = [None] * n_gpu
y = [None] * n_gpu
streams = [None] * n_gpu
for gpu_id in gpu_indices:
with cupy.cuda.Device(gpu_id):
x1[gpu_id] = cupy.arange(25, dtype=cupy.float32).reshape(5, 5)
x2[gpu_id] = cupy.arange(25, dtype=cupy.float32).reshape(5, 5)
y[gpu_id] = cupy.zeros((5, 5), dtype=cupy.float32)
streams[gpu_id] = cupy.cuda.stream.Stream()
for gpu_id in gpu_indices:
cupy.cuda.Device(gpu_id).use()
streams[gpu_id].use()
add_kernel[gpu_id]((5,), (5,), (x1[gpu_id], x2[gpu_id], y[gpu_id], x1[gpu_id].size), stream=streams[gpu_id])
streams[gpu_id].synchronize()
print(y[gpu_id])
This code runs without error with gpu_id=(0,) (single gpu) and gpu_id=(0,1) (dual gpu) on my machine.
Example 2 (RawModule):
import numpy as np
import cupy
gpu_indices = (0,1)
n_gpu = len(gpu_indices)
source = """
extern "C" {
__global__
void my_add(const float *x1, const float *x2, float *y, const int n)
{
const int tid = threadIdx.x + blockDim.x * blockIdx.x;
if (tid < n) {
y[tid] = x1[tid] + x2[tid];
}
}
}
"""
module = cupy.RawModule(code=source)
add_kernel = [None] * n_gpu
for gpu_id in gpu_indices:
with cupy.cuda.Device(gpu_id):
add_kernel[gpu_id] = module.get_function('my_add')
# same as example 1
x1 = [None] * n_gpu
x2 = [None] * n_gpu
y = [None] * n_gpu
streams = [None] * n_gpu
for gpu_id in gpu_indices:
with cupy.cuda.Device(gpu_id):
x1[gpu_id] = cupy.arange(25, dtype=cupy.float32).reshape(5, 5)
x2[gpu_id] = cupy.arange(25, dtype=cupy.float32).reshape(5, 5)
y[gpu_id] = cupy.zeros((5, 5), dtype=cupy.float32)
streams[gpu_id] = cupy.cuda.stream.Stream()
for gpu_id in gpu_indices:
cupy.cuda.Device(gpu_id).use()
streams[gpu_id].use()
add_kernel[gpu_id]((5,), (5,), (x1[gpu_id], x2[gpu_id], y[gpu_id], x1[gpu_id].size), stream=streams[gpu_id])
streams[gpu_id].synchronize()
print(y[gpu_id])
Only difference between two codes is the way of getting kernel. In Example 2, cupy.RawModule() is called to compile source and kernels are acquired by using get_function(). However, this code fails with gpu_id=(0,1):
[[ 0. 2. 4. 6. 8.]
[10. 12. 14. 16. 18.]
[20. 22. 24. 26. 28.]
[30. 32. 34. 36. 38.]
[40. 42. 44. 46. 48.]]
Traceback (most recent call last):
File "test_rawmodule.py", line 42, in <module>
add_kernel[gpu_id]((5,), (5,), (x1[gpu_id], x2[gpu_id], y[gpu_id], x1[gpu_id].size), stream=streams[gpu_id])
File "cupy/core/raw.pyx", line 66, in cupy.core.raw.RawKernel.__call__
File "cupy/cuda/function.pyx", line 162, in cupy.cuda.function.Function.__call__
File "cupy/cuda/function.pyx", line 144, in cupy.cuda.function._launch
File "cupy/cuda/driver.pyx", line 293, in cupy.cuda.driver.launchKernel
File "cupy/cuda/driver.pyx", line 118, in cupy.cuda.driver.check_status
cupy.cuda.driver.CUDADriverError: CUDA_ERROR_INVALID_HANDLE: invalid resource handle
Could anyone provide an example of cupy.RawModule on multiple gpu? To me RawModule is preferred over RawKernel for some reasons.
Here is my environment:
cupy 7.3, numpy 1.18.1
cuda 10.2 (10.2.89_440.33.01)
python 3.6.10 (anaconda)
Linux Mint 19.1 Tessa (based on Ubuntu 18.04)
Two gtx 1080 Ti gpu (works well with native CUDA C/C++ programming)
add: If I try to compile source on each gpu like:
module = [None] * n_gpu
add_kernel = [None] * n_gpu
for gpu_id in gpu_indices:
with cupy.cuda.Device(gpu_id):
module[gpu_id] = cupy.RawModule(code=source)
add_kernel[gpu_id] = module[gpu_id].get_function('my_add')
Then I have following error messages:
module[gpu_id] = cupy.RawModule(code=source)
File "cupy/core/raw.pyx", line 252, in cupy.core.raw.RawModule.__init__
File "cupy/core/carray.pxi", line 146, in cupy.core.core.compile_with_cache
File "cupy/core/carray.pxi", line 194, in cupy.core.core.compile_with_cache
File "/home/kejang/anaconda3/envs/python3.6/lib/python3.6/site-packages/cupy/cuda/compiler.py", line 287, in compile_with_cache
extra_source, backend)
File "/home/kejang/anaconda3/envs/python3.6/lib/python3.6/site-packages/cupy/cuda/compiler.py", line 335, in _compile_with_cache_cuda
mod.load(cubin)
File "cupy/cuda/function.pyx", line 197, in cupy.cuda.function.Module.load
File "cupy/cuda/function.pyx", line 199, in cupy.cuda.function.Module.load
File "cupy/cuda/driver.pyx", line 240, in cupy.cuda.driver.moduleLoadData
File "cupy/cuda/driver.pyx", line 118, in cupy.cuda.driver.check_status
cupy.cuda.driver.CUDADriverError: CUDA_ERROR_CONTEXT_IS_DESTROYED: context is destroyed
This is strange because I think source should be compiled on each gpu like Example 1.

Sparse Matrix: ValueError: matrix type must be 'f', 'd', 'F', or 'D'

I want to do SVD on a sparse matrix by using scipy:
from svd import compute_svd
print("The size of raw matrix: "+str(len(raw_matrix))+" * "+str(len(raw_matrix[0])))
from scipy.sparse import dok_matrix
dok = dok_matrix(raw_matrix)
matrix = compute_svd( dok )
The function compute_svd is my customized module like this:
def compute_svd( matrix ):
from scipy.sparse import linalg
from scipy import dot, mat
# e.g., matrix = [[2,1,0,0], [4,3,0,0]]
# matrix = mat( matrix );
# print "Original matrix:"
# print matrix
U, s, V = linalg.svds( matrix )
print "U:"
print U
print "sigma:"
print s
print "VT:"
print V
dimensions = 1
rows,cols = matrix.shape
#Dimension reduction, build SIGMA'
for index in xrange(dimensions, rows):
s[index]=0
print "reduced sigma:"
print s
#Reconstruct MATRIX'
# from scipy import dot
reconstructedMatrix= dot(dot(U,linalg.diagsvd(s,len(matrix),len(V))),V)
#Print transform
print "reconstructed:"
print reconstructedMatrix
return reconstructedMatrix
I get an exception:
Traceback (most recent call last):
File "D:\workspace\PyQuEST\src\Practice\baseline_lsi.py", line 96, in <module>
matrix = compute_svd( dok )
File "D:\workspace\PyQuEST\src\Practice\svd.py", line 13, in compute_svd
U, s, V = linalg.svds( matrix )
File "D:\Program\Python26\lib\site-packages\scipy\sparse\linalg\eigen\arpack\arpack.py", line 1596, in svds
eigvals, eigvec = eigensolver(XH_X, k=k, tol=tol ** 2)
File "D:\Program\Python26\lib\site-packages\scipy\sparse\linalg\eigen\arpack\arpack.py", line 1541, in eigsh
ncv, v0, maxiter, which, tol)
File "D:\Program\Python26\lib\site-packages\scipy\sparse\linalg\eigen\arpack\arpack.py", line 519, in __init__
ncv, v0, maxiter, which, tol)
File "D:\Program\Python26\lib\site-packages\scipy\sparse\linalg\eigen\arpack\arpack.py", line 326, in __init__
raise ValueError("matrix type must be 'f', 'd', 'F', or 'D'")
ValueError: matrix type must be 'f', 'd', 'F', or 'D'
This is my first time to do this. How should I fix it? Any ideas? Thank you!
Adding to Anycorn's answer, yes you need to upcast your matrix to float or double. This can be done using the function:
asfptype() from scipy.sparse.coo_matrix
Add this line to upcast it before you call linalg.svds:
matrix = matrix.asfptype()
U, s, V = linalg.svds( matrix )
you have to use float or doubles. you seem to be using unsupported matrix type DOK of ints?.
sparse svd: http://docs.scipy.org/doc/scipy/reference/generated/scipy.sparse.linalg.svds.html
ValueError: matrix type must be 'f', 'd', 'F', or 'D'
This error can be removed by changing Datatype from int to float like this: matrix = matrix.astype(float)
...then this will work

Resources