Can't install the following Python library and packages on Mac - macos

My Python program:
#!/usr/bin/python
from pylab import plot, show
from numpy import vstack, array
from numpy.random import rand
from scipy.cluster.vq import kmeans, vq, whiten
import csv

if __name__ == "__main__":
    K = 3
    data_arr = []
    clust_name_arr = []
    with open('clustering.csv', 'rb') as f:
        reader = csv.reader(f)
        for row in reader:
            data_arr.append([float(x) for x in row[1:]])
            clust_name_arr.append([row[0]])
    data = vstack(data_arr)
    clust_name = vstack(clust_name_arr)
    data = whiten(data)
    centroids, distortion = kmeans(data, K)
    print "distortion = " + str(distortion)
    idx, _ = vq(data, centroids)
    plot(data[idx==0,0], data[idx==0,1], 'ob',
         data[idx==1,0], data[idx==1,1], 'or',
         data[idx==2,0], data[idx==2,1], 'og')
    print clust_name
    print data
    for i in range(K):
        result_names = clust_name[idx==i, 0]
        print "================================="
        print "Cluster " + str(i+1)
        for name in result_names:
            print name
    plot(centroids[:,0],
         centroids[:,1],
         'sg', markersize=8)
    show()
Error Message:
Traceback (most recent call last):
  File "/Users//Desktop/Assignment4(2).py", line 7, in <module>
    from pylab import plot,show
  File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/pylab.py", line 1, in <module>
    from matplotlib.pylab import *
  File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/matplotlib/pylab.py", line 222, in <module>
    from matplotlib import mpl # pulls in most modules
  File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/matplotlib/mpl.py", line 1, in <module>
    from matplotlib import artist
  File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/matplotlib/artist.py", line 7, in <module>
    from transforms import Bbox, IdentityTransform, TransformedBbox, \
  File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/matplotlib/transforms.py", line 35, in <module>
    from matplotlib._path import (affine_transform, count_bboxes_overlapping_bbox,
ImportError: dlopen(/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/matplotlib/_path.so, 2): no suitable image found. Did find:
  /Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/matplotlib/_path.so: no matching architecture in universal wrapper

Related

visualizing regression tree model with continuous numerical target class?

I am practicing with this life expectancy dataset from Kaggle (https://www.kaggle.com/datasets/kumarajarshi/life-expectancy-who?select=Life+Expectancy+Data.csv) and I want to train and visualize a classification and regression tree model. However, I keep getting an error that says "InvocationException: GraphViz's executables not found". I am wondering if this is because of the continuous numerical target type of the dataset? How can I visualize the model?
code:
import warnings
warnings.filterwarnings('ignore')
import pandas as pd
import numpy as np
import seaborn as sn
from sklearn import datasets
from sklearn import metrics
from sklearn import tree
from sklearn.tree import DecisionTreeClassifier
from sklearn.tree import DecisionTreeRegressor
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
from sklearn.preprocessing import LabelEncoder
from sklearn.tree import export_graphviz
import pydotplus
from IPython.display import Image, display

data = pd.read_csv('Life Expectancy Data.csv')
data = data.dropna(how='any')

# feature selection
data = data.drop(columns=['infant deaths', ' thinness 5-9 years', 'Alcohol', 'percentage expenditure', 'Hepatitis B', 'Total expenditure', 'Population', 'Year', 'Country'])

# Creating an instance of LabelEncoder.
le = LabelEncoder()
# Using .fit_transform to fit the label encoder and return the encoded labels
label = le.fit_transform(data['Status'])
# removing the column 'Status' from df
data.drop('Status', axis=1, inplace=True)
# Appending the array to our dataFrame with column name 'Status'
data['Status'] = label

# training model
model_data = data
X = data.drop(columns=['Life expectancy '])
y = data['Life expectancy ']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
model = DecisionTreeRegressor()
model.fit(X_train, y_train)

# visualizing tree
LEtree = tree.export_graphviz(model,
    feature_names=['Adult Mortality', 'Measles', ' BMI', 'under-five deaths', 'Polio', 'Diphtheria', ' HIV/AIDS', 'GDP', ' thinness 1-19 years', 'Income composition of resources', 'Schooling', 'Status'],
    class_names=y,
    label='all',
    rounded=True,
    filled=True)
graph = pydotplus.graph_from_dot_data(LEtree)
display(Image(graph.create_png()))
full error message:
InvocationException Traceback (most recent call last)
Input In [27], in <cell line: 2>()
1 graph=pydotplus.graph_from_dot_data(LEtree)
----> 2 display(Image(graph.create_png()))
File ~\Anaconda3\lib\site-packages\pydotplus\graphviz.py:1797, in Dot.__init__.<locals>.<lambda>(f, prog)
1792 # Automatically creates all the methods enabling the creation
1793 # of output in any of the supported formats.
1794 for frmt in self.formats:
1795 self.__setattr__(
1796 'create_' + frmt,
-> 1797 lambda f=frmt, prog=self.prog: self.create(format=f, prog=prog)
1798 )
1799 f = self.__dict__['create_' + frmt]
1800 f.__doc__ = (
1801 '''Refer to the docstring accompanying the'''
1802 ''''create' method for more information.'''
1803 )
File ~\Anaconda3\lib\site-packages\pydotplus\graphviz.py:1959, in Dot.create(self, prog, format)
1957 self.progs = find_graphviz()
1958 if self.progs is None:
-> 1959 raise InvocationException(
1960 'GraphViz\'s executables not found')
1962 if prog not in self.progs:
1963 raise InvocationException(
1964 'GraphViz\'s executable "%s" not found' % prog)
InvocationException: GraphViz's executables not found
Try installing Graphviz in a proper directory.
You can install it in Anaconda from the conda command prompt using the command below:
conda install -c conda-forge python-graphviz
and replace the previously installed Graphviz directory; this might help with the problem.
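If the executables still are not found after installing, a common workaround is to put Graphviz's bin directory on PATH before rendering. A minimal sketch; the path below is hypothetical, so point it at wherever the "dot" executable actually lives in your environment (check with "where dot" on Windows or "which dot" on macOS/Linux):
import os
import pydotplus
from IPython.display import Image, display

# Hypothetical location: adjust to the directory that actually contains "dot".
os.environ["PATH"] += os.pathsep + r"C:\Users\<you>\Anaconda3\Library\bin\graphviz"

graph = pydotplus.graph_from_dot_data(LEtree)  # LEtree from the question above
display(Image(graph.create_png()))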

"name 'pygeos' is not defined"

When doing df = gpd.GeoDataFrame(df1, crs = 'EPSG:4326', geometry = geopandas.points_from_xy(df1.longitude, df1.latitude)) I get "name 'pygeos' is not defined", yet I have installed pygeos in the directory where I develop, and
python3.9/site-packages/geopandas/_vectorized.py in points_from_xy(x, y, z)
247
248 if compat.USE_PYGEOS:
--> 249 return pygeos.points(x, y, z)
250 else:
251 out = _points_from_xy(x, y, z)
and import pygeos is in the script. Is there a specific way to install pygeos in order to avoid such an error? Thanks
USE_PYGEOS = 1
import pyproj
import shapely
import pandas as pd
pd.options.display.max_rows = 100
import geopandas as gpd
import numpy as np
import sklearn
import matplotlib.pyplot as plt

gpd.show_versions()
print(gpd.options.use_pygeos)

location_df = pd.read_csv("location_01-03_01-04.csv", sep=";")

import rtree
import pygeos
gpd.options.use_pygeos = True

# Point is (longitude, latitude)
# Function making geopandas points of latitude, longitude
location_geo = gpd.GeoDataFrame(location_df, crs='EPSG:4326', geometry=gpd.points_from_xy(location_df.longitude, location_df.latitude))

departments_df = gpd.read_file("departements.geojson", sep=";")
print(departments_df)

import time
start = time.time()
print("hello")
import geopandas
import rtree

# Check whether a department contains a position - returns the department of the position. NaN values are probably in another country
dept_points = geopandas.sjoin(location_geo, departments_df)
end = time.time()
print(end - start, ' s')
print(dept_points)
Somehow this did it for me.
It was about setting the constant and importing packages in a specific order.
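For reference, the pygeos switch is normally driven by an environment variable that must be set before geopandas is first imported; a bare USE_PYGEOS = 1 assignment in Python (as at the top of the snippet above) only creates a local variable. A minimal sketch, assuming geopandas 0.8+ with pygeos installed:
import os
os.environ["USE_PYGEOS"] = "1"  # must be set before the first geopandas import

import pygeos
import geopandas as gpd

gpd.options.use_pygeos = True  # redundant when the env var is set, but harmless
print(gpd.options.use_pygeos)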

How to run a transformers bert without pipeline?

I have found myself dealing with an environment that does not support multiprocessing. How do I run my DistilBERT without the transformers pipeline?
Here is code right now:
import json
import os
import sys
sys.path.append("/mnt/access")
import torch
from transformers import AutoTokenizer, AutoModelForQuestionAnswering
from transformers.pipelines import pipeline

def lambda_handler(event, context):
    print("After:", os.listdir("/mnt/access"))
    tokenizer = AutoTokenizer.from_pretrained('/mnt/access/Dis_Save/')
    model = AutoModelForQuestionAnswering.from_pretrained('/mnt/access/Dis_Save/')
    nlp_qa = pipeline('question-answering', tokenizer=tokenizer, model=model)
    context = "tra"
    question = "tra"
    X = nlp_qa(context=context, question=question)
    return {
        'statusCode': 200,
        'body': json.dumps('Hello from Lambda!')
    }
Error message I get right now:
{
  "errorMessage": "[Errno 38] Function not implemented",
  "errorType": "OSError",
  "stackTrace": [
    "  File \"/var/task/lambda_function.py\", line 18, in lambda_handler\n    X = nlp_qa(context=context, question=question)\n",
    "  File \"/mnt/access/transformers/pipelines.py\", line 1776, in __call__\n    features_list = [\n",
    "  File \"/mnt/access/transformers/pipelines.py\", line 1777, in <listcomp>\n    squad_convert_examples_to_features(\n",
    "  File \"/mnt/access/transformers/data/processors/squad.py\", line 354, in squad_convert_examples_to_features\n    with Pool(threads, initializer=squad_convert_example_to_features_init, initargs=(tokenizer,)) as p:\n",
    "  File \"/var/lang/lib/python3.8/multiprocessing/context.py\", line 119, in Pool\n    return Pool(processes, initializer, initargs, maxtasksperchild,\n",
    "  File \"/var/lang/lib/python3.8/multiprocessing/pool.py\", line 191, in __init__\n    self._setup_queues()\n",
    "  File \"/var/lang/lib/python3.8/multiprocessing/pool.py\", line 343, in _setup_queues\n    self._inqueue = self._ctx.SimpleQueue()\n",
    "  File \"/var/lang/lib/python3.8/multiprocessing/context.py\", line 113, in SimpleQueue\n    return SimpleQueue(ctx=self.get_context())\n",
    "  File \"/var/lang/lib/python3.8/multiprocessing/queues.py\", line 336, in __init__\n    self._rlock = ctx.Lock()\n",
    "  File \"/var/lang/lib/python3.8/multiprocessing/context.py\", line 68, in Lock\n    return Lock(ctx=self.get_context())\n",
    "  File \"/var/lang/lib/python3.8/multiprocessing/synchronize.py\", line 162, in __init__\n    SemLock.__init__(self, SEMAPHORE, 1, 1, ctx=ctx)\n",
    "  File \"/var/lang/lib/python3.8/multiprocessing/synchronize.py\", line 57, in __init__\n    sl = self._semlock = _multiprocessing.SemLock(\n"
  ]
}
Other code:
from transformers import AutoTokenizer, AutoModelForQuestionAnswering
import torch
import json
import sys
sys.path.append("/mnt/access")

tokenizer = AutoTokenizer.from_pretrained("/mnt/access/Dis_Save/")
model = AutoModelForQuestionAnswering.from_pretrained("/mnt/access/Dis_Save/", return_dict=True)

def lambda_handler(event, context):
    text = r"""
    🤗 Transformers (formerly known as pytorch-transformers and pytorch-pretrained-bert) provides general-purpose
    architectures (BERT, GPT-2, RoBERTa, XLM, DistilBert, XLNet…) for Natural Language Understanding (NLU) and Natural
    Language Generation (NLG) with over 32+ pretrained models in 100+ languages and deep interoperability between
    TensorFlow 2.0 and PyTorch.
    """
    questions = ["How many pretrained models are available in 🤗 Transformers?",]
    for question in questions:
        inputs = tokenizer(question, text, add_special_tokens=True, return_tensors="pt")
        input_ids = inputs["input_ids"].tolist()[0]
        text_tokens = tokenizer.convert_ids_to_tokens(input_ids)
        answer_start_scores, answer_end_scores = model(**inputs).values()
        answer_start = torch.argmax(answer_start_scores)    # Get the most likely beginning of answer with the argmax of the score
        answer_end = torch.argmax(answer_end_scores) + 1    # Get the most likely end of answer with the argmax of the score
        answer = tokenizer.convert_tokens_to_string(tokenizer.convert_ids_to_tokens(input_ids[answer_start:answer_end]))
        print(f"Question: {question}")
        print(f"Answer: {answer}")
    return {
        'statusCode': 200,
        'body': json.dumps(answer)
    }
Edit:
I ran the code. It runs well on its own; however, I get an error when running it on the API itself:
{
  "errorMessage": "'tuple' object has no attribute 'values'",
  "errorType": "AttributeError",
  "stackTrace": [
    "  File \"/var/task/lambda_function.py\", line 39, in lambda_handler\n    answer_start_scores, answer_end_scores = model(**inputs).values()\n"
  ]
}
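For what it's worth, that AttributeError usually means the model returned a plain tuple rather than a dict-like ModelOutput (older transformers versions, or return_dict not taking effect). A version-tolerant unpacking, sketched under that assumption:
from transformers import AutoTokenizer, AutoModelForQuestionAnswering

tokenizer = AutoTokenizer.from_pretrained("/mnt/access/Dis_Save/")
model = AutoModelForQuestionAnswering.from_pretrained("/mnt/access/Dis_Save/")

inputs = tokenizer("How many models?", "Over 32 pretrained models.", return_tensors="pt")
outputs = model(**inputs)
# A QA head returns either a ModelOutput with named fields or a plain tuple,
# depending on the transformers version and the return_dict setting.
if isinstance(outputs, tuple):
    answer_start_scores, answer_end_scores = outputs[:2]
else:
    answer_start_scores = outputs.start_logits
    answer_end_scores = outputs.end_logits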

I can't load my nn model that I've trained and saved

I used transfer learning to train the model. The base model was EfficientNet.
You can read more about it here
from tensorflow import keras
from keras.models import Sequential, Model
from keras.layers import (Dense, Dropout, Conv2D, MaxPooling2D,
                          Flatten, BatchNormalization, Activation)
from keras.optimizers import RMSprop, Adam, SGD
from keras.backend import sigmoid
Activation function
class SwishActivation(Activation):
    def __init__(self, activation, **kwargs):
        super(SwishActivation, self).__init__(activation, **kwargs)
        self.__name__ = 'swish_act'

def swish_act(x, beta=1):
    return (x * sigmoid(beta * x))

from keras.utils.generic_utils import get_custom_objects
from keras.layers import Activation
get_custom_objects().update({'swish_act': SwishActivation(swish_act)})
Model Definition
import efficientnet.keras as enet  # missing from the original snippet; provides EfficientNetB0
model = enet.EfficientNetB0(include_top=False, input_shape=(150, 50, 3), pooling='avg', weights='imagenet')
Adding 2 fully-connected layers to B0.
x = model.output
x = BatchNormalization()(x)
x = Dropout(0.7)(x)
x = Dense(512)(x)
x = BatchNormalization()(x)
x = Activation(swish_act)(x)
x = Dropout(0.5)(x)
x = Dense(128)(x)
x = BatchNormalization()(x)
x = Activation(swish_act)(x)
x = Dense(64)(x)
x = Dense(32)(x)
x = Dense(16)(x)
# Output layer
predictions = Dense(1, activation="sigmoid")(x)
model_final = Model(inputs = model.input, outputs = predictions)
model_final.summary()
I saved it using:
model.save('model.h5')
I get the following error trying to load it:
model=tf.keras.models.load_model('model.h5')
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-12-e3bef1680e4f> in <module>()
1 # Recreate the exact same model, including its weights and the optimizer
----> 2 model = tf.keras.models.load_model('PhoneDetection-CNN_29_July.h5')
3
4 # Show the model architecture
5 model.summary()
10 frames
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/utils/generic_utils.py in class_and_config_for_serialized_keras_object(config, module_objects, custom_objects, printable_module_name)
319 cls = get_registered_object(class_name, custom_objects, module_objects)
320 if cls is None:
--> 321 raise ValueError('Unknown ' + printable_module_name + ': ' + class_name)
322
323 cls_config = config['config']
ValueError: Unknown layer: FixedDropout
I was getting the same error while trying to run inference from my saved model. Then I just imported the efficientnet library in my inference notebook as well, and the error was gone.
My import command looked like:
import efficientnet.keras as efn
(Note that if you haven't installed efficientnet already (which is unlikely), you can do so with the !pip install efficientnet command.)
I had this same issue with a recent model. Researching the source code, you can find the FixedDropout class. I added it to my inference code along with imports of backend and layers. The rate should also match the rate from your EfficientNet model; for EfficientNetB0 the rate is 0.2 (others are different).
from tensorflow import keras
from tensorflow.keras import backend, layers

class FixedDropout(layers.Dropout):
    def _get_noise_shape(self, inputs):
        if self.noise_shape is None:
            return self.noise_shape
        symbolic_shape = backend.shape(inputs)
        noise_shape = [symbolic_shape[axis] if shape is None else shape
                       for axis, shape in enumerate(self.noise_shape)]
        return tuple(noise_shape)

model = keras.models.load_model('model.h5',
                                custom_objects={'FixedDropout': FixedDropout(rate=0.2)})
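(Passing the class itself, custom_objects={'FixedDropout': FixedDropout}, should also work, since Keras reconstructs the layer from the saved config; the instance form above just pins a default rate.)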
I was getting the same error. Then I added the imports below, and it worked properly:
import cv2
import matplotlib.pyplot as plt
import tensorflow as tf
from sklearn.metrics import confusion_matrix
import itertools
import os, glob
from tqdm import tqdm
from efficientnet.tfkeras import EfficientNetB4
If you haven't installed it already: !pip install efficientnet. If you have any problems, post them here.
In my case, I had two files, train.py and test.py.
I was saving my .h5 model inside train.py and attempting to load it inside test.py, and got the same error. To fix it, you need to add the import statements for your efficientnet models inside the file that attempts to load the model as well (in my case, test.py).
from efficientnet.tfkeras import EfficientNetB0

mpi4py Gatherv facing KeyError: 'O'

I am new to mpi4py. I wrote this code to process a large numpy array across multiple processors. As I am unable to provide the input file, I am mentioning the shape of the data: it is [3000000, 15] and it contains string data.
from mpi4py import MPI
import numpy as np
import datetime as dt
import math as math

comm = MPI.COMM_WORLD
numprocs = comm.size
rank = comm.Get_rank()

fname = "6.binetflow"
data = np.loadtxt(open(fname, "rb"), dtype=object, delimiter=",", skiprows=1)
X = data[:, [0,1,3,14,6,6,6,6,6,6,6,6]]

num_rows = math.ceil(len(X)/float(numprocs))
X = X.flatten()

sendCounts = list()
displacements = list()
for p in range(numprocs):
    if p == (numprocs-1):  # for last processor
        sendCounts.append(int(len(X) - (p*num_rows*12)))
        displacements.append(int(p*num_rows*12))
        break
    sendCounts.append(int(num_rows*12))
    displacements.append(int(p*sendCounts[p]))

sendbuf = np.array(X[displacements[rank]: (displacements[rank]+sendCounts[rank])])

## Each processor will do some task on sendbuf

if rank == 0:
    recvbuf = np.empty(sum(sendCounts), dtype=object)
else:
    recvbuf = None

print("sendbuf: ", sendbuf)
comm.Gatherv(sendbuf=sendbuf, recvbuf=(recvbuf, sendCounts), root=0)
if rank == 0:
    print("Gathered array: {}".format(recvbuf))
But I am facing below error:
Traceback (most recent call last):
File "hello.py", line 36, in <module>
comm.Gatherv(sendbuf=sendbuf, recvbuf=(recvbuf, sendCounts), root=0)
File "MPI/Comm.pyx", line 602, in mpi4py.MPI.Comm.Gatherv (d:\build\mpi4py\mpi4py-2.0.0\src\mpi4py.MPI.c:97993)
File "MPI/msgbuffer.pxi", line 525, in mpi4py.MPI._p_msg_cco.for_gather (d:\build\mpi4py\mpi4py-2.0.0\src\mpi4py.MPI.c:34678)
File "MPI/msgbuffer.pxi", line 446, in mpi4py.MPI._p_msg_cco.for_cco_send (d:\build\mpi4py\mpi4py-2.0.0\src\mpi4py.MPI.c:33938)
File "MPI/msgbuffer.pxi", line 148, in mpi4py.MPI.message_simple (d:\build\mpi4py\mpi4py-2.0.0\src\mpi4py.MPI.c:30349)
File "MPI/msgbuffer.pxi", line 93, in mpi4py.MPI.message_basic (d:\build\mpi4py\mpi4py-2.0.0\src\mpi4py.MPI.c:29448)
KeyError: 'O'
(two more ranks print the identical traceback; the root rank additionally fails on the receive side:)
Traceback (most recent call last):
File "hello.py", line 36, in <module>
comm.Gatherv(sendbuf=sendbuf, recvbuf=(recvbuf, sendCounts), root=0)
File "MPI/Comm.pyx", line 602, in mpi4py.MPI.Comm.Gatherv (d:\build\mpi4py\mpi4py-2.0.0\src\mpi4py.MPI.c:97993)
File "MPI/msgbuffer.pxi", line 516, in mpi4py.MPI._p_msg_cco.for_gather (d:\build\mpi4py\mpi4py-2.0.0\src\mpi4py.MPI.c:34587)
File "MPI/msgbuffer.pxi", line 466, in mpi4py.MPI._p_msg_cco.for_cco_recv (d:\build\mpi4py\mpi4py-2.0.0\src\mpi4py.MPI.c:34097)
File "MPI/msgbuffer.pxi", line 261, in mpi4py.MPI.message_vector (d:\build\mpi4py\mpi4py-2.0.0\src\mpi4py.MPI.c:31977)
File "MPI/msgbuffer.pxi", line 93, in mpi4py.MPI.message_basic (d:\build\mpi4py\mpi4py-2.0.0\src\mpi4py.MPI.c:29448)
KeyError: 'O'
Any help will be much appreciated. I have been stuck on this problem for a long time.
Thanks
The problem is dtype=object.
Mpi4py provides two kinds of communication functions, those whose names begin with an upper-case letter, e.g. Scatter, and those whose names begin with a lower-case letter, e.g. scatter. From the Mpi4py documentation:
In MPI for Python, the Bcast(), Scatter(), Gather(), Allgather() and Alltoall() methods of Comm instances provide support for collective communications of memory buffers. The variants bcast(), scatter(), gather(), allgather() and alltoall() can communicate generic Python objects.
What is not clear from this is that even though numpy arrays supposedly expose memory buffers, the buffers apparently need to be of one of a small set of primitive data types, and certainly don't work with generic objects. Compare the following two pieces of code:
from mpi4py import MPI
import numpy

Comm = MPI.COMM_WORLD
Size = Comm.Get_size()
Rank = Comm.Get_rank()

if Rank == 0:
    Data = numpy.empty(Size, dtype=object)
else:
    Data = None

Data = Comm.scatter(Data, 0)  # I work fine!
print("Data on rank %d: " % Rank, Data)
and
from mpi4py import MPI
import numpy

Comm = MPI.COMM_WORLD
Size = Comm.Get_size()
Rank = Comm.Get_rank()

if Rank == 0:
    Data = numpy.empty(Size, dtype=object)
else:
    Data = None

Datb = numpy.empty(1, dtype=object)
Comm.Scatter(Data, Datb, 0)  # I throw KeyError!
print("Datb on rank %d: " % Rank, Datb)
Unfortunately, Mpi4py provides no scatterv. From the same place in the docs:
The vector variants (which can communicate different amounts of data to each process) Scatterv(), Gatherv(), Allgatherv() and Alltoallv() are also supported, they can only communicate objects exposing memory buffers.
These are not exceptions to the upper- vs lower-case rule for dtypes, either:
from mpi4py import MPI
import numpy

Comm = MPI.COMM_WORLD
Size = Comm.Get_size()
Rank = Comm.Get_rank()

if Rank == 0:
    Data = numpy.empty(2*Size+1, dtype=numpy.dtype('float64'))
else:
    Data = None

if Rank == 0:
    Datb = numpy.empty(3, dtype=numpy.dtype('float64'))
else:
    Datb = numpy.empty(2, dtype=numpy.dtype('float64'))

Comm.Scatterv(Data, Datb, 0)  # I work fine!
print("Datb on rank %d: " % Rank, Datb)
versus
from mpi4py import MPI
import numpy

Comm = MPI.COMM_WORLD
Size = Comm.Get_size()
Rank = Comm.Get_rank()

if Rank == 0:
    Data = numpy.empty(2*Size+1, dtype=object)
else:
    Data = None

if Rank == 0:
    Datb = numpy.empty(3, dtype=object)
else:
    Datb = numpy.empty(2, dtype=object)

Comm.Scatterv(Data, Datb, 0)  # I throw KeyError!
print("Datb on rank %d: " % Rank, Datb)
You'll unfortunately need to write your code so that it can use the lower-case scatter and gather (which communicate one pickled Python object per process, so each rank's rows must be packed into a single object), or use more primitive point-to-point communication functions, or use some parallel facility other than Mpi4py.
Using Mpi4py 2.0.0, the current stable version at the time of this writing.
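For instance, a minimal sketch of the pickle-based route (my own illustration, not from the original post): split the object array into one chunk per rank on the root, then use the lower-case variants:
from mpi4py import MPI
import numpy as np

comm = MPI.COMM_WORLD
rank = comm.Get_rank()

if rank == 0:
    # Stand-in for the real [3000000, 15] string array from the question
    data = np.array([["a", "1"], ["b", "2"], ["c", "3"],
                     ["d", "4"], ["e", "5"]], dtype=object)
    chunks = np.array_split(data, comm.size)  # one (possibly unequal) chunk per rank
else:
    chunks = None

local = comm.scatter(chunks, root=0)  # lower-case scatter pickles arbitrary objects
# ... each rank processes its local rows here ...
parts = comm.gather(local, root=0)    # lower-case gather collects the chunks back
if rank == 0:
    print("Gathered array: {}".format(np.vstack(parts)))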
