I have found myself dealing with an environment that does not support multiprocessing. How do I run my DistilBERT without the transformers pipeline?
Here is my code right now:
import json
import os
import sys
sys.path.append("/mnt/access")
import torch
from transformers import AutoTokenizer, AutoModelForQuestionAnswering
from transformers.pipelines import pipeline
def lambda_handler(event, context):
    print("After:", os.listdir("/mnt/access"))
    tokenizer = AutoTokenizer.from_pretrained('/mnt/access/Dis_Save/')
    model = AutoModelForQuestionAnswering.from_pretrained('/mnt/access/Dis_Save/')
    nlp_qa = pipeline('question-answering', tokenizer=tokenizer, model=model)
    context = "tra"
    question = "tra"
    X = nlp_qa(context=context, question=question)
    return {
        'statusCode': 200,
        'body': json.dumps('Hello from Lambda!')
    }
Error message I get right now:
{
  "errorMessage": "[Errno 38] Function not implemented",
  "errorType": "OSError",
  "stackTrace": [
    " File \"/var/task/lambda_function.py\", line 18, in lambda_handler\n X = nlp_qa(context=context, question=question)\n",
    " File \"/mnt/access/transformers/pipelines.py\", line 1776, in __call__\n features_list = [\n",
    " File \"/mnt/access/transformers/pipelines.py\", line 1777, in <listcomp>\n squad_convert_examples_to_features(\n",
    " File \"/mnt/access/transformers/data/processors/squad.py\", line 354, in squad_convert_examples_to_features\n with Pool(threads, initializer=squad_convert_example_to_features_init, initargs=(tokenizer,)) as p:\n",
    " File \"/var/lang/lib/python3.8/multiprocessing/context.py\", line 119, in Pool\n return Pool(processes, initializer, initargs, maxtasksperchild,\n",
    " File \"/var/lang/lib/python3.8/multiprocessing/pool.py\", line 191, in __init__\n self._setup_queues()\n",
    " File \"/var/lang/lib/python3.8/multiprocessing/pool.py\", line 343, in _setup_queues\n self._inqueue = self._ctx.SimpleQueue()\n",
    " File \"/var/lang/lib/python3.8/multiprocessing/context.py\", line 113, in SimpleQueue\n return SimpleQueue(ctx=self.get_context())\n",
    " File \"/var/lang/lib/python3.8/multiprocessing/queues.py\", line 336, in __init__\n self._rlock = ctx.Lock()\n",
    " File \"/var/lang/lib/python3.8/multiprocessing/context.py\", line 68, in Lock\n return Lock(ctx=self.get_context())\n",
    " File \"/var/lang/lib/python3.8/multiprocessing/synchronize.py\", line 162, in __init__\n SemLock.__init__(self, SEMAPHORE, 1, 1, ctx=ctx)\n",
    " File \"/var/lang/lib/python3.8/multiprocessing/synchronize.py\", line 57, in __init__\n sl = self._semlock = _multiprocessing.SemLock(\n"
  ]
}
Other code:
from transformers import AutoTokenizer, AutoModelForQuestionAnswering
import torch
import json
import sys
sys.path.append("/mnt/access")
tokenizer = AutoTokenizer.from_pretrained("/mnt/access/Dis_Save/")
model = AutoModelForQuestionAnswering.from_pretrained("/mnt/access/Dis_Save/", return_dict=True)
def lambda_handler(event, context):
    text = r"""
    🤗 Transformers (formerly known as pytorch-transformers and pytorch-pretrained-bert) provides general-purpose
    architectures (BERT, GPT-2, RoBERTa, XLM, DistilBert, XLNet…) for Natural Language Understanding (NLU) and Natural
    Language Generation (NLG) with over 32+ pretrained models in 100+ languages and deep interoperability between
    TensorFlow 2.0 and PyTorch.
    """
    questions = ["How many pretrained models are available in 🤗 Transformers?"]
    for question in questions:
        inputs = tokenizer(question, text, add_special_tokens=True, return_tensors="pt")
        input_ids = inputs["input_ids"].tolist()[0]
        text_tokens = tokenizer.convert_ids_to_tokens(input_ids)
        answer_start_scores, answer_end_scores = model(**inputs).values()
        # Get the most likely beginning of the answer with the argmax of the score
        answer_start = torch.argmax(answer_start_scores)
        # Get the most likely end of the answer with the argmax of the score
        answer_end = torch.argmax(answer_end_scores) + 1
        answer = tokenizer.convert_tokens_to_string(tokenizer.convert_ids_to_tokens(input_ids[answer_start:answer_end]))
        print(f"Question: {question}")
        print(f"Answer: {answer}")
    return {
        'statusCode': 200,
        'body': json.dumps(answer)
    }
Edit:
I ran the code. It runs well on its own; however, I get an error when running it on the API itself:
{
  "errorMessage": "'tuple' object has no attribute 'values'",
  "errorType": "AttributeError",
  "stackTrace": [
    " File \"/var/task/lambda_function.py\", line 39, in lambda_handler\n answer_start_scores, answer_end_scores = model(**inputs).values()\n"
  ]
}
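This error usually means the model returned a plain tuple rather than an output object. Older transformers versions return a tuple of (start_logits, end_logits), while newer ones return a QuestionAnsweringModelOutput with named fields, so a version mismatch between the environments is a plausible cause (an assumption, since the post does not state the versions). A defensive sketch that handles both return types:

outputs = model(**inputs)

# Older transformers versions return a plain tuple of
# (start_logits, end_logits); newer ones return a
# QuestionAnsweringModelOutput with named fields.
if isinstance(outputs, tuple):
    answer_start_scores, answer_end_scores = outputs[0], outputs[1]
else:
    answer_start_scores = outputs.start_logits
    answer_end_scores = outputs.end_logits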
Related
I am working with the huggingface transformers, training a pretrained byt5-small on my data. I am also trying to do a hyperparameter search using the Trainer API with optuna as the backend. But the following error appears every time. Please help me out. Below is the whole code.
transformers version = 4.23.1
from transformers import HfArgumentParser, TensorFlowBenchmark, TensorFlowBenchmarkArguments
import pandas as pd
from transformers import T5ForConditionalGeneration, ByT5Tokenizer
from transformers import TrainingArguments
from torch.utils.data import Dataset, DataLoader
from transformers import Trainer
import datasets
import transformers
from tqdm import tqdm
from numba import cuda
device = cuda.get_current_device()
device.reset()
train_df = pd.read_csv("/home/bhavuk/project1/data/train_split.csv")
eval_df = pd.read_csv("/home/bhavuk/project1/data/eval_split.csv")
test_df = pd.read_csv("/home/bhavuk/project1/data/test_split.csv")
train_df = train_df.dropna()
eval_df = eval_df.dropna()
test_df = test_df.dropna(subset=["Hypothesis","Reference"])
train_df.shape, eval_df.shape[0], test_df.shape[0]
args_dict = {
    "output_dir": './byt5-small-hp-search',
    "overwrite_output_dir": True,
    "per_device_train_batch_size": 2,
    "per_device_eval_batch_size": 2,
    "gradient_accumulation_steps": 4,
    "learning_rate": 1e-1,
    "warmup_steps": 2,
    "logging_steps": 100,
    "evaluation_strategy": "steps",
    "eval_steps": 250,
    "num_train_epochs": 4,
    "do_train": True,
    "do_eval": True,
    "fp16": False,
    "max_steps": 100000,
    "load_best_model_at_end": True,
    "logging_dir": './logs',
    "save_total_limit": 2,
    "weight_decay": 0.1,
    "label_smoothing_factor": 0.1
}
parser = HfArgumentParser((TrainingArguments))
training_args = parser.parse_dict(args_dict)
args = training_args[0]
def optuna_hp_space(trial):
    return {
        "learning_rate": trial.suggest_float("learning_rate", 1e-5, 1e-1, log=True),
        "dropout_rate": trial.suggest_float("dropout_rate", 0.1, 0.6, step=0.1),
        "weight_decay": trial.suggest_float("weight_decay", 0.1, 0.3, step=0.1),
        "label_smoothing_factor": trial.suggest_float("label_smoothing_factor", 0.1, 0.3, step=0.1)
    }
config = '/home/bhavuk/project1/notebooks/models--google--byt5-small/snapshots/ce8f3a48ed7676af36476a01fb01f95ea529599c/config.json'
def model_init(trial):
    return T5ForConditionalGeneration.from_pretrained(
        'google/byt5-small',
        config=config,
        dropout_rate=0.1
    )
tokenizer = ByT5Tokenizer.from_pretrained(
    "google/byt5-small",
    cache_dir=".",
    max_length=512
)
class GPReviewDataset(Dataset):
    def __init__(self, Text, Label):
        self.Text = Text
        self.Label = Label
        # self.tokenizer = tokenizer
        # self.max_len = max_len

    def __len__(self):
        return len(self.Text)

    def __getitem__(self, item):
        Text = str(self.Text[item])
        Label = self.Label[item]
        inputs = tokenizer(Text, padding="max_length", truncation=True, max_length=512)
        outputs = tokenizer(Label, padding="max_length", truncation=True, max_length=512)
        return {
            "input_ids": inputs.input_ids,
            "attention_mask": inputs.attention_mask,
            "labels": outputs.input_ids,
            "decoder_attention_mask": outputs.attention_mask,
            # "labels": lbz
        }
ds_train = GPReviewDataset(
    Text=train_df.Hypothesis.to_numpy(),
    Label=train_df.Reference.to_numpy()
)
ds_test = GPReviewDataset(
    Text=eval_df.Hypothesis.to_numpy(),
    Label=eval_df.Reference.to_numpy()
    # tokenizer=tokenizer,
    # max_len=max_len
)
train_dataset = ds_train
valid_dataset = ds_test
trainer = Trainer(
    model=None,
    args=args,
    train_dataset=train_dataset,
    eval_dataset=valid_dataset,
    tokenizer=tokenizer,
    model_init=model_init
)

best_trial = trainer.hyperparameter_search(
    direction="minimize",
    backend="optuna",
    hp_space=optuna_hp_space,
    n_trials=20
)
ERROR:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
/home/bhavuk/project1/notebooks/byT5small_hp_search_2.ipynb Cell 14 in <cell line: 1>()
----> 1 best_trial = trainer.hyperparameter_search(
2 direction="minimize",
3 backend="optuna",
4 hp_space=optuna_hp_space,
5 n_trials=20
6 )
File ~/anaconda3/envs/cvenv/lib/python3.9/site-packages/transformers/trainer.py:2368, in Trainer.hyperparameter_search(self, hp_space, compute_objective, n_trials, direction, backend, hp_name, **kwargs)
2360 self.compute_objective = default_compute_objective if compute_objective is None else compute_objective
2362 backend_dict = {
2363 HPSearchBackend.OPTUNA: run_hp_search_optuna,
2364 HPSearchBackend.RAY: run_hp_search_ray,
2365 HPSearchBackend.SIGOPT: run_hp_search_sigopt,
2366 HPSearchBackend.WANDB: run_hp_search_wandb,
2367 }
-> 2368 best_run = backend_dict[backend](self, n_trials, direction, **kwargs)
2370 self.hp_search_backend = None
2371 return best_run
File ~/anaconda3/envs/cvenv/lib/python3.9/site-packages/transformers/integrations.py:189, in run_hp_search_optuna(trainer, n_trials, direction, **kwargs)
187 n_jobs = kwargs.pop("n_jobs", 1)
188 study = optuna.create_study(direction=direction, **kwargs)
...
return forward_call(*input, **kwargs)
File "/home/bhavuk/anaconda3/envs/cvenv/lib/python3.9/site-packages/transformers/models/t5/modeling_t5.py", line 937, in forward
raise ValueError(f"You have to specify either {err_msg_prefix}input_ids or {err_msg_prefix}inputs_embeds")
ValueError: You have to specify either decoder_input_ids or decoder_inputs_embeds
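One plausible cause, offered as an assumption since the post does not confirm it: when label_smoothing_factor is set, the Trainer pops labels from the batch before the forward pass and computes the smoothed loss itself, so T5 never gets to derive decoder_input_ids from labels and raises exactly this error. A sketch of a dataset-side workaround that builds decoder_input_ids explicitly (T5/byT5 use pad_token_id as the decoder start token):

def __getitem__(self, item):
    Text = str(self.Text[item])
    Label = self.Label[item]
    inputs = tokenizer(Text, padding="max_length", truncation=True, max_length=512)
    outputs = tokenizer(Label, padding="max_length", truncation=True, max_length=512)
    # Shift the labels one position to the right, prepending the pad
    # token, so the model receives decoder_input_ids directly instead
    # of having to derive them from `labels`.
    decoder_input_ids = [tokenizer.pad_token_id] + outputs.input_ids[:-1]
    return {
        "input_ids": inputs.input_ids,
        "attention_mask": inputs.attention_mask,
        "labels": outputs.input_ids,
        "decoder_input_ids": decoder_input_ids,
        "decoder_attention_mask": outputs.attention_mask,
    }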
I have a TensorFlow (sequence) model that takes 2 input streams. Here is the part of the code where the model is defined:
generic_in = Input(shape=(sequence_length, nr_features), name='generic_input')
input1_in = Input(shape=(sequence_length, nr_features), name='input1_input')
input2_in = Input(shape=(sequence_length, nr_features), name='input2_input')

generic_out, generic_state_h, generic_state_c = LSTM(50,
                                                     return_sequences=False,
                                                     return_state=True,
                                                     dropout=0.15,
                                                     recurrent_dropout=0.15,
                                                     name="generic_lstm")(generic_in)

concatenated_gen_out = Concatenate()([generic_state_h, generic_state_c])

gen_dense_out = Dense(100,
                      activation='relu',
                      name="generic_dense")(concatenated_gen_out)
gen_dense_out = BatchNormalization()(gen_dense_out)
gen_dense_out = Dropout(0.15)(gen_dense_out)

generic_model = Model(inputs=[generic_in], outputs=[gen_dense_out])

input1_dense_out = generic_model(input1_in)
input2_dense_out = generic_model(input2_in)

concatenated_out = Concatenate()([input1_dense_out, input2_dense_out])

dense2_out = Dense(100,
                   activation='relu',
                   name="dense_2")(concatenated_out)
dense2_out = BatchNormalization()(dense2_out)
dense2_out = Dropout(0.2)(dense2_out)

softmax_out = Dense(nr_classes,
                    activation='softmax',
                    name="final_output_layer")(dense2_out)

model = Model(inputs=[input1_in, input2_in],
              outputs=[softmax_out])

# opt = tf.keras.optimizers.Adam(lr=0.00008, decay=0.000001)
opt = tf.keras.optimizers.Adam(lr=0.0001)

model.compile(loss='categorical_crossentropy',
              optimizer=opt,
              metrics=['accuracy'])

history = model.fit(x=train_x,
                    y=train_y,
                    batch_size=BATCH_SIZE,
                    epochs=80,
                    verbose=2,
                    validation_data=(dev_x, dev_y),
                    shuffle=True)
Please note that train_x, which is the input to the model.fit method, is a list containing 2 inputs, as defined in model = Model(inputs=[input1_in, input2_in], outputs=[softmax_out]).
This works perfectly fine with my TensorFlow v1.13.1 installation on Windows. I am trying to migrate my project to macOS Big Sur v11.3.1 with the M1 chip. The TensorFlow version on macOS is 2.4.0-rc0.
I obviously made some changes to make it work with TensorFlow 2, but these changes are mainly API call updates based on the new API.
The error I get on the macOS TensorFlow installation:
Traceback (most recent call last):
File "/Users/me/Developer/AI_Projects/Football/M1_FULL_TIME_MR/src/train_fit_v2.py", line 288, in <module>
history = model.fit(x=train_x,
File "/Users/me/miniforge3/envs/tf_dev/lib/python3.8/site-packages/tensorflow/python/keras/engine/training.py", line 1100, in fit
tmp_logs = self.train_function(iterator)
File "/Users/me/miniforge3/envs/tf_dev/lib/python3.8/site-packages/tensorflow/python/eager/def_function.py", line 828, in __call__
result = self._call(*args, **kwds)
File "/Users/me/miniforge3/envs/tf_dev/lib/python3.8/site-packages/tensorflow/python/eager/def_function.py", line 871, in _call
self._initialize(args, kwds, add_initializers_to=initializers)
File "/Users/me/miniforge3/envs/tf_dev/lib/python3.8/site-packages/tensorflow/python/eager/def_function.py", line 725, in _initialize
self._stateful_fn._get_concrete_function_internal_garbage_collected( # pylint: disable=protected-access
File "/Users/me/miniforge3/envs/tf_dev/lib/python3.8/site-packages/tensorflow/python/eager/function.py", line 2969, in _get_concrete_function_internal_garbage_collected
graph_function, _ = self._maybe_define_function(args, kwargs)
File "/Users/me/miniforge3/envs/tf_dev/lib/python3.8/site-packages/tensorflow/python/eager/function.py", line 3361, in _maybe_define_function
graph_function = self._create_graph_function(args, kwargs)
File "/Users/me/miniforge3/envs/tf_dev/lib/python3.8/site-packages/tensorflow/python/eager/function.py", line 3196, in _create_graph_function
func_graph_module.func_graph_from_py_func(
File "/Users/me/miniforge3/envs/tf_dev/lib/python3.8/site-packages/tensorflow/python/framework/func_graph.py", line 990, in func_graph_from_py_func
func_outputs = python_func(*func_args, **func_kwargs)
File "/Users/me/miniforge3/envs/tf_dev/lib/python3.8/site-packages/tensorflow/python/eager/def_function.py", line 634, in wrapped_fn
out = weak_wrapped_fn().__wrapped__(*args, **kwds)
File "/Users/me/miniforge3/envs/tf_dev/lib/python3.8/site-packages/tensorflow/python/framework/func_graph.py", line 977, in wrapper
raise e.ag_error_metadata.to_exception(e)
AttributeError: in user code:
/Users/me/miniforge3/envs/tf_dev/lib/python3.8/site-packages/tensorflow/python/keras/engine/training.py:805 train_function *
return step_function(self, iterator)
/Users/me/miniforge3/envs/tf_dev/lib/python3.8/site-packages/tensorflow/python/keras/engine/training.py:795 step_function **
outputs = model.distribute_strategy.run(run_step, args=(data,))
/Users/me/miniforge3/envs/tf_dev/lib/python3.8/site-packages/tensorflow/python/distribute/distribute_lib.py:1259 run
return self._extended.call_for_each_replica(fn, args=args, kwargs=kwargs)
/Users/me/miniforge3/envs/tf_dev/lib/python3.8/site-packages/tensorflow/python/distribute/distribute_lib.py:2730 call_for_each_replica
return self._call_for_each_replica(fn, args, kwargs)
/Users/me/miniforge3/envs/tf_dev/lib/python3.8/site-packages/tensorflow/python/distribute/distribute_lib.py:3417 _call_for_each_replica
return fn(*args, **kwargs)
/Users/me/miniforge3/envs/tf_dev/lib/python3.8/site-packages/tensorflow/python/keras/engine/training.py:788 run_step **
outputs = model.train_step(data)
/Users/me/miniforge3/envs/tf_dev/lib/python3.8/site-packages/tensorflow/python/keras/engine/training.py:758 train_step
self.compiled_metrics.update_state(y, y_pred, sample_weight)
/Users/me/miniforge3/envs/tf_dev/lib/python3.8/site-packages/tensorflow/python/keras/engine/compile_utils.py:387 update_state
self.build(y_pred, y_true)
/Users/me/miniforge3/envs/tf_dev/lib/python3.8/site-packages/tensorflow/python/keras/engine/compile_utils.py:317 build
self._metrics = nest.map_structure_up_to(y_pred, self._get_metric_objects,
/Users/me/miniforge3/envs/tf_dev/lib/python3.8/site-packages/tensorflow/python/util/nest.py:1159 map_structure_up_to
return map_structure_with_tuple_paths_up_to(
/Users/me/miniforge3/envs/tf_dev/lib/python3.8/site-packages/tensorflow/python/util/nest.py:1257 map_structure_with_tuple_paths_up_to
results = [
/Users/me/miniforge3/envs/tf_dev/lib/python3.8/site-packages/tensorflow/python/util/nest.py:1258 <listcomp>
func(*args, **kwargs) for args in zip(flat_path_gen, *flat_value_gen)
/Users/me/miniforge3/envs/tf_dev/lib/python3.8/site-packages/tensorflow/python/util/nest.py:1161 <lambda>
lambda _, *values: func(*values), # Discards the path arg.
/Users/me/miniforge3/envs/tf_dev/lib/python3.8/site-packages/tensorflow/python/keras/engine/compile_utils.py:418 _get_metric_objects
return [self._get_metric_object(m, y_t, y_p) for m in metrics]
/Users/me/miniforge3/envs/tf_dev/lib/python3.8/site-packages/tensorflow/python/keras/engine/compile_utils.py:418 <listcomp>
return [self._get_metric_object(m, y_t, y_p) for m in metrics]
/Users/me/miniforge3/envs/tf_dev/lib/python3.8/site-packages/tensorflow/python/keras/engine/compile_utils.py:439 _get_metric_object
y_t_rank = len(y_t.shape.as_list())
AttributeError: 'tuple' object has no attribute 'shape'
I am totally out of solutions. What am I supposed to do to make it work?
As per the error you posted, y_t is of type tuple. Tuple has no attribute shape.
You can try casting y_t to a numpy array:
numpy.array(y_t)
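In this case the tuples most likely come from the labels passed to model.fit, so a sketch of the fix (assuming train_y and dev_y are the variables from the question) is to convert them before training:

import numpy as np

# Keras builds its metrics from objects that have a .shape attribute,
# so plain tuples must become arrays before model.fit is called.
train_y = np.array(train_y)
dev_y = np.array(dev_y)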
My Python program:
#!/usr/bin/python
from pylab import plot,show
from numpy import vstack,array
from numpy.random import rand
from scipy.cluster.vq import kmeans, vq, whiten
import csv
if __name__ == "__main__":
    K = 3
    data_arr = []
    clust_name_arr = []
    with open('clustering.csv', 'rb') as f:
        reader = csv.reader(f)
        for row in reader:
            data_arr.append([float(x) for x in row[1:]])
            clust_name_arr.append([row[0]])
    data = vstack(data_arr)
    clust_name = vstack(clust_name_arr)
    data = whiten(data)
    centroids, distortion = kmeans(data, 3)
    print "distortion = " + str(distortion)
    idx, _ = vq(data, centroids)
    plot(data[idx==0,0], data[idx==0,1], 'ob',
         data[idx==1,0], data[idx==1,1], 'or',
         data[idx==2,0], data[idx==2,1], 'og')
    print clust_name
    print data
    for i in range(K):
        result_names = clust_name[idx==i, 0]
        print "================================="
        print "Cluster " + str(i+1)
        for name in result_names:
            print name
    plot(centroids[:,0],
         centroids[:,1],
         'sg', markersize=8)
    show()
Error Message:
Traceback (most recent call last):
  File "/Users//Desktop/Assignment4(2).py", line 7, in <module>
    from pylab import plot,show
  File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/pylab.py", line 1, in <module>
    from matplotlib.pylab import *
  File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/matplotlib/pylab.py", line 222, in <module>
    from matplotlib import mpl  # pulls in most modules
  File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/matplotlib/mpl.py", line 1, in <module>
    from matplotlib import artist
  File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/matplotlib/artist.py", line 7, in <module>
    from transforms import Bbox, IdentityTransform, TransformedBbox, \
  File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/matplotlib/transforms.py", line 35, in <module>
    from matplotlib._path import (affine_transform, count_bboxes_overlapping_bbox,
ImportError: dlopen(/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/matplotlib/_path.so, 2): no suitable image found. Did find:
  /Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/matplotlib/_path.so: no matching architecture in universal wrapper
I was able to import a text file into an elasticsearch index on my local machine.
Despite using a virtual environment, the production machine is a nightmare, because I keep having errors like:
UnicodeDecodeError: 'ascii' codec can't decode byte 0xc2 in position 79: ordinal not in range(128)
I am using python3, and I personally was having fewer issues in python2; maybe it is just the frustration of a couple of wasted hours.
I can't understand why I am not able to strip or handle non-ASCII chars:
I tried to import:
from unidecode import unidecode

def remove_non_ascii(text):
    return unidecode(unicode(text, encoding="utf-8"))
using python2, no success.
Back on python3:
import string
printable = set(string.printable)
''.join(filter(lambda x: x in printable, 'mixed non ascii string'))
no success
import codecs
with codecs.open(path, encoding='utf8') as f:
    ....
no success
tried:
# -*- coding: utf-8 -*-
no success
https://docs.python.org/2/library/unicodedata.html#unicodedata.normalize
no success ...
None of the above seems able to strip or handle the non-ASCII characters; it is very cumbersome, and I keep having the following errors:
with open(path) as f:
    for line in f:
        line = line.replace('\n', '')
        el = line.split('\t')
        print(el)
        _id = el[0]
        _source = el[1]
        _name = el[2]
        # _description = ''.join(filter(lambda x: x in printable, el[-1]))
        _description = remove_non_ascii(el[-1])
        print(_id, _source, _name, _description, setTipe(_source))
        action = {
            "_index": _indexName,
            "_type": setTipe(_source),
            "_id": _source,
            "_source": {
                "name": _name,
                "description": _description
            }
        }
        helpers.bulk(es, [action])
File "<stdin>", line 22, in <module>
File "/usr/local/lib/python2.7/dist-packages/elasticsearch/helpers/__init__.py", line 194, in bulk
for ok, item in streaming_bulk(client, actions, **kwargs):
File "/usr/local/lib/python2.7/dist-packages/elasticsearch/helpers/__init__.py", line 162, in streaming_bulk
for result in _process_bulk_chunk(client, bulk_actions, raise_on_exception, raise_on_error, **kwargs):
File "/usr/local/lib/python2.7/dist-packages/elasticsearch/helpers/__init__.py", line 87, in _process_bulk_chunk
resp = client.bulk('\n'.join(bulk_actions) + '\n', **kwargs)
UnicodeDecodeError: 'ascii' codec can't decode byte 0xc2 in position 79: ordinal not in range(128)
I would like to have a definitive practice for handling encoding problems in python3 - I am using the same scripts on different machines and getting different results...
ASCII characters are in the range 0-127.
def remove_non_ascii(text):
    ascii_characters = ""
    for character in text:
        if ord(character) < 128:
            ascii_characters += character
    return ascii_characters
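A shorter equivalent, a sketch assuming Python 3, that lets the codec itself drop the offending characters:

def remove_non_ascii(text):
    # Encode to ASCII, silently dropping anything outside 0-127,
    # then decode back to str.
    return text.encode("ascii", errors="ignore").decode("ascii")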
I am new to mpi4py. I wrote this code in order to process a large numpy array with multiple processes. As I am unable to provide the input file, I am mentioning the shape of the data: it is [3000000, 15] and contains string data.
from mpi4py import MPI
import numpy as np
import datetime as dt
import math as math
comm = MPI.COMM_WORLD
numprocs = comm.size
rank = comm.Get_rank()
fname = "6.binetflow"
data = np.loadtxt(open(fname,"rb"), dtype=object, delimiter=",", skiprows=1)
X = data[:,[0,1,3,14,6,6,6,6,6,6,6,6]]
num_rows = math.ceil(len(X)/float(numprocs))
X = X.flatten()
sendCounts = list()
displacements = list()
for p in range(numprocs):
    if p == (numprocs-1):  # for the last processor
        sendCounts.append(int(len(X) - (p*num_rows*12)))
        displacements.append(int(p*num_rows*12))
        break
    sendCounts.append(int(num_rows*12))
    displacements.append(int(p*sendCounts[p]))
sendbuf = np.array(X[displacements[rank]: (displacements[rank]+sendCounts[rank])])
## Each processor will do some task on sendbuf
if rank == 0:
    recvbuf = np.empty(sum(sendCounts), dtype=object)
else:
    recvbuf = None

print("sendbuf: ", sendbuf)
comm.Gatherv(sendbuf=sendbuf, recvbuf=(recvbuf, sendCounts), root=0)

if rank == 0:
    print("Gathered array: {}".format(recvbuf))
But I am facing the error below:
Traceback (most recent call last):
File "hello.py", line 36, in <module>
comm.Gatherv(sendbuf=sendbuf, recvbuf=(recvbuf, sendCounts), root=0)
File "MPI/Comm.pyx", line 602, in mpi4py.MPI.Comm.Gatherv (d:\build\mpi4py\mpi4py-2.0.0\src\mpi4py.MPI.c:97993)
File "MPI/msgbuffer.pxi", line 525, in mpi4py.MPI._p_msg_cco.for_gather (d:\build\mpi4py\mpi4py-2.0.0\src\mpi4py.MPI.c:34678)
File "MPI/msgbuffer.pxi", line 446, in mpi4py.MPI._p_msg_cco.for_cco_send (d:\build\mpi4py\mpi4py-2.0.0\src\mpi4py.MPI.c:33938)
File "MPI/msgbuffer.pxi", line 148, in mpi4py.MPI.message_simple (d:\build\mpi4py\mpi4py-2.0.0\src\mpi4py.MPI.c:30349)
File "MPI/msgbuffer.pxi", line 93, in mpi4py.MPI.message_basic (d:\build\mpi4py\mpi4py-2.0.0\src\mpi4py.MPI.c:29448)
KeyError: 'O'
(the same traceback is repeated by the other sending ranks)
Traceback (most recent call last):
File "hello.py", line 36, in <module>
comm.Gatherv(sendbuf=sendbuf, recvbuf=(recvbuf, sendCounts), root=0)
File "MPI/Comm.pyx", line 602, in mpi4py.MPI.Comm.Gatherv (d:\build\mpi4py\mpi4py-2.0.0\src\mpi4py.MPI.c:97993)
File "MPI/msgbuffer.pxi", line 516, in mpi4py.MPI._p_msg_cco.for_gather (d:\build\mpi4py\mpi4py-2.0.0\src\mpi4py.MPI.c:34587)
File "MPI/msgbuffer.pxi", line 466, in mpi4py.MPI._p_msg_cco.for_cco_recv (d:\build\mpi4py\mpi4py-2.0.0\src\mpi4py.MPI.c:34097)
File "MPI/msgbuffer.pxi", line 261, in mpi4py.MPI.message_vector (d:\build\mpi4py\mpi4py-2.0.0\src\mpi4py.MPI.c:31977)
File "MPI/msgbuffer.pxi", line 93, in mpi4py.MPI.message_basic (d:\build\mpi4py\mpi4py-2.0.0\src\mpi4py.MPI.c:29448)
KeyError: 'O'
Any help will be much appreciated. I have been stuck on this problem for a long time.
Thanks
The problem is dtype=object.
Mpi4py provides two kinds of communication functions, those whose names begin with an upper-case letter, e.g. Scatter, and those whose names begin with a lower-case letter, e.g. scatter. From the Mpi4py documentation:
In MPI for Python, the Bcast(), Scatter(), Gather(), Allgather() and Alltoall() methods of Comm instances provide support for collective communications of memory buffers. The variants bcast(), scatter(), gather(), allgather() and alltoall() can communicate generic Python objects.
What is not clear from this is that even though numpy arrays expose memory buffers, the buffers apparently need to contain one of a small set of primitive data types, and certainly don't work with generic objects. Compare the following two pieces of code:
from mpi4py import MPI
import numpy
Comm = MPI.COMM_WORLD
Size = Comm.Get_size()
Rank = Comm.Get_rank()
if Rank == 0:
    Data = numpy.empty(Size, dtype=object)
else:
    Data = None

Data = Comm.scatter(Data, 0)  # I work fine!
print("Data on rank %d: " % Rank, Data)
and
from mpi4py import MPI
import numpy
Comm = MPI.COMM_WORLD
Size = Comm.Get_size()
Rank = Comm.Get_rank()
if Rank == 0:
    Data = numpy.empty(Size, dtype=object)
else:
    Data = None

Datb = numpy.empty(1, dtype=object)
Comm.Scatter(Data, Datb, 0)  # I throw KeyError!
print("Datb on rank %d: " % Rank, Datb)
Unfortunately, Mpi4py provides no lower-case scatterv for generic objects. From the same place in the docs:
The vector variants (which can communicate different amounts of data to each process) Scatterv(), Gatherv(), Allgatherv() and Alltoallv() are also supported, they can only communicate objects exposing memory buffers.
These vector variants are no exception to the dtype restriction on the upper-case functions, either:
from mpi4py import MPI
import numpy
Comm = MPI.COMM_WORLD
Size = Comm.Get_size()
Rank = Comm.Get_rank()
if Rank == 0:
    Data = numpy.empty(2*Size+1, dtype=numpy.dtype('float64'))
else:
    Data = None

if Rank == 0:
    Datb = numpy.empty(3, dtype=numpy.dtype('float64'))
else:
    Datb = numpy.empty(2, dtype=numpy.dtype('float64'))

Comm.Scatterv(Data, Datb, 0)  # I work fine!
print("Datb on rank %d: " % Rank, Datb)
versus
from mpi4py import MPI
import numpy
Comm = MPI.COMM_WORLD
Size = Comm.Get_size()
Rank = Comm.Get_rank()
if Rank == 0:
    Data = numpy.empty(2*Size+1, dtype=object)
else:
    Data = None

if Rank == 0:
    Datb = numpy.empty(3, dtype=object)
else:
    Datb = numpy.empty(2, dtype=object)

Comm.Scatterv(Data, Datb, 0)  # I throw KeyError!
print("Datb on rank %d: " % Rank, Datb)
You'll unfortunately need to write your code so that it can use the lower-case scatter and gather (which pickle one arbitrary Python object per rank, so the per-rank chunks need not even be the same size), fall back to more primitive point-to-point communication functions, or use some parallel facility other than Mpi4py; a sketch of the first option follows the version note below.
Using Mpi4py 2.0.0, the current stable version at the time of this writing.
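For the question's use case, a minimal sketch of that first option, assuming it is acceptable to pickle the rows (the array contents here are illustrative stand-ins, not data from the original post):

from mpi4py import MPI
import numpy as np

comm = MPI.COMM_WORLD
rank = comm.Get_rank()
numprocs = comm.size

if rank == 0:
    # Stand-in for the loadtxt result in the question.
    X = np.empty((30, 12), dtype=object)
    X[:] = "some string"
    # One sub-array per rank; the chunks may have different lengths.
    chunks = np.array_split(X, numprocs)
else:
    chunks = None

local = comm.scatter(chunks, root=0)   # lower-case scatter pickles objects
# ... each rank works on its `local` rows here ...
gathered = comm.gather(local, root=0)  # lower-case gather returns a list on root

if rank == 0:
    result = np.vstack(gathered)
    print("Gathered array shape:", result.shape)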