Problem with ImageGenerator and Model.predict - image

As input to my CNN I need three images, which I preprocess using ImageDataGenerator and flow_from_dataframe:
idg = ImageDataGenerator(rescale = 1./255)
A_gen = idg.flow_from_dataframe(df,directory = path,x_col = 'A',y_col = 'class',target_size = (IMG_HEIGHT,IMG_WIDTH),
class_mode = 'binary',seed=1,batch_size=batch_size)
B_gen = idg.flow_from_dataframe(df,directory = path,x_col = 'taste1',y_col = 'class',target_size = (IMG_HEIGHT,IMG_WIDTH),
class_mode = 'binary',seed=1,batch_size=batch_size)
C_gen = idg.flow_from_dataframe(df,directory = path,x_col = 'taste2',y_col = 'class',target_size = (IMG_HEIGHT,IMG_WIDTH),
class_mode = 'binary',seed=1,batch_size=batch_size)
Then, I put all the 3 generators in one, using:
def combine(A, B, C):
    """Merge three labelled batch iterators into one multi-input batch stream.

    Args:
        A, B, C: iterables whose items are indexable batches with the images
            at position 0 and the labels at position 1.

    Yields:
        ``([images_A, images_B, images_C], labels_A)`` for every step.

    Only the labels of ``A`` are used, so the three inputs are presumably
    expected to deliver their rows in the same order (verify that they are
    created with identical seed/shuffle settings).
    """
    # zip() drives the inputs through the standard iterator protocol instead
    # of a ``.next()`` method, so plain Python iterators work as well, and the
    # stream stops cleanly when the shortest input is exhausted.
    for batch_a, batch_b, batch_c in zip(A, B, C):
        yield [batch_a[0], batch_b[0], batch_c[0]], batch_a[1]
inputgenerator = combine(A_gen,B_gen,C_gen)
The beginning of my CNN looks like this:
def simple_cnn():
pic_input1 = Input(shape=(IMG_HEIGHT, IMG_WIDTH, 3))
pic_input2 = Input(shape=(IMG_HEIGHT, IMG_WIDTH, 3))
pic_input3 = Input(shape=(IMG_HEIGHT, IMG_WIDTH, 3))
cnn1 = BatchNormalization()(pic_input1)
cnn2 = BatchNormalization()(pic_input2)
cnn3 = BatchNormalization()(pic_input3)
... (rest is not relevant I guess)
Then, I fit my model using :
# steps_per_epoch has to be a whole number of batches; integer ceiling
# division keeps the last, partial batch instead of passing a float.
model.fit(inputgenerator, steps_per_epoch=(len(df) + batch_size - 1) // batch_size, epochs=4)
Up to this point, everything works perfectly. (I know I need to use a validation set etc., but first I want to make sure I know how to deal with multiple generators.)
But, when I want to make prediction, with my testgenerator that is :
idg2 = ImageDataGenerator(rescale = 1./255)
D_gen = idg2.flow_from_dataframe(df2,directory = path,x_col = 'D',y_col = 'None',target_size = (IMG_HEIGHT,IMG_WIDTH),
class_mode = None,seed=1,batch_size=1)
E_gen = idg2.flow_from_dataframe(df2,directory = path,x_col = 'E',y_col = 'None',target_size = (IMG_HEIGHT,IMG_WIDTH),
class_mode = None,seed=1,batch_size=1)
F_gen = idg2.flow_from_dataframe(df2,directory = path,x_col = 'F',y_col = 'None',target_size = (IMG_HEIGHT,IMG_WIDTH),
class_mode = None,seed=1,batch_size=1)
testgenerator = combine_test(D_gen,E_gen,F_gen)
pred = model.predict(testgenerator)
# Combine three unlabelled test iterators into a single multi-input stream:
# every step pulls one batch from A, B and C and yields them as a list.
# NOTE(review): this is the version that produces the ValueError shown in the
# traceback. The test iterators are built with class_mode=None, so each batch
# appears to be the image array itself rather than an (images, labels) pair;
# indexing it with [0] then picks a single image and drops the batch
# dimension the model input expects.
def combine_test(A,B,C):
while True:
X1i = A.next()
X2i = B.next()
X3i = C.next()
yield [X1i[0], X2i[0],X3i[0]]
I got the following error :
Traceback (most recent call last):
File "/home/maeul/Documents/ETHZ/2ndSemester/IntroToMachineLearning/Task4/Task4.py", line 228, in <module>
pred = model.predict(testgenerator)
File "/home/maeul/anaconda3/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/training.py", line 1013, in predict
use_multiprocessing=use_multiprocessing)
File "/home/maeul/anaconda3/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/training_v2.py", line 498, in predict
workers=workers, use_multiprocessing=use_multiprocessing, **kwargs)
File "/home/maeul/anaconda3/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/training_v2.py", line 426, in _model_iteration
use_multiprocessing=use_multiprocessing)
File "/home/maeul/anaconda3/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/training_v2.py", line 706, in _process_inputs
use_multiprocessing=use_multiprocessing)
File "/home/maeul/anaconda3/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/data_adapter.py", line 767, in __init__
dataset = standardize_function(dataset)
File "/home/maeul/anaconda3/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/training_v2.py", line 684, in standardize_function
return dataset.map(map_fn, num_parallel_calls=dataset_ops.AUTOTUNE)
File "/home/maeul/anaconda3/lib/python3.7/site-packages/tensorflow_core/python/data/ops/dataset_ops.py", line 1591, in map
self, map_func, num_parallel_calls, preserve_cardinality=True)
File "/home/maeul/anaconda3/lib/python3.7/site-packages/tensorflow_core/python/data/ops/dataset_ops.py", line 3926, in __init__
use_legacy_function=use_legacy_function)
File "/home/maeul/anaconda3/lib/python3.7/site-packages/tensorflow_core/python/data/ops/dataset_ops.py", line 3147, in __init__
self._function = wrapper_fn._get_concrete_function_internal()
File "/home/maeul/anaconda3/lib/python3.7/site-packages/tensorflow_core/python/eager/function.py", line 2395, in _get_concrete_function_internal
*args, **kwargs)
File "/home/maeul/anaconda3/lib/python3.7/site-packages/tensorflow_core/python/eager/function.py", line 2389, in _get_concrete_function_internal_garbage_collected
graph_function, _, _ = self._maybe_define_function(args, kwargs)
File "/home/maeul/anaconda3/lib/python3.7/site-packages/tensorflow_core/python/eager/function.py", line 2703, in _maybe_define_function
graph_function = self._create_graph_function(args, kwargs)
File "/home/maeul/anaconda3/lib/python3.7/site-packages/tensorflow_core/python/eager/function.py", line 2593, in _create_graph_function
capture_by_value=self._capture_by_value),
File "/home/maeul/anaconda3/lib/python3.7/site-packages/tensorflow_core/python/framework/func_graph.py", line 978, in func_graph_from_py_func
func_outputs = python_func(*func_args, **func_kwargs)
File "/home/maeul/anaconda3/lib/python3.7/site-packages/tensorflow_core/python/data/ops/dataset_ops.py", line 3140, in wrapper_fn
ret = _wrapper_helper(*args)
File "/home/maeul/anaconda3/lib/python3.7/site-packages/tensorflow_core/python/data/ops/dataset_ops.py", line 3082, in _wrapper_helper
ret = autograph.tf_convert(func, ag_ctx)(*nested_args)
File "/home/maeul/anaconda3/lib/python3.7/site-packages/tensorflow_core/python/autograph/impl/api.py", line 237, in wrapper
raise e.ag_error_metadata.to_exception(e)
ValueError: in converted code:
/home/maeul/anaconda3/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/training_v2.py:677 map_fn
batch_size=None)
/home/maeul/anaconda3/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/training.py:2410 _standardize_tensors
exception_prefix='input')
/home/maeul/anaconda3/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/training_utils.py:573 standardize_input_data
'with shape ' + str(data_shape))
ValueError: Error when checking input: expected input_10 to have 4 dimensions, but got array with shape (None, None, None)
I guess this is related to the batch size of a single generator, but I don't know how to "trick" the model.predict by adding a trivial dimension in each image generated...
Thanks in advance for your help !

I finally found an answer: as there are no label in the generators
D_gen = idg2.flow_from_dataframe(df2,directory = path,x_col = 'D',y_col = 'None',target_size = (IMG_HEIGHT,IMG_WIDTH),
class_mode = None,seed=1,batch_size=1)
E_gen = idg2.flow_from_dataframe(df2,directory = path,x_col = 'E',y_col = 'None',target_size = (IMG_HEIGHT,IMG_WIDTH),
class_mode = None,seed=1,batch_size=1)
F_gen = idg2.flow_from_dataframe(df2,directory = path,x_col = 'F',y_col = 'None',target_size = (IMG_HEIGHT,IMG_WIDTH),
class_mode = None,seed=1,batch_size=1)
This means that each generator yields a bare array of images rather than an (images, labels) pair. So when I write Xi[0] in combine_test, I am selecting only the first element of the batch and not the whole batch (the technical terms are probably wrong, but that is how I understand it with my basic knowledge). I therefore need to replace Xi[0] with simply Xi.
To fix this error, modify the "combine_test" function as follows:
def combine_test(A, B, C):
    """Merge three unlabelled batch iterators into one multi-input stream.

    Args:
        A, B, C: iterables that yield one batch (an array of images) per step.

    Yields:
        ``[batch_A, batch_B, batch_C]`` for every step. The batches are passed
        through untouched, so their batch dimension is preserved.

    The stream ends with the shortest input. If the inputs never end, the
    consumer has to bound the number of batches itself (for example with the
    ``steps`` argument of ``model.predict``).
    """
    # zip() uses the standard iterator protocol rather than a ``.next()``
    # method, so any iterable of batches is accepted.
    for batch_a, batch_b, batch_c in zip(A, B, C):
        yield [batch_a, batch_b, batch_c]

Related

Fine tuning Bert for NER attempt on Mac OS

I'm using a MacBook Air running macOS Monterey 12.5 (there are updates available: Ventura 13.1).
Python version 3.10.8 and also tried using 3.11
Pylance has pointed that all the imports I was trying to execute were not being resolved so I changed the VS Code interpreter to Python 3.10.
Anyways, here's the code:
import pandas as pd
import torch
import numpy as np
from tqdm import tqdm
from transformers import BertTokenizerFast
from transformers import BertForTokenClassification
from torch.utils.data import Dataset, DataLoader
# Load the corpus and build the label vocabulary.
df = pd.read_csv('ner.csv')
# One list of whitespace-separated tags per row.
labels = [i.split() for i in df['labels'].values.tolist()]
unique_labels = set()
for lb in labels:
    # set.update already ignores duplicates; no membership test needed.
    unique_labels.update(lb)
# print(unique_labels)
# Sort once so both mappings are guaranteed to use the same numbering.
sorted_labels = sorted(unique_labels)
labels_to_ids = {k: v for v, k in enumerate(sorted_labels)}
ids_to_labels = dict(enumerate(sorted_labels))
# print(labels_to_ids)
text = df['text'].values.tolist()
example = text[36]
#print(example)
tokenizer = BertTokenizerFast.from_pretrained('bert-base-uncased')
text_tokenized = tokenizer(example, padding='max_length', max_length=512, truncation=True, return_tensors='pt')
'''
print(text_tokenized)
print(tokenizer.decode(text_tokenized.input_ids[0]))
'''
def align_label_example(tokenized_input, labels):
    """Spread word-level labels over the tokens of one tokenized text.

    Args:
        tokenized_input: tokenizer output that provides ``word_ids()``.
        labels: list of label strings, one per word of the text.

    Returns:
        A list with one label id per token. Tokens without a word, words
        without a usable label and (unless the module-level flag
        ``label_all_tokens`` is true) continuation tokens of a word get -100.
    """
    label_ids = []
    previous_word_idx = None
    for word_idx in tokenized_input.word_ids():
        if word_idx is None:
            label_ids.append(-100)
        elif word_idx != previous_word_idx or label_all_tokens:
            # First token of a word, or a continuation token that should
            # inherit the word's label.
            try:
                label_ids.append(labels_to_ids[labels[word_idx]])
            except (KeyError, IndexError):
                # Unknown tag or fewer labels than words: ignore the token.
                label_ids.append(-100)
        else:
            label_ids.append(-100)
        previous_word_idx = word_idx
    return label_ids
label = labels[36]
label_all_tokens = False
new_label = align_label_example(text_tokenized, label)
'''
print(new_label)
print(tokenizer.convert_ids_to_tokens(text_tokenized['input_ids'][0]))
'''
def align_label(texts, labels):
    """Tokenize one text and spread its word-level labels over the tokens.

    Args:
        texts: the text to tokenize (padded/truncated to 512 tokens).
        labels: list of label strings, one per word of the text.

    Returns:
        A list with one label id per token. Tokens without a word, words
        without a usable label and (unless the module-level flag
        ``label_all_tokens`` is true) continuation tokens of a word get -100.
    """
    tokenized_inputs = tokenizer(texts, padding='max_length', max_length=512, truncation=True)
    label_ids = []
    previous_word_idx = None
    for word_idx in tokenized_inputs.word_ids():
        if word_idx is None:
            label_ids.append(-100)
        elif word_idx != previous_word_idx or label_all_tokens:
            # First token of a word, or a continuation token that should
            # inherit the word's label.
            try:
                label_ids.append(labels_to_ids[labels[word_idx]])
            except (KeyError, IndexError):
                # Unknown tag or fewer labels than words: ignore the token.
                # Only these two lookup failures are tolerated; anything else
                # is a real error and must surface.
                label_ids.append(-100)
        else:
            label_ids.append(-100)
        previous_word_idx = word_idx
    return label_ids
class DataSequence(torch.utils.data.Dataset):
    """Dataset of tokenized texts paired with token-aligned label ids."""

    def __init__(self, df):
        """Tokenize every row of ``df`` and align its labels.

        Args:
            df: table with a ``'text'`` column and a ``'labels'`` column of
                whitespace-separated tags.
        """
        label_lists = [row.split() for row in df['labels'].values.tolist()]
        # Convert once so tokenization and label alignment see the same text.
        txt = [str(t) for t in df['text'].values.tolist()]
        self.texts = [
            tokenizer(t, padding='max_length', max_length=512, truncation=True, return_tensors='pt')
            for t in txt
        ]
        self.labels = [align_label(t, tags) for t, tags in zip(txt, label_lists)]

    def __len__(self):
        return len(self.labels)

    def get_batch_data(self, idx):
        """Return the tokenized text stored at position ``idx``."""
        # __getitem__ relies on this accessor; it was missing from the class.
        return self.texts[idx]

    def get_batch_labels(self, idx):
        """Return the aligned label ids at position ``idx`` as a LongTensor."""
        return torch.LongTensor(self.labels[idx])

    def __getitem__(self, idx):
        batch_data = self.get_batch_data(idx)
        batch_labels = self.get_batch_labels(idx)
        return batch_data, batch_labels
df = df[0:1000]
df_train, df_val, df_test = np.split(df.sample(frac=1, random_state=42),
[int(.8 * len(df)), int(.9 * len(df))])
class BertModel(torch.nn.Module):
    """Thin wrapper around a pretrained BERT token-classification model."""

    def __init__(self, pretrained_name='bert-base-uncased'):
        """Load the pretrained network with one output per known label.

        Args:
            pretrained_name: checkpoint to load. It has to match the
                checkpoint of the tokenizer that produces the input ids; the
                default follows the ``bert-base-uncased`` tokenizer used in
                this script (the block previously loaded ``bert-base-cased``,
                whose vocabulary differs).
        """
        super().__init__()
        self.bert = BertForTokenClassification.from_pretrained(
            pretrained_name, num_labels=len(unique_labels))

    def forward(self, input_id, mask, label):
        """Run the wrapped model and return its raw output.

        With ``return_dict=False`` the output is a tuple; the training loop
        in this script unpacks it as ``(loss, logits)``.
        """
        return self.bert(input_ids=input_id, attention_mask=mask, labels=label, return_dict=False)
def _masked_accuracy_sum(logits, labels):
    """Return the sum of per-sample token accuracies, skipping -100 labels."""
    total = 0.0
    for sample_logits, sample_labels in zip(logits, labels):
        keep = sample_labels != -100
        predictions = sample_logits[keep].argmax(dim=1)
        total += (predictions == sample_labels[keep]).float().mean().item()
    return total


def train_loop(model, df_train, df_val):
    """Train ``model`` on ``df_train`` and report metrics on ``df_val``.

    Runs ``EPOCHS`` epochs of SGD with learning rate ``LEARNING_RATE`` and
    batch size ``BATCH_SIZE`` (module-level settings) and prints the mean
    loss and accuracy for both splits after every epoch.

    The data loaders use worker processes (``num_workers=4``); on platforms
    that start workers with "spawn" this function has to be called from
    inside an ``if __name__ == '__main__':`` guard.

    Args:
        model: network called as ``model(input_ids, mask, labels)`` and
            returning ``(loss, logits)``.
        df_train: training rows, passed to ``DataSequence``.
        df_val: validation rows, passed to ``DataSequence``.
    """
    train_dataset = DataSequence(df_train)
    val_dataset = DataSequence(df_val)
    train_dataloader = DataLoader(train_dataset, num_workers=4, batch_size=BATCH_SIZE, shuffle=True)
    val_dataloader = DataLoader(val_dataset, num_workers=4, batch_size=BATCH_SIZE)

    use_cuda = torch.cuda.is_available()
    device = torch.device('cuda' if use_cuda else 'cpu')
    optimizer = torch.optim.SGD(model.parameters(), lr=LEARNING_RATE)
    if use_cuda:
        model = model.cuda()

    for epoch_num in range(EPOCHS):
        total_acc_train = 0.0
        total_loss_train = 0.0
        model.train()
        for train_data, train_label in tqdm(train_dataloader):
            train_label = train_label.to(device)
            # The tokenizer output has shape (batch, 1, seq); drop the extra axis.
            mask = train_data['attention_mask'].squeeze(1).to(device)
            input_id = train_data['input_ids'].squeeze(1).to(device)
            optimizer.zero_grad()
            loss, logits = model(input_id, mask, train_label)
            total_acc_train += _masked_accuracy_sum(logits, train_label)
            # The batch loss is counted once per sample so that dividing by
            # the number of rows below yields a per-sample mean.
            total_loss_train += loss.item() * logits.shape[0]
            loss.backward()
            optimizer.step()

        model.eval()
        total_acc_val = 0.0
        total_loss_val = 0.0
        # No gradients are needed for evaluation; skipping them saves memory.
        with torch.no_grad():
            for val_data, val_label in val_dataloader:
                val_label = val_label.to(device)
                mask = val_data['attention_mask'].squeeze(1).to(device)
                input_id = val_data['input_ids'].squeeze(1).to(device)
                loss, logits = model(input_id, mask, val_label)
                total_acc_val += _masked_accuracy_sum(logits, val_label)
                total_loss_val += loss.item() * logits.shape[0]

        train_loss = total_loss_train / len(df_train)
        train_accuracy = total_acc_train / len(df_train)
        val_loss = total_loss_val / len(df_val)
        val_accuracy = total_acc_val / len(df_val)
        print(
            f'Epochs: {epoch_num + 1} | Loss: {train_loss: .3f} | Accuracy: {train_accuracy: .3f} | Val_Loss: {val_loss: .3f} | Accuracy: {val_accuracy: .3f}')
# Hyper-parameters read by train_loop.
LEARNING_RATE = 5e-3
EPOCHS = 5
BATCH_SIZE = 2
# NOTE(review): the two statements below run at import time. train_loop builds
# DataLoaders with num_workers=4, and the RuntimeError quoted below asks for
# the `if __name__ == '__main__':` idiom, which is what the accepted fix adds.
model = BertModel()
train_loop(model, df_train, df_val)
And the debugger says:
Exception has occurred: RuntimeError (note: full exception trace is shown but execution is paused at: <module>)
An attempt has been made to start a new process before the
current process has finished its bootstrapping phase.
This probably means that you are not using fork to start your
child processes and you have forgotten to use the proper idiom
in the main module:
if __name__ == '__main__':
freeze_support()
...
The "freeze_support()" line can be omitted if the program
is not going to be frozen to produce an executable.
File "/Users/filipedonatti/Projects/pyCodes/second_try.py", line 141, in train_loop
for train_data, train_label in tqdm(train_dataloader):
File "/Users/filipedonatti/Projects/pyCodes/second_try.py", line 197, in <module>
train_loop(model, df_train, df_val)
File "<string>", line 1, in <module> (Current frame)
By the way,
Despite using Mac, I have downloaded Anaconda-Navigator, however I've been trying and executing this code on VS Code. I've downloaded numpy, torch, datasets and other libraries through Brew with the pip3 command.
I'm at a loss. I can run the code in a Google Colab notebook or a Jupyter notebook, and I know that training models on my humble Mac is not advisable, but I am just practising so that I can later train and use the model on a much more powerful machine.
Please help me with this issue, I've been trying to find a solution for days.
Peace and happy holidays.
I've tried solving the issue by writing:
if __name__ == '__main__':
freeze_support()
I've tried using this:
import parallelTestModule
extractor = parallelTestModule.ParallelExtractor()
extractor.runInParallel(numProcesses=2, numThreads=4)
So...
It turns out the correct way to solve this is to implement a function to train the loop as such:
def run():
    """Create the model and start training.

    Intended to be invoked only from the main-module guard below, so that
    worker processes importing this file do not start training again.
    """
    net = BertModel()
    torch.multiprocessing.freeze_support()
    print('loop')
    train_loop(net, df_train, df_val)


if __name__ == '__main__':
    run()
Redefining that train_loop line in the end. Issue solved. For more see this link: https://github.com/pytorch/pytorch/issues/5858

How to correctly use Distributed Data Parallel when customizing 'parameters' in the model ?

I have customized a parameter in my model:
self.params = list(self.backbone.parameters())
for head in self.headlist:
self.params += list(head.parameters())
When I wrap my model with DDP, an error occurs when defining the optimizer
optimizer = optim.SGD(model.params, lr=FLAGS.lr, momentum=FLAGS.momentum, weight_decay=FLAGS.weight_decay)
AttributeError 'DistributedDataParallel' object has no attribute 'params '
I think the error is probably caused by my customized "self.params"
Is the following code correct:
# device_ids is documented as a list of devices, so the rank is wrapped in one.
model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[local_rank])
# The wrapper exposes the original network as `.module`; custom attributes
# such as `params` live on that object, not on the wrapper.
model_without_ddp = model.module
optimizer = optim.SGD(model_without_ddp.params, lr=FLAGS.lr, momentum=FLAGS.momentum, weight_decay=FLAGS.weight_decay)
Or is there any simpler code?
###################################
The detailed definition of the network is as follows:
class multiheadModel():
    """Shared backbone plus ``num_heads`` prediction heads handled as one unit.

    This is a plain Python object (not an ``nn.Module``). It owns a backbone
    network, a list of head networks and one complete network
    (``self.model``) that is only used as a container when checkpoints are
    saved and loaded.
    """

    def __init__(self, num_heads, device, model_name):
        """Build backbone, heads and container network.

        Args:
            num_heads: number of heads to create.
            device: device every sub-network is moved to.
            model_name: ``'fcn8s'`` or ``'deeplab'``; any other value prints
                an error message and exits the process.
        """
        self.device = device
        self.num_heads = num_heads  # global+K
        if model_name == 'fcn8s':
            self.backbone = VGG16_FCN8s(num_classes=19, backbone=1, head=0).to(device)
            self.headlist = [VGG16_FCN8s(num_classes=19, backbone=0, head=1).to(device) for i in range(num_heads)]
            self.model = VGG16_FCN8s(num_classes=19).to(device)
            # Only the conv3/conv4 parameters of the backbone stay trainable.
            for name, param in self.backbone.named_parameters():
                param.requires_grad = ('conv3' in name) or ('conv4' in name)
        elif model_name == 'deeplab':
            self.backbone = Res_Deeplab(num_classes=19, backbone=1, head=0).to(device)
            self.headlist = [Res_Deeplab(num_classes=19, backbone=0, head=1).to(device) for i in range(num_heads)]
            self.model = Res_Deeplab(num_classes=19).to(device)
            # Only the layer3 parameters of the backbone stay trainable.
            # (The attribute was previously misspelled "required_grad", which
            # silently left every other parameter trainable.)
            for name, param in self.backbone.named_parameters():
                param.requires_grad = 'layer3' in name
        else:
            print('ERROR : wrong model name')
            sys.exit()
        # Flat list of all backbone and head parameters, e.g. for an optimizer.
        self.params = list(self.backbone.parameters())
        for head in self.headlist:
            self.params += list(head.parameters())
        # Must be assigned by the caller before computePredLoss is used.
        self.loss_fn = None
        #self.k2head = {0:2,1:1,2:0,3:0,4:0,5:4,6:4,7:5}
        #self.k2head = {0:2,1:1,2:0,3:0,4:0,5:3,6:3,7:4}
        # Maps the key ``k`` of a sample to the index of its head.
        self.k2head = {0:2,1:1,2:0,3:0,4:3,5:3,6:4}

    # set train and eval mode
    def train(self):
        """Put the backbone and all heads into training mode."""
        self.backbone.train()
        for head in self.headlist:
            head.train()

    def eval(self):
        """Put the backbone and all heads into evaluation mode."""
        self.backbone.eval()
        for head in self.headlist:
            head.eval()

    def computePredLoss(self, rgb, lbl, k):
        """Sum the loss of every head except the one mapped to ``k``.

        Returns:
            ``(pred, loss)`` where ``pred`` is the upsampled prediction of the
            last head that was evaluated and ``loss`` the accumulated loss.
        """
        x = self.backbone(rgb)
        head_id = list(range(self.num_heads))
        head_id.remove(self.k2head[k])
        input_size = rgb.size()[2:]
        loss = 0
        for i in head_id:
            pred = self.headlist[i](x)
            # Bring the prediction back to the spatial size of the input.
            pred = F.interpolate(pred, size=input_size, mode='bilinear', align_corners=True)
            loss += self.loss_fn(pred, lbl)
        return pred, loss

    def forward(self, input):
        """Run a training or an inference step, depending on ``input``.

        Args:
            input: dict with key ``'rgb'``. If it also has ``'label'`` (and
                ``'k'``) a training step is run; otherwise the head selected
                through ``'k'`` (or the last head when ``'k'`` is absent) is
                used for prediction.

        Returns:
            dict with ``'pred'`` and, for a training step, ``'loss'``.
        """
        output = {}
        if "label" in input:
            self.train()
            pred, loss = self.computePredLoss(input['rgb'], input['label'], input['k'])
            output['pred'], output['loss'] = pred, loss
        else:
            self.eval()
            x = self.backbone(input['rgb'])
            k = -1
            if "k" in input:
                k = self.k2head[input['k']]
            pred = self.headlist[k](x)
            input_size = input['rgb'].size()[2:]
            pred = F.interpolate(pred, size=input_size, mode='bilinear', align_corners=True)
            output['pred'] = pred
        return output

    def _predict_classes(self, head, features, input_size):
        """Return the per-pixel argmax of ``head`` as a NumPy array."""
        pred = head(features)
        pred = F.interpolate(pred, size=input_size, mode='bilinear', align_corners=True)
        return pred.detach().max(dim=1)[1].cpu().numpy()

    def validate(self, loader, k=-2):
        """Evaluate one head, or all heads, on ``loader``.

        Args:
            loader: iterable of ``(label_batch, rgb_batch)`` pairs.
            k: index of the head to evaluate; the default -2 evaluates every
                head.

        Returns:
            The metric results of head ``k``, or a list with one result per
            head when ``k`` is -2.
        """
        self.eval()
        if k != -2:
            val_metrics = StreamSegMetrics(19)
            val_metrics.reset()
            with torch.no_grad():
                for batch, rgb_batch in loader:
                    rgb_batch = rgb_batch.to(device=self.device, dtype=torch.float)
                    batch = batch.to(device=self.device, dtype=torch.int64)
                    input_size = rgb_batch.size()[2:]
                    x = self.backbone(rgb_batch)
                    preds = self._predict_classes(self.headlist[k], x, input_size)
                    targets = batch.cpu().numpy()
                    val_metrics.update(targets, preds)
            score = val_metrics.get_results()
        else:
            val_metrics = [StreamSegMetrics(19) for i in range(self.num_heads)]
            for metric in val_metrics:
                metric.reset()
            with torch.no_grad():
                for batch, rgb_batch in loader:
                    rgb_batch = rgb_batch.to(device=self.device, dtype=torch.float)
                    batch = batch.to(device=self.device, dtype=torch.int64)
                    input_size = rgb_batch.size()[2:]
                    x = self.backbone(rgb_batch)
                    # The targets are the same for every head; convert once.
                    targets = batch.cpu().numpy()
                    for head_idx in range(self.num_heads):
                        preds = self._predict_classes(self.headlist[head_idx], x, input_size)
                        val_metrics[head_idx].update(targets, preds)
            score = [metric.get_results() for metric in val_metrics]
        return score

    def getHeadPaths(self, model_path, iteration=-1):
        """Return the checkpoint path of every head, derived from ``model_path``.

        The first ``num_heads - 1`` paths carry the suffix ``_except_g<letter>``
        (``a``, ``b``, ...); the last path is the main checkpoint. With
        ``iteration`` other than -1 every path also carries ``_iter<iteration>``.
        """
        if '_iter' in model_path:
            base_path = model_path.split('_iter')[0]
        else:
            base_path = model_path.split('.pth')[0]
        head_paths = []
        if iteration == -1:
            for i in range(self.num_heads - 1):
                head_paths.append(base_path + '_except_g' + chr(97 + i) + '.pth')
            head_paths.append(model_path)
        else:
            for i in range(self.num_heads - 1):
                head_paths.append(base_path + '_except_g' + chr(97 + i) + '_iter' + str(iteration) + '.pth')
            head_paths.append(base_path + '_iter' + str(iteration) + '.pth')
        return head_paths

    def save(self, model_path, iteration=-1):
        """Write one full checkpoint per head (backbone weights + that head)."""
        self.model.load_state_dict(self.backbone.state_dict(), strict=False)
        head_paths = self.getHeadPaths(model_path, iteration)
        for i in range(self.num_heads):
            # Overwrite the head part of the container, then store it.
            self.model.load_state_dict(self.headlist[i].state_dict(), strict=False)
            torch.save(self.model.state_dict(), head_paths[i])

    def load(self, model_path):
        """Restore backbone and heads from checkpoints next to ``model_path``.

        If a checkpoint exists for every head, each head is loaded from its
        own file; otherwise all heads are initialised from the main
        checkpoint.
        """
        iteration = -1
        if '_iter' in model_path:
            iteration = int(model_path.split('_iter')[1].split('.pth')[0])
        self.model.load_state_dict(torch.load(model_path))
        self.backbone.load_state_dict(self.model.state_dict(), strict=False)
        head_paths = self.getHeadPaths(model_path, iteration)
        if all(os.path.isfile(path) for path in head_paths):
            print('loading from multiheads')
            for i in range(self.num_heads):
                self.model.load_state_dict(torch.load(head_paths[i]))
                self.headlist[i].load_state_dict(self.model.state_dict(), strict=False)
        else:
            print('loading from singlehead')
            for i in range(self.num_heads):
                self.model.load_state_dict(torch.load(head_paths[-1]))
                self.headlist[i].load_state_dict(self.model.state_dict(), strict=False)

    def __call__(self, input):
        return self.forward(input)

No gradients provided for any variable tensorflow 2.0

What is this error about?
I am using Tensorflow 2.0, and while running the code, I am receiving this error. I am coding image caption generator.
def create_model():
    """Build the two-input captioning network.

    One branch takes a 4096-dimensional feature vector, the other a token
    sequence of length ``max_length``; both are reduced to 256 units, added
    and decoded into a softmax over ``vocab_size`` words. The embedding layer
    is initialised from ``embedding_matrix`` and frozen.

    Returns:
        The uncompiled Keras model.
    """
    # Feature branch.
    inputs1 = Input(shape=(4096,))
    fe1 = Dropout(0.8)(inputs1)
    fe3 = Dense(256, activation='relu')(fe1)

    # Sequence branch.
    inputs2 = Input(shape=(max_length,))
    embedding = Embedding(vocab_size, embedding_dim, mask_zero=True)
    se1 = embedding(inputs2)
    se2 = Dropout(0.8)(se1)
    se4 = LSTM(256)(se2)

    # Decoder.
    decoder1 = add([fe3, se4])
    decoder2 = Dense(256, activation='relu')(decoder1)
    outputs = Dense(vocab_size, activation='softmax')(decoder2)
    model = Model(inputs=[inputs1, inputs2], outputs=outputs)

    # Address the embedding layer through its own reference rather than
    # through its position in model.layers, which depends on graph ordering.
    embedding.set_weights([embedding_matrix])
    embedding.trainable = False
    return model
model = create_model()
model.compile(loss='categorical_crossentropy', optimizer='adam')
model.fit(train_gen, epochs=1, steps_per_epoch=steps, verbose=1, validation_data=dev_gen, validation_steps=dev_steps, callbacks=[model_checkpoint_callback])
ValueError: in user code:
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/training.py:571 train_function *
outputs = self.distribute_strategy.run(
/usr/local/lib/python3.6/dist-packages/tensorflow/python/distribute/distribute_lib.py:951 run **
return self._extended.call_for_each_replica(fn, args=args, kwargs=kwargs)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/distribute/distribute_lib.py:2290 call_for_each_replica
return self._call_for_each_replica(fn, args, kwargs)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/distribute/distribute_lib.py:2649 _call_for_each_replica
return fn(*args, **kwargs)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/training.py:541 train_step **
self.trainable_variables)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/training.py:1804 _minimize
trainable_variables))
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/optimizer_v2/optimizer_v2.py:521 _aggregate_gradients
filtered_grads_and_vars = _filter_grads(grads_and_vars)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/optimizer_v2/optimizer_v2.py:1219 _filter_grads
([v.name for _, v in grads_and_vars],))
ValueError: No gradients provided for any variable: ['dense_3/kernel:0', 'dense_3/bias:0', 'lstm_1/lstm_cell_1/kernel:0', 'lstm_1/lstm_cell_1/recurrent_kernel:0', 'lstm_1/lstm_cell_1/bias:0', 'dense_4/kernel:0', 'dense_4/bias:0', 'dense_5/kernel:0', 'dense_5/bias:0'].

Can't pickle <function <lambda> when using multiprocessing Pool.map()

I'm trying to parallelize my python script with the multiprocessing library. My function is part of a class and I used Pool.map.
import numpy as np
import pandas as pd
import netCDF4
import itertools
import multiprocessing as mpp
from tqdm import tqdm
Class catch2grid(object):
def __init__(self):
"""Init of catch2grid."""
self.pbar = None
...
def main(self, db_Qobs_meta_dir, ens_mean_dir, ens_sd_dir, db_Qobs_dir,
         range_start, range_end):
    """Sequential computing of several flow percentiles for Qobs and Qsim,
    the standard deviation of the flow percentiles of Qsim and the
    KGE alpha.

    db_Qobs_meta_dir -- Path to file with meta informations on the
    Catchments
    ens_mean_dir -- Path to file with runoff ensemble mean
    ens_sd_dir -- Path to file with runoff ensemble standard deviation
    db_Qobs_dir -- Path to folder with observed runoff database_Qobs_new
    range_start -- starting value of range
    range_end -- stopping value of range

    Returns four objects indexed by catchment name: observed percentiles,
    simulated percentiles, standard deviations of the simulated percentiles
    and the KGE alpha values.
    """
    # Percentiles in the order in which they are evaluated.
    percentiles = (95, 75, 50, 25, 5)

    range_catch = range(range_start, range_end)
    df_meta = self.import_meta(db_Qobs_meta_dir)
    df_meta = self.select_catchments(df_meta)
    Ens_mean, Ens_mean_Q = self.import_ens_mean(ens_mean_dir)
    Ens_sd, Ens_sd_Q = self.import_ens_sd(ens_sd_dir)
    Grid_lats_cen, Grid_lons_cen = self.grid_cen_arr(Ens_mean)

    df_Qobs_percs = pd.DataFrame(index=range_catch, columns=
                                 ['Catch_name', 't_scale_Qobs', 'Time_cov',
                                  'Q_5', 'Q_25', 'Q_50',
                                  'Q_75', 'Q_95'])
    df_Qsim_percs = pd.DataFrame(index=range_catch, columns=
                                 ['Catch_name', 'Q_5', 'Q_25', 'Q_50',
                                  'Q_75', 'Q_95'])
    df_sdQsim_percs = pd.DataFrame(index=range_catch, columns=
                                   ['Catch_name', 'sdQsim_5', 'sdQsim_25',
                                    'sdQsim_50', 'sdQsim_75', 'sdQsim_95'])
    df_KGE_alpha = pd.DataFrame(index=range_catch, columns=['KGE_alpha'])

    # Positional slice of the catchment names handled by this call.
    catch_names = df_meta['Catchments'].iloc[range_catch[0]:range_catch[-1]+1]
    df_Qobs_percs['Catch_name'] = catch_names
    df_Qsim_percs['Catch_name'] = catch_names
    df_sdQsim_percs['Catch_name'] = catch_names
    df_KGE_alpha['Catch_name'] = catch_names

    for k in range_catch:
        sum_Lat_bool, sum_Lon_bool, Lat_idx, Lon_idx = self.matchgrid(
            df_meta, db_Qobs_dir, Grid_lats_cen, Grid_lons_cen, k)
        df_Q, t_scale_Qobs = self.Qsim_to_catch(df_meta, db_Qobs_dir,
                                                Ens_mean, Ens_mean_Q,
                                                sum_Lat_bool, sum_Lon_bool,
                                                Lat_idx, Lon_idx, k)
        df_sdQsim = self.sdQsim_to_catch(df_meta, db_Qobs_dir, Ens_sd,
                                         Ens_sd_Q, sum_Lat_bool,
                                         sum_Lon_bool, Lat_idx, Lon_idx, k)

        # .loc writes into the frame itself; chained indexing such as
        # df[col][k] = value may operate on a temporary copy.
        df_Qobs_percs.loc[k, 't_scale_Qobs'] = t_scale_Qobs
        no_NAs = df_Q['Qobs'].isnull().sum()
        df_Qobs_percs.loc[k, 'Time_cov'] = 1 - (no_NAs/len(df_Q.index))

        for perc in percentiles:
            df_Qobs_percs.loc[k, f'Q_{perc}'] = self.flow_perc(df_Q['Qobs'], perc=perc)
        for perc in percentiles:
            df_Qsim_percs.loc[k, f'Q_{perc}'] = self.flow_perc(df_Q['Qsim'], perc=perc)
        for perc in percentiles:
            df_sdQsim_percs.loc[k, f'sdQsim_{perc}'] = self.flow_perc_sd(
                df_Q['Qsim'], df_sdQsim['sdQsim'], perc=perc)

        df_KGE_alpha.loc[k, 'KGE_alpha'] = self.KGE_alpha(df_Q['Qsim'], df_Q['Qobs'])

        # display progress (the bar is None until it has been initialised)
        if self.pbar is not None:
            self.pbar.update(1)

    df_Qobs_percs.index = df_Qobs_percs['Catch_name']
    df_Qsim_percs.index = df_Qsim_percs['Catch_name']
    df_sdQsim_percs.index = df_sdQsim_percs['Catch_name']
    df_KGE_alpha.index = df_KGE_alpha['Catch_name']

    # Keep only the result columns.
    df_Qobs_percs = df_Qobs_percs.loc[:, 'Q_5':'Q_95']
    df_Qsim_percs = df_Qsim_percs.loc[:, 'Q_5':'Q_95']
    df_sdQsim_percs = df_sdQsim_percs.loc[:, 'sdQsim_5':'sdQsim_95']
    df_KGE_alpha = df_KGE_alpha.loc[:, 'KGE_alpha']

    return df_Qobs_percs, df_Qsim_percs, df_sdQsim_percs, df_KGE_alpha
def main_par(self, db_Qobs_meta_dir, ens_mean_dir, ens_sd_dir, db_Qobs_dir):
    """Parallel computing of several flow percentiles for Qobs and Qsim,
    the standard deviation of the flow percentiles of Qsim and the
    KGE alpha.

    db_Qobs_meta_dir -- Path to file with meta informations on the
    Catchments
    ens_mean_dir -- Path to file with runoff ensemble mean
    ens_sd_dir -- Path to file with runoff ensemble standard deviation
    db_Qobs_dir -- Path to folder with observed runoff database_Qobs_new

    The catchments are split into one contiguous chunk per worker and every
    chunk is processed by ``self.main``; the per-chunk results are
    concatenated in chunk order.

    Note: the pool pickles the bound method ``self.main`` and therefore
    ``self``, so everything stored on the instance must be picklable.
    """
    # Leave one core free, but never ask for fewer than one worker.
    cpu_cores = max(mpp.cpu_count() - 1, 1)
    df_meta = self.import_meta(db_Qobs_meta_dir)
    df_meta = self.select_catchments(df_meta)

    # chunking subsets for parallelization
    bounds = [int(b) for b in np.linspace(0, len(df_meta.index), cpu_cores + 1)]
    # list of tuples with input arguments for starmap; empty chunks (fewer
    # catchments than workers) are skipped
    subsets = [(db_Qobs_meta_dir, ens_mean_dir, ens_sd_dir, db_Qobs_dir,
                start, end)
               for start, end in zip(bounds[:-1], bounds[1:])
               if start < end]

    # starmap blocks until all results are in, so the pool can be released
    # as soon as the with-block ends, also when a worker raises.
    with mpp.Pool(cpu_cores) as pool:
        res = pool.starmap(self.main, subsets)

    # collect dataframes and merge them
    res_obs = [chunk[0] for chunk in res]
    res_sim = [chunk[1] for chunk in res]
    res_simsd = [chunk[2] for chunk in res]
    res_kgealpha = [chunk[3] for chunk in res]

    # NOTE(review): ignore_index=True replaces the catchment-name index set
    # by main() with a running integer index -- confirm this is intended.
    df_Qobs_percs = pd.concat(res_obs, ignore_index=True)
    df_Qsim_percs = pd.concat(res_sim, ignore_index=True)
    df_sdQsim_percs = pd.concat(res_simsd, ignore_index=True)
    df_KGE_alpha = pd.concat(res_kgealpha, ignore_index=True)

    return df_Qobs_percs, df_Qsim_percs, df_sdQsim_percs, df_KGE_alpha
...
# Script entry point: create a catch2grid instance and a worker pool, then
# map main_par over a range of integers.
# NOTE(review): the visible imports bind multiprocessing to `mpp`, not `mp`.
# NOTE(review): `l` is not defined anywhere in the visible code.
# NOTE(review): main_par is declared with four path arguments, but map() hands
# it a single integer per call; the traceback below shows a different call,
# c2g.main_par(db_Qobs_meta_dir, Ens_mean_dir, Ens_sd_dir, db_Qobs_dir).
if __name__ == "__main__":
cpu_cores = mp.cpu_count() - 1
c2g = catch2grid()
p = mp.Pool(cpu_cores) # launch pool of workers
c2g.init_pbar(l)
ll_range_catch = list(range(0, 5000))
res = p.map(c2g.main_par, ll_range_catch)
p.close()
p.join()
After running it the following error message is displayed:
File "<ipython-input-1-3828921ab3bd>", line 1, in <module>
runfile('/Users/robinschwemmle/Desktop/MSc_Thesis/Python/catch2grid.py', wdir='/Users/robinschwemmle/Desktop/MSc_Thesis/Python')
File "/Users/robinschwemmle/anaconda/envs/py36/lib/python3.6/site-packages/spyder/utils/site/sitecustomize.py", line 705, in runfile
execfile(filename, namespace)
File "/Users/robinschwemmle/anaconda/envs/py36/lib/python3.6/site-packages/spyder/utils/site/sitecustomize.py", line 102, in execfile
exec(compile(f.read(), filename, 'exec'), namespace)
File "/Users/robinschwemmle/Desktop/MSc_Thesis/Python/catch2grid.py", line 1285, in <module>
c2g.main_par(db_Qobs_meta_dir, Ens_mean_dir, Ens_sd_dir, db_Qobs_dir)
File "/Users/robinschwemmle/Desktop/MSc_Thesis/Python/catch2grid.py", line 798, in main_par
res = p.starmap(self.main, subsets)
File "/Users/robinschwemmle/anaconda/envs/py36/lib/python3.6/multiprocessing/pool.py", line 274, in starmap
return self._map_async(func, iterable, starmapstar, chunksize).get()
File "/Users/robinschwemmle/anaconda/envs/py36/lib/python3.6/multiprocessing/pool.py", line 644, in get
raise self._value
File "/Users/robinschwemmle/anaconda/envs/py36/lib/python3.6/multiprocessing/pool.py", line 424, in _handle_tasks
put(task)
File "/Users/robinschwemmle/anaconda/envs/py36/lib/python3.6/multiprocessing/connection.py", line 206, in send
self._send_bytes(_ForkingPickler.dumps(obj))
File "/Users/robinschwemmle/anaconda/envs/py36/lib/python3.6/multiprocessing/reduction.py", line 51, in dumps
cls(buf, protocol).dump(obj)
PicklingError: Can't pickle <function <lambda> at 0x1164e42f0>: attribute lookup <lambda> on jupyter_client.session failed
The error first occurred just a few days ago; before that, the code was working properly. Have there been any changes to the multiprocessing or pickling libraries that I'm not aware of? Or does anyone have advice on which parallel library I could use instead?

tensorflow: After adding a rnn the whole work doesn't work

I downloaded an FCN implementation for image segmentation and it ran well. Now I want to add an RNN layer to refine the result, following the paper "ReSeg: A Recurrent Neural Network-Based Model for Semantic Segmentation". My code is as follows:
This part is for the inference:
def inference(image, keep_prob):
    """
    Semantic segmentation network definition.

    Builds the VGG-initialised FCN (conv6-conv8 plus three transposed
    convolutions) and then refines its output with two stacked bidirectional
    LSTM sweeps, one along the height axis and one along the width axis.

    :param image: input image. Should have values in range 0-255
    :param keep_prob: keep probability fed to the two dropout layers
    :return: tuple ``(prediction, logits)`` where ``logits`` is the output of
        the second bidirectional sweep and ``prediction`` is its argmax over
        the last axis with a trailing singleton axis appended
    """
    print("setting up vgg initialized conv layers ...")
    # model_data = utils.get_model_data(FLAGS.model_dir, MODEL_URL)
    # NOTE(review): hard-coded local path to the pretrained VGG-19 weights.
    model_data = scipy.io.loadmat("H:/Deep Learning/FCN.tensorflow-master/imagenet-vgg-verydeep-19.mat")

    mean = model_data['normalization'][0][0][0]
    mean_pixel = np.mean(mean, axis=(0, 1))

    weights = np.squeeze(model_data['layers'])

    processed_image = utils.process_image(image, mean_pixel)

    with tf.variable_scope("inference"):
        image_net = vgg_net(weights, processed_image)
        conv_final_layer = image_net["conv5_3"]

        pool5 = utils.max_pool_2x2(conv_final_layer)

        W6 = utils.weight_variable([7, 7, 512, 4096], name="W6")
        b6 = utils.bias_variable([4096], name="b6")
        conv6 = utils.conv2d_basic(pool5, W6, b6)
        relu6 = tf.nn.relu(conv6, name="relu6")
        if FLAGS.debug:
            utils.add_activation_summary(relu6)
        relu_dropout6 = tf.nn.dropout(relu6, keep_prob=keep_prob)

        W7 = utils.weight_variable([1, 1, 4096, 4096], name="W7")
        b7 = utils.bias_variable([4096], name="b7")
        conv7 = utils.conv2d_basic(relu_dropout6, W7, b7)
        relu7 = tf.nn.relu(conv7, name="relu7")
        if FLAGS.debug:
            utils.add_activation_summary(relu7)
        relu_dropout7 = tf.nn.dropout(relu7, keep_prob=keep_prob)

        W8 = utils.weight_variable([1, 1, 4096, NUM_OF_CLASSESS], name="W8")
        b8 = utils.bias_variable([NUM_OF_CLASSESS], name="b8")
        conv8 = utils.conv2d_basic(relu_dropout7, W8, b8)
        # annotation_pred1 = tf.argmax(conv8, dimension=3, name="prediction1")

        # now to upscale to actual image size
        deconv_shape1 = image_net["pool4"].get_shape()
        W_t1 = utils.weight_variable([4, 4, deconv_shape1[3].value, NUM_OF_CLASSESS], name="W_t1")
        b_t1 = utils.bias_variable([deconv_shape1[3].value], name="b_t1")
        conv_t1 = utils.conv2d_transpose_strided(conv8, W_t1, b_t1, output_shape=tf.shape(image_net["pool4"]))
        # Skip connection currently disabled:
        # fuse_1 = tf.add(conv_t1, image_net["pool4"], name="fuse_1")

        deconv_shape2 = image_net["pool3"].get_shape()
        W_t2 = utils.weight_variable([4, 4, deconv_shape2[3].value, deconv_shape1[3].value], name="W_t2")
        b_t2 = utils.bias_variable([deconv_shape2[3].value], name="b_t2")
        conv_t2 = utils.conv2d_transpose_strided(conv_t1, W_t2, b_t2, output_shape=tf.shape(image_net["pool3"]))
        # Skip connection currently disabled:
        # fuse_2 = tf.add(conv_t2, image_net["pool3"], name="fuse_2")

        shape = tf.shape(image)
        deconv_shape3 = tf.stack([shape[0], shape[1], shape[2], NUM_OF_CLASSESS])
        W_t3 = utils.weight_variable([16, 16, NUM_OF_CLASSESS, deconv_shape2[3].value], name="W_t3")
        b_t3 = utils.bias_variable([NUM_OF_CLASSESS], name="b_t3")
        # Requested output shape: [batch, height, width, NUM_OF_CLASSESS].
        conv_t3 = utils.conv2d_transpose_strided(conv_t2, W_t3, b_t3, output_shape=deconv_shape3, stride=8)

        # ---- RNN refinement added on top of the FCN starts here ----
        shape_5 = tf.shape(image)
        # NOTE(review): spatial size is hard-coded; presumably it must equal
        # the IMAGE_SIZE used for the input placeholder -- confirm.
        W_a = 224
        H_a = 224
        p_size_a = NUM_OF_CLASSESS
        # x = tf.reshape(conv_t1, [shape_5[0],H_a,W_a, p_size_a])

        # First sweep: every image column becomes one sequence of H_a steps.
        x = tf.transpose(conv_t3, perm=[0, 2, 1, 3])   # -> [batch, W, H, classes]
        x = tf.reshape(x, [-1, H_a, p_size_a])         # -> [batch * W, H, classes]
        mat = tf.unstack(x, H_a, 1)                    # H_a tensors of [batch * W, classes]
        lstm_fw_cell = rnn.BasicLSTMCell(N_HIDDEN, forget_bias=1.0)
        lstm_bw_cell = rnn.BasicLSTMCell(N_HIDDEN, forget_bias=1.0)
        # with tf.variable_scope('rnn1_1'):
        try:
            outputs, _, _ = rnn.static_bidirectional_rnn(lstm_fw_cell, lstm_bw_cell, mat,
                                                         dtype=tf.float32, scope='rnn1_1')
        except Exception:  # Old TensorFlow version only returns outputs not states
            outputs = rnn.static_bidirectional_rnn(lstm_fw_cell, lstm_bw_cell, mat,
                                                   dtype=tf.float32)
        outputs1 = tf.reshape(outputs, [H_a, shape_5[0], W_a, 2 * N_HIDDEN])
        outputs1 = tf.transpose(outputs1, (1, 0, 2, 3))  # -> [batch, H, W, 2 * N_HIDDEN]

        # Second sweep: every image row becomes one sequence of W_a steps.
        x_1 = tf.reshape(outputs1, [-1, W_a, 2 * N_HIDDEN])  # -> [batch * H, W, 2 * N_HIDDEN]
        mat_1 = tf.unstack(x_1, W_a, 1)                       # W_a tensors of [batch * H, 2 * N_HIDDEN]
        lstm_lw_cell = rnn.BasicLSTMCell(N_HIDDEN, forget_bias=1.0)
        lstm_rw_cell = rnn.BasicLSTMCell(N_HIDDEN, forget_bias=1.0)
        # with tf.variable_scope('rnn1_2'):
        try:
            outputs2, _, _ = rnn.static_bidirectional_rnn(lstm_lw_cell, lstm_rw_cell, mat_1,
                                                          dtype=tf.float32, scope='rnn1_2')
        except Exception:  # Old TensorFlow version only returns outputs not states
            outputs2 = rnn.static_bidirectional_rnn(lstm_lw_cell, lstm_rw_cell, mat_1,
                                                    dtype=tf.float32)
        # Reshape the output of the SECOND sweep. The previous code reshaped
        # `outputs` (first sweep) here, which silently discarded the result of
        # the rnn1_2 layer so it never contributed to the prediction.
        outputs2 = tf.reshape(outputs2, [W_a, shape_5[0], H_a, 2 * N_HIDDEN])
        outputs2 = tf.transpose(outputs2, (1, 2, 0, 3))  # -> [batch, H, W, 2 * N_HIDDEN]
        # ---- RNN refinement ends here ----

        # NOTE(review): the logits now have 2 * N_HIDDEN channels rather than
        # NUM_OF_CLASSESS, so argmax and the loss operate over 2 * N_HIDDEN
        # "classes" -- confirm this is intended (or add a projection layer).
        annotation_pred = tf.argmax(outputs2, dimension=3, name="prediction")

    return tf.expand_dims(annotation_pred, dim=3), outputs2
and this part is for the training:
def train(loss_val, var_list):
    """
    Build the Adam training op for the given loss.

    :param loss_val: scalar loss tensor to minimise
    :param var_list: variables for which gradients are computed
    :return: the op that applies the computed gradients
    """
    adam = tf.train.AdamOptimizer(FLAGS.learning_rate)
    grads_and_vars = adam.compute_gradients(loss_val, var_list=var_list)

    # Gradient summaries are only recorded in debug mode.
    if FLAGS.debug:
        for gradient, variable in grads_and_vars:
            utils.add_gradient_summary(gradient, variable)

    return adam.apply_gradients(grads_and_vars)
def main(argv=None):
    """
    Build the graph, restore the latest checkpoint if any, then either train
    or visualise predictions depending on ``FLAGS.mode``.

    :param argv: unused; accepted so the function can serve as an app entry point
    """
    keep_probability = tf.placeholder(tf.float32, name="keep_probabilty")
    image = tf.placeholder(tf.float32, shape=[None, IMAGE_SIZE, IMAGE_SIZE, 3], name="input_image")
    annotation = tf.placeholder(tf.int32, shape=[None, IMAGE_SIZE, IMAGE_SIZE, 1], name="annotation")

    pred_annotation, logits = inference(image, keep_probability)
    tf.summary.image("input_image", image, max_outputs=2)
    tf.summary.image("ground_truth", tf.cast(annotation, tf.uint8), max_outputs=2)
    tf.summary.image("pred_annotation", tf.cast(pred_annotation, tf.uint8), max_outputs=2)
    loss = tf.reduce_mean((tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits,
                                                                          labels=tf.squeeze(annotation, squeeze_dims=[3]),
                                                                          name="entropy")))
    tf.summary.scalar("entropy", loss)

    trainable_var = tf.trainable_variables()
    if FLAGS.debug:
        for var in trainable_var:
            utils.add_to_regularization_and_summary(var)
    train_op = train(loss, trainable_var)

    print("Setting up summary op...")
    summary_op = tf.summary.merge_all()

    print("Setting up image reader...")
    train_records, valid_records = scene_parsing.read_dataset(FLAGS.data_dir)
    print(len(train_records))
    print(len(valid_records))

    print("Setting up dataset reader")
    image_options = {'resize': True, 'resize_size': IMAGE_SIZE}
    if FLAGS.mode == 'train':
        train_dataset_reader = dataset.BatchDatset(train_records, image_options)
    # The validation reader is needed in both "train" and "visualize" modes.
    validation_dataset_reader = dataset.BatchDatset(valid_records, image_options)

    sess = tf.Session()

    print("Setting up Saver...")
    saver = tf.train.Saver()
    summary_writer = tf.summary.FileWriter(FLAGS.logs_dir, sess.graph)

    # Initialise everything first so that variables missing from the
    # checkpoint keep their freshly initialised values after the restore.
    sess.run(tf.global_variables_initializer())
    ckpt = tf.train.get_checkpoint_state(FLAGS.logs_dir)
    if ckpt and ckpt.model_checkpoint_path:
        # A checkpoint written before new layers (e.g. the RNN sweeps) were
        # added does not contain their variables; restoring it with the
        # all-variables saver fails with "Key ... not found in checkpoint".
        # Restore only the variables whose name and shape match the checkpoint.
        reader = tf.train.NewCheckpointReader(ckpt.model_checkpoint_path)
        saved_shapes = reader.get_variable_to_shape_map()
        restorable = []
        skipped = []
        for var in tf.global_variables():
            if saved_shapes.get(var.op.name) == var.get_shape().as_list():
                restorable.append(var)
            else:
                skipped.append(var.op.name)
        if restorable:
            tf.train.Saver(var_list=restorable).restore(sess, ckpt.model_checkpoint_path)
        if skipped:
            print("Not in checkpoint, left at initial values: %s" % ", ".join(skipped))
        print("Model restored...")

    if FLAGS.mode == "train":
        for itr in xrange(MAX_ITERATION):
            train_images, train_annotations = train_dataset_reader.next_batch(FLAGS.batch_size)
            feed_dict = {image: train_images, annotation: train_annotations, keep_probability: 0.85}

            sess.run(train_op, feed_dict=feed_dict)

            if itr % 10 == 0:
                train_loss, summary_str = sess.run([loss, summary_op], feed_dict=feed_dict)
                print("Step: %d, Train_loss:%g" % (itr, train_loss))
                summary_writer.add_summary(summary_str, itr)

            if itr % 500 == 0:
                valid_images, valid_annotations = validation_dataset_reader.next_batch(FLAGS.batch_size)
                # Dropout is disabled (keep probability 1.0) for validation.
                valid_loss = sess.run(loss, feed_dict={image: valid_images, annotation: valid_annotations,
                                                       keep_probability: 1.0})
                print("%s ---> Validation_loss: %g" % (datetime.datetime.now(), valid_loss))
                # The full saver is used here, so new checkpoints contain every variable.
                saver.save(sess, FLAGS.logs_dir + "model.ckpt", itr)

    elif FLAGS.mode == "visualize":
        valid_images, valid_annotations = validation_dataset_reader.get_random_batch(FLAGS.batch_size)
        pred = sess.run(pred_annotation, feed_dict={image: valid_images, annotation: valid_annotations,
                                                    keep_probability: 1.0})
        # Drop the trailing singleton axis before saving as images.
        valid_annotations = np.squeeze(valid_annotations, axis=3)
        pred = np.squeeze(pred, axis=3)

        for itr in range(FLAGS.batch_size):
            utils.save_image(valid_images[itr].astype(np.uint8), FLAGS.logs_dir, name="inp_" + str(5+itr))
            utils.save_image(valid_annotations[itr].astype(np.uint8), FLAGS.logs_dir, name="gt_" + str(5+itr))
            utils.save_image(pred[itr].astype(np.uint8), FLAGS.logs_dir, name="pred_" + str(5+itr))
            print("Saved image: %d" % itr)
The error was described as:
Not found: Key inference/rnn1_2/fw/basic_lstm_cell/weights not found in checkpoint
So I think there must be something wrong with the variables.
I would greatly appreciate it if someone could tell me how to fix it!
Looking forward to your help!

Resources