I am trying to do hyperparametric tuning using the following code :
def df_to_new(df,window_size):
df_as_np = df.to_numpy()
X = []
y = []
for i in range(len(df_as_np)-window_size):
row = [[a] for a in df_as_np[i:i+window_size]]
X.append(row)
label = df_as_np[i+window_size]
y.append(label)
return np.array(X),np.array(y)
Xgrid,Xnotuse,ygrid,ynouse = train_test_split(Xtrain,ytrain,test_size=0.8)
input_dim = Xgrid.shape[1]
def define_model(learning_rate=0.01):
model = Sequential()
model.add(LSTM(128,input_dim = input_dim))
model.add(Dropout(0.2))
model.add(Dense(256, activation='relu'))
model.add(Dropout(0.2))
model.add(Dense(1, activation='linear'))
optimizer = Adam(lr=learning_rate)
# compile the model
model.compile(loss=MeanSquaredError(),
optimizer=optimizer,
metrics=[RootMeanSquaredError()])
return model
model = KerasClassifier(build_fn=define_model,
epochs=epochs,
batch_size = batch_size,
verbose=1)
learning_rate = [0.0001, 0.001, 0.01, 0.1]
epochs = [10, 30, 60, 150]
batch_size = [5, 15, 25, 50]
param_grid = dict(learning_rate=learning_rate, epochs=epochs,batch_size=batch_size)
grid = GridSearchCV(estimator=model, param_grid=param_grid, n_jobs=1)
grid_result = grid.fit(Xgrid, ygrid)
But I am getting the following error:
ValueError: Invalid parameter learning_rate for estimator KerasClassifier.
This issue can likely be resolved by setting this parameter in the KerasClassifier constructor:
KerasClassifier(learning_rate=0.0001)
Check the list of available parameters with estimator.get_params().keys()
What I'm trying to do is to get the best epochs, batch_size, and learning_rate combination for my LSTM model. Any help?
Related
I am doing classification text and for the training of the model I am using trainer function from HuggingFace, the code is:
def get_model(name_model):
model = AutoModelForSequenceClassification.from_pretrained(
name_model,
num_labels=2,
problem_type = "single_label_classification"
)
return model
model = get_model(name_model)
training_args = TrainingArguments(
learning_rate = 3e-5,
max_grad_norm = 1.0,
#weight_decay = 0.01,
num_train_epochs = 3,
per_device_train_batch_size = 32,
per_device_eval_batch_size = 1,
logging_steps = 300,
output_dir = "./training_output",
overwrite_output_dir = True,
seed =42,
fp16=True,
remove_unused_columns = False
)
trainer = Trainer(
model = model,
args = training_args,
train_dataset = train
)
trainer.args._n_gpu = 2
So, when it finish to train the model (which is BERT model) it says
I am afraid that the model is not correctly trained and that predictions that made are not okay.
Do you know how to fix this?, with only one gpu the are not warnings.
I tried to set fp16=True because I read in another forum that it could help, and I tried to set is_model_parallel= True but I didn't fix it. I tried too to set place_model_on_device = True too but did not work.
I am working with the huggingface transformers and training a pretrained byt5-small on my data. I am also trying to do hyperparameter search using Trainer API with optuna as backend. But the following error is appearing every time. Please help me out. below is the while code.
transformer version = 4.23.1
from transformers import HfArgumentParser, TensorFlowBenchmark, TensorFlowBenchmarkArguments
import pandas as pd
from transformers import T5ForConditionalGeneration, ByT5Tokenizer
from transformers import TrainingArguments
from torch.utils.data import Dataset, DataLoader
from transformers import Trainer
import datasets
import transformers
from tqdm import tqdm
from numba import cuda
device = cuda.get_current_device()
device.reset()
train_df = pd.read_csv("/home/bhavuk/project1/data/train_split.csv")
eval_df = pd.read_csv("/home/bhavuk/project1/data/eval_split.csv")
test_df = pd.read_csv("/home/bhavuk/project1/data/test_split.csv")
train_df = train_df.dropna()
eval_df = eval_df.dropna()
test_df = test_df.dropna(subset=["Hypothesis","Reference"])
train_df.shape, eval_df.shape[0], test_df.shape[0]
args_dict = {
"output_dir": './byt5-small-hp-search',
"overwrite_output_dir": True,
"per_device_train_batch_size": 2,
"per_device_eval_batch_size": 2,
"gradient_accumulation_steps": 4,
"learning_rate": 1e-1,
"warmup_steps": 2,
"logging_steps": 100,
"evaluation_strategy": "steps",
"eval_steps": 250,
"num_train_epochs": 4,
"do_train": True,
"do_eval": True,
"fp16": False,
"max_steps": 100000,
"load_best_model_at_end":True,
"logging_dir": './logs',
"save_total_limit" : 2,
"weight_decay" : 0.1,
"label_smoothing_factor" : 0.1
}
parser = HfArgumentParser(
(TrainingArguments))
training_args = parser.parse_dict(args_dict)
args = training_args[0]
def optuna_hp_space(trial):
return {
"learning_rate": trial.suggest_float("learning_rate", 1e-5, 1e-1, log=True),
"dropout_rate": trial.suggest_float("dropout_rate", 0.1, 0.6, step=0.1),
"weight_decay": trial.suggest_float("weight_decay", 0.1, 0.3, step=0.1),
"label_smoothing_factor": trial.suggest_float("label_smoothing_factor", 0.1, 0.3, step=0.1)
}
config = '/home/bhavuk/project1/notebooks/models--google--byt5-small/snapshots/ce8f3a48ed7676af36476a01fb01f95ea529599c/config.json'
def model_init(trial):
return T5ForConditionalGeneration.from_pretrained(
'google/byt5-small',
config=config,
dropout_rate = 0.1
)
tokenizer = ByT5Tokenizer.from_pretrained(
"google/byt5-small",
cache_dir=".",
max_length=512
)
class GPReviewDataset(Dataset):
def __init__(self, Text, Label):
self.Text = Text
self.Label = Label
# self.tokenizer = tokenizer
# self.max_len = max_len
def __len__(self):
return len(self.Text)
def __getitem__(self, item):
Text = str(self.Text[item])
Label = self.Label[item]
inputs = tokenizer(Text, padding="max_length", truncation=True, max_length=512)
outputs = tokenizer(Label, padding="max_length", truncation=True, max_length=512)
return {
"input_ids":inputs.input_ids,
"attention_mask" : inputs.attention_mask,
"labels" : outputs.input_ids,
"decoder_attention_mask" : outputs.attention_mask,
# "labels" : lbz
}
ds_train = GPReviewDataset(
Text=train_df.Hypothesis.to_numpy(),
Label=train_df.Reference.to_numpy()
ds_test = GPReviewDataset(
Text=eval_df.Hypothesis.to_numpy(),
Label=eval_df.Reference.to_numpy()
# tokenizer=tokenizer,
# max_len=max_len
)
train_dataset = ds_train
valid_dataset = ds_test
trainer = Trainer(
model=None,
args=args,
train_dataset=train_dataset,
eval_dataset=valid_dataset,
tokenizer=tokenizer,
model_init=model_init
)
best_trial = trainer.hyperparameter_search(
direction="minimize",
backend="optuna",
hp_space=optuna_hp_space,
n_trials=20
)
ERROR:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
/home/bhavuk/project1/notebooks/byT5small_hp_search_2.ipynb Cell 14 in <cell line: 1>()
----> 1 best_trial = trainer.hyperparameter_search(
2 direction="minimize",
3 backend="optuna",
4 hp_space=optuna_hp_space,
5 n_trials=20
6 )
File ~/anaconda3/envs/cvenv/lib/python3.9/site-packages/transformers/trainer.py:2368, in Trainer.hyperparameter_search(self, hp_space, compute_objective, n_trials, direction, backend, hp_name, **kwargs)
2360 self.compute_objective = default_compute_objective if compute_objective is None else compute_objective
2362 backend_dict = {
2363 HPSearchBackend.OPTUNA: run_hp_search_optuna,
2364 HPSearchBackend.RAY: run_hp_search_ray,
2365 HPSearchBackend.SIGOPT: run_hp_search_sigopt,
2366 HPSearchBackend.WANDB: run_hp_search_wandb,
2367 }
-> 2368 best_run = backend_dict[backend](self, n_trials, direction, **kwargs)
2370 self.hp_search_backend = None
2371 return best_run
File ~/anaconda3/envs/cvenv/lib/python3.9/site-packages/transformers/integrations.py:189, in run_hp_search_optuna(trainer, n_trials, direction, **kwargs)
187 n_jobs = kwargs.pop("n_jobs", 1)
188 study = optuna.create_study(direction=direction, **kwargs)
...
return forward_call(*input, **kwargs)
File "/home/bhavuk/anaconda3/envs/cvenv/lib/python3.9/site-packages/transformers/models/t5/modeling_t5.py", line 937, in forward
raise ValueError(f"You have to specify either {err_msg_prefix}input_ids or {err_msg_prefix}inputs_embeds")
ValueError: You have to specify either decoder_input_ids or decoder_inputs_embeds
Error message
Traceback (most recent call last):
File "pred.py", line 134, in
output = model(data)
Runtime Error: Expected 4-dimensional input for 4-dimensional weight [16, 3, 3, 3], but got 3-dimensional input of size [1, 32, 32] instead.
Prediction code
normalize = transforms.Normalize(mean=[0.4914, 0.4824, 0.4467],
std=[0.2471, 0.2435, 0.2616])
train_set = transforms.Compose([
transforms.RandomCrop(32, padding=4),
transforms.RandomHorizontalFlip(),
transforms.ToTensor(),
normalize,
])
model = models.condensenet(args)
model = nn.DataParallel(model)
PATH = "results/savedir/save_models/checkpoint_001.pth.tar"
model.load_state_dict(torch.load(PATH)['state_dict'])
device = torch.device("cpu")
model.eval()
image = Image.open("horse.jpg")
input = train_set(image)
train_loader = torch.utils.data.DataLoader(
input,
batch_size=1,shuffle=True, num_workers=1)
for i, data in enumerate(train_loader):
#input_var = torch.autograd.Variable(data, volatile=True)
#input_var = input_var.view(1, 3, 32,32)
**output = model(data)
topk=(1,5)
maxk = max(topk)
_, pred = output.topk(maxk, 1, True, True)
Am getting this error when am trying to predict on a single image
Image shape/size error message
Link to saved model
Training code repository
Plz uncomment this line #input_var = input_var.view(1, 3, 32,32) so that your input dimension is 4.
I assume that your no. of input channels are 3 if its one then use input_var = input_var.view(1, 1, 32,32) if gray scale
Instead of doing the for loop and train_loader, solved this by just passing the input directly into the model. like this
input = train_set(image)
input = input.unsqueeze(0)
model.eval()
output = model(input)
More details can be found here link
I am working on a deep learning problem. I am solving it using pytorch. I have two GPU's which are on the same machine (16273MiB,12193MiB). I want to use both the GPU's for my training (video dataset).
I get a warning:
There is an imbalance between your GPUs. You may want to exclude GPU 1 which
has less than 75% of the memory or cores of GPU 0. You can do so by setting
the device_ids argument to DataParallel, or by setting the CUDA_VISIBLE_DEVICES
environment variable.
warnings.warn(imbalance_warn.format(device_ids[min_pos], device_ids[max_pos]))
I also get an error:
raise TypeError('Broadcast function not implemented for CPU tensors')
TypeError: Broadcast function not implemented for CPU tensors
if __name__ == '__main__':
opt.scales = [opt.initial_scale]
for i in range(1, opt.n_scales):
opt.scales.append(opt.scales[-1] * opt.scale_step)
opt.arch = '{}-{}'.format(opt.model, opt.model_depth)
opt.mean = get_mean(opt.norm_value)
opt.std = get_std(opt.norm_value)
print("opt",opt)
with open(os.path.join(opt.result_path, 'opts.json'), 'w') as opt_file:
json.dump(vars(opt), opt_file)
torch.manual_seed(opt.manual_seed)
model, parameters = generate_model(opt)
#print(model)
pytorch_total_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
print("Total number of trainable parameters: ", pytorch_total_params)
# Define Class weights
if opt.weighted:
print("Weighted Loss is created")
if opt.n_finetune_classes == 2:
weight = torch.tensor([1.0, 3.0])
else:
weight = torch.ones(opt.n_finetune_classes)
else:
weight = None
criterion = nn.CrossEntropyLoss()
if not opt.no_cuda:
criterion = nn.DataParallel(criterion.cuda())
if opt.no_mean_norm and not opt.std_norm:
norm_method = Normalize([0, 0, 0], [1, 1, 1])
elif not opt.std_norm:
norm_method = Normalize(opt.mean, [1, 1, 1])
else:
norm_method = Normalize(opt.mean, opt.std)
train_loader = torch.utils.data.DataLoader(
training_data,
batch_size=opt.batch_size,
shuffle=True,
num_workers=opt.n_threads,
pin_memory=True)
train_logger = Logger(
os.path.join(opt.result_path, 'train.log'),
['epoch', 'loss', 'acc', 'precision','recall','lr'])
train_batch_logger = Logger(
os.path.join(opt.result_path, 'train_batch.log'),
['epoch', 'batch', 'iter', 'loss', 'acc', 'precision', 'recall', 'lr'])
if opt.nesterov:
dampening = 0
else:
dampening = opt.dampening
optimizer = optim.SGD(
parameters,
lr=opt.learning_rate,
momentum=opt.momentum,
dampening=dampening,
weight_decay=opt.weight_decay,
nesterov=opt.nesterov)
# scheduler = lr_scheduler.ReduceLROnPlateau(
# optimizer, 'min', patience=opt.lr_patience)
if not opt.no_val:
spatial_transform = Compose([
Scale(opt.sample_size),
CenterCrop(opt.sample_size),
ToTensor(opt.norm_value), norm_method
])
print('run')
for i in range(opt.begin_epoch, opt.n_epochs + 1):
if not opt.no_train:
adjust_learning_rate(optimizer, i, opt.lr_steps)
train_epoch(i, train_loader, model, criterion, optimizer, opt,
train_logger, train_batch_logger)
I have also made changes in my train file:
model = nn.DataParallel(model(),device_ids=[0,1]).cuda()
outputs = model(inputs)
It does not seem to work properly and is giving error. Please advice, I am new to pytorch.
Thanks
As mentioned in this link, you have to do model.cuda() before passing it to nn.DataParallel.
net = nn.DataParallel(model.cuda(), device_ids=[0,1])
https://github.com/pytorch/pytorch/issues/17065
I want to Visualize my CNN filters on every layer. I write a code for this but this is giving me some error.I want to see filter images of every layer and also want to see the heat maps of the area which my neural net use the most to predict the particular label. By doing this I am able to understand the working of my cnn and do further work on my model for better results
I searched it on google but I found mostly sited with theory but i need to see code for the solution
x = Conv2D(64,(3,3),strides = (1,1),name='layer_conv1',padding='same')(input)
x = BatchNormalization()(x)
x = Activation('relu')(x)
x = MaxPooling2D((2,2),name='maxPool1')(x)
x = Conv2D(64,(3,3),strides = (1,1),name='layer_conv2',padding='same')(x)
x = BatchNormalization()(x)
x = Activation('relu')(x)
x = MaxPooling2D((2,2),name='maxPool2')(x)
x = Conv2D(32,(3,3),strides = (1,1),name='conv3',padding='same')(x)
x = BatchNormalization()(x)
x = Activation('relu')(x)
x = MaxPooling2D((2,2),name='maxPool3')(x)
x = Flatten()(x)
x = Dense(64,activation = 'relu',name='fc0')(x)
x = Dropout(0.25)(x)
x = Dense(32,activation = 'relu',name='fc1')(x)
x = Dropout(0.25)(x)
x = Dense(2,activation = 'softmax',name='fc2')(x)
model = Model(inputs = input,outputs = x,name='Predict')
a=np.expand_dims( X_train[10],axis=0)
a.shape
from keras.models import Model
layer_outputs = [layer.output for layer in model.layers]
activation_model = Model(inputs=model.input, outputs=layer_outputs)
activations = activation_model.predict(a)
I am getting this error
---------------------------------------------------------------------------
InvalidArgumentError Traceback (most recent call last)
<ipython-input-249-119bf7ea835a> in <module>()
2 layer_outputs = [layer.output for layer in model.layers]
3 activation_model = Model(inputs=model.input, outputs=layer_outputs)
----> 4 activations = activation_model.predict(a)
5
6
/opt/conda/lib/python3.6/site-packages/Keras-2.2.4-py3.6.egg/keras/engine/training.py in predict(self, x, batch_size, verbose, steps, callbacks)
1185 verbose=verbose,
1186 steps=steps,
-> 1187 callbacks=callbacks)
1188
1189 def train_on_batch(self, x, y,
/opt/conda/lib/python3.6/site-packages/Keras-2.2.4-py3.6.egg/keras/engine/training_arrays.py in predict_loop(model, f, ins, batch_size, verbose, steps, callbacks)
320 batch_logs = {'batch': batch_index, 'size': len(batch_ids)}
321 callbacks._call_batch_hook('predict', 'begin', batch_index, batch_logs)
--> 322 batch_outs = f(ins_batch)
323 batch_outs = to_list(batch_outs)
324 if batch_index == 0:
/opt/conda/lib/python3.6/site-packages/Keras-2.2.4-py3.6.egg/keras/backend/tensorflow_backend.py in __call__(self, inputs)
2919 return self._legacy_call(inputs)
2920
-> 2921 return self._call(inputs)
2922 else:
2923 if py_any(is_tensor(x) for x in inputs):
/opt/conda/lib/python3.6/site-packages/Keras-2.2.4-py3.6.egg/keras/backend/tensorflow_backend.py in _call(self, inputs)
2873 feed_symbols,
2874 symbol_vals,
-> 2875 session)
2876 if self.run_metadata:
2877 fetched = self._callable_fn(*array_vals, run_metadata=self.run_metadata)
/opt/conda/lib/python3.6/site-packages/Keras-2.2.4-py3.6.egg/keras/backend/tensorflow_backend.py in _make_callable(self, feed_arrays, feed_symbols, symbol_vals, session)
2825 callable_opts.run_options.CopyFrom(self.run_options)
2826 # Create callable.
-> 2827 callable_fn = session._make_callable_from_options(callable_opts)
2828 # Cache parameters corresponding to the generated callable, so that
2829 # we can detect future mismatches and refresh the callable.
/opt/conda/lib/python3.6/site-packages/tensorflow/python/client/session.py in _make_callable_from_options(self, callable_options)
1469 """
1470 self._extend_graph()
-> 1471 return BaseSession._Callable(self, callable_options)
1472
1473
/opt/conda/lib/python3.6/site-packages/tensorflow/python/client/session.py in __init__(self, session, callable_options)
1423 with errors.raise_exception_on_not_ok_status() as status:
1424 self._handle = tf_session.TF_SessionMakeCallable(
-> 1425 session._session, options_ptr, status)
1426 finally:
1427 tf_session.TF_DeleteBuffer(options_ptr)
/opt/conda/lib/python3.6/site-packages/tensorflow/python/framework/errors_impl.py in __exit__(self, type_arg, value_arg, traceback_arg)
526 None, None,
527 compat.as_text(c_api.TF_Message(self.status.status)),
--> 528 c_api.TF_GetCode(self.status.status))
529 # Delete the underlying status object from memory otherwise it stays alive
530 # as there is a reference to status from this from the traceback due to
InvalidArgumentError: input_14:0 is both fed and fetched.
I tried by removing some layers and adding some layer but it didnt help me. I found very less code on google.
To access any layer's output, you can use function in keras, something like this:
from keras import backend as K
last_layer_output = K.function([model.layers[0].input],
[model.layers[-1].output])
layer_output = last_layer_output([x])[0]
So to access all layer's output, you can create as many such function as follows:
outputs = [layer.output for layer in model.layers]
functors = [K.function([model.input, K.learning_phase()], [out]) for out in outputs]
layer_outs = [func([x_test[:4], 1.]) for func in functors]
Note: keras-function produce one output for one layer.
More you can read it here
with this model my problem is not solving so I make a simple model and use keras fucntions to get layers output and this is easy as compared to my previous model.
model = Sequential()
model.add(Conv2D(16,kernel_size = (5,5),activation = 'relu', activity_regularizer=regularizers.l2(1e-8)))
model.add(Conv2D(32,kernel_size = (5,5),activation = 'relu', activity_regularizer = regularizers.l2(1e-8)))
model.add(MaxPooling2D(3,3))
model.add(Conv2D(64,kernel_size = (5,5),activation = 'relu', activity_regularizer = regularizers.l2(1e-8)))
model.add(MaxPooling2D(3,3))
model.add(Conv2D(128,activation = 'relu',kernel_size = (3,3),activity_regularizer = regularizers.l2(1e-8)))
model.add(Flatten())
model.add(Dropout(0.8))
model.add(Dense(64,activation = 'relu',activity_regularizer = regularizers.l2(1e-8)))
model.add(Dropout(0.8))
model.add(Dense(64,activation = 'relu',activity_regularizer = regularizers.l2(1e-8)))
model.add(Dropout(0.8))
model.add(Dense(2,activation = 'softmax'))
model.compile(loss=keras.losses.binary_crossentropy, optimizer=keras.optimizers.SGD(lr = 0.001,clipnorm = 1,momentum= 0.9), metrics=["accuracy"])
model.fit(X_train,y_train, epochs = 10 ,batch_size = 16,validation_data=(X_test,y_test_Categorical))
model.summary()
#a is my one example from test set
a=np.expand_dims( X_train[10],axis=0)
a.shape
layer_outputs = [layer.output for layer in model.layers]
activation_model = Model(inputs=model.input, outputs=layer_outputs)
activations = activation_model.predict(a)
def display_activation(activations, col_size, row_size, act_index):
activation = activations[act_index]
activation_index=0
fig, ax = plt.subplots(row_size, col_size, figsize=(row_size*2.5,col_size*1.5))
for row in range(0,row_size):
for col in range(0,col_size):
ax[row][col].imshow(activation[0, :, :, activation_index])
activation_index += 1
display_activation(activations, 4, 4,0)
by doing this I am able to get my output