Translate CNN from Keras to PyTorch

I need some help. I am trying to convert a CNN from Keras to PyTorch for MR image reconstruction. The input is the scanner image in the Fourier domain and the output is the reconstructed image. The input has two channels (first channel: real part, second channel: imaginary part). Unfortunately, the two models produce quite different results, so I believe I am doing something wrong, but I cannot find out myself what it is. Here is the Keras code:
def AUTOMAP_Basic_Model(param):
    fc_1 = keras.Input(shape=(64, 64, 2), name='input')
    fc_2 = layers.Conv2D(64, (64, 1), strides=1, padding='same', activation='relu')(fc_1)
    fc_4 = layers.Conv2D(64, (1, 64), strides=1, padding='same', activation='relu')(fc_2)
    fc_4 = layers.Conv2D(64, (64, 1), strides=1, padding='same', activation='relu')(fc_4)
    fc_5 = layers.Conv2D(64, (1, 64), strides=1, padding='same', activation='relu')(fc_4)
    c_1 = layers.Conv2D(64, 5, strides=1, padding='same', activation='relu')(fc_5)
    c_2 = layers.Conv2D(64, 5, strides=1, padding='same', activation='relu')(c_1)
    c_3 = layers.Conv2DTranspose(1, 7, strides=1, activation='sigmoid', padding='same')(c_2)
    model = keras.Model(inputs=fc_1, outputs=c_3)
    return model
And this is my translation to pytorch:
class AUTOMAP_Basic_Model(nn.Module):
    def __init__(self, inputShape, nrFilters):
        super(AUTOMAP_Basic_Model, self).__init__()
        self.conv1 = nn.Conv2d(2, 64, (64, 1), padding='same')
        self.relu = nn.ReLU()
        self.conv2 = nn.Conv2d(64, 64, (1, 64), padding='same')
        self.conv3 = nn.Conv2d(64, 64, (64, 1), padding='same')
        self.conv4 = nn.Conv2d(64, 64, 5, padding='same')
        self.conv5 = nn.Conv2d(64, 64, 5, padding='same')
        self.convTranspose = nn.ConvTranspose2d(64, 1, 7, padding=3, output_padding=0)
        self.sigmoid = nn.Sigmoid()
        self.tan = nn.Tanh()

    def forward(self, x):
        batch_size = len(x)
        out = self.conv1(x)
        out = self.relu(out)
        out = self.conv2(out)
        out = self.relu(out)
        out = self.conv3(out)
        out = self.relu(out)
        out = self.conv2(out)
        out = self.relu(out)
        out = self.conv4(out)
        out = self.relu(out)
        out = self.conv5(out)
        out = self.relu(out)
        out = self.convTranspose(out)
        out = self.sigmoid(out)
        return out
I am new to PyTorch, that's why I do not know what could be wrong. While the Keras model converges to the right image, the PyTorch model gives me a constant value of 0.45 for every pixel in the image.

It seems like you have some strange ordering and naming in both the Keras and PyTorch versions. In the Keras code fc_3 is missing (fc_4 is assigned twice), which might still be viable, but I don't understand why.
In the PyTorch implementation you need four conv layers to match fc_2 through fc_5, but you only defined three (conv1, conv2, conv3) and then reuse conv2 in forward, which makes two positions in the network share the same weights.
I'd rewrite the model with something like:
class AUTOMAP_Basic_Model(nn.Module):
    def __init__(self, inputShape, nrFilters):
        super(AUTOMAP_Basic_Model, self).__init__()
        self.conv1 = nn.Conv2d(2, 64, (64, 1), padding='same')
        self.conv2 = nn.Conv2d(64, 64, (1, 64), padding='same')
        self.conv3 = nn.Conv2d(64, 64, (64, 1), padding='same')
        self.conv4 = nn.Conv2d(64, 64, (1, 64), padding='same')
        self.conv5 = nn.Conv2d(64, 64, 5, padding='same')
        self.conv6 = nn.Conv2d(64, 64, 5, padding='same')
        self.relu = nn.ReLU(inplace=True)
        self.convTranspose = nn.ConvTranspose2d(64, 1, 7, padding=3, output_padding=0)
        self.sigmoid = nn.Sigmoid()
        self.tan = nn.Tanh()

    def forward(self, x):
        batch_size = len(x)
        out = self.relu(self.conv1(x))
        out = self.relu(self.conv2(out))
        out = self.relu(self.conv3(out))
        out = self.relu(self.conv4(out))
        out = self.relu(self.conv5(out))
        out = self.relu(self.conv6(out))
        out = self.convTranspose(out)
        out = self.sigmoid(out)
        return out
I'd also suggest using Conv+BN+ReLU blocks, which usually make training more stable.
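For illustration, a minimal sketch of such a block (the helper name conv_bn_relu is mine, not part of the original model; the bias is dropped because BatchNorm makes it redundant):

import torch.nn as nn

def conv_bn_relu(in_ch, out_ch, kernel_size):
    # Conv -> BatchNorm -> ReLU; stride 1 with 'same' padding keeps H and W
    return nn.Sequential(
        nn.Conv2d(in_ch, out_ch, kernel_size, padding='same', bias=False),
        nn.BatchNorm2d(out_ch),
        nn.ReLU(inplace=True),
    )

# e.g. the first two directional layers could become:
# self.conv1 = conv_bn_relu(2, 64, (64, 1))
# self.conv2 = conv_bn_relu(64, 64, (1, 64))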

Related

Optuna LightGBM RuntimeError Directions

I'm attempting to tune the hyperparameters of my LightGBM model, but I keep getting the same error:
RuntimeError: A single direction cannot be retrieved from a multi-objective study. Consider using Study.directions to retrieve a list containing all directions.
This is really confusing, because I'm following the advice explained in this answer, which means I'm passing a list of directions to the study. Any and all help will be greatly appreciated.
def objective(trial, X, y, group):
    param = {
        "objective": "binary",
        "metric": "auc",
        "verbosity": -1,
        "boosting_type": "gbdt",
        "lambda_l1": trial.suggest_float("lambda_l1", 1e-8, 10.0, log=True),
        "lambda_l2": trial.suggest_float("lambda_l2", 1e-8, 10.0, log=True),
        "num_leaves": trial.suggest_int("num_leaves", 2, 256),
        "feature_fraction": trial.suggest_float("feature_fraction", 0.4, 1.0),
        "bagging_fraction": trial.suggest_float("bagging_fraction", 0.4, 1.0),
        "bagging_freq": trial.suggest_int("bagging_freq", 1, 7),
        "min_child_samples": trial.suggest_int("min_child_samples", 5, 100),
    }
    cv = StratifiedGroupKFold(n_splits=5, shuffle=True, random_state=RANDOM_STATE)
    cv_scores = np.empty(5)
    auc_scores = np.empty(5)
    for idx, (train_idx, test_idx) in enumerate(cv.split(X, y, groups=group)):
        X_train, X_test = X.iloc[train_idx], X.iloc[test_idx]
        y_train, y_test = y.iloc[train_idx], y.iloc[test_idx]
        pruning_callback = optuna.integration.LightGBMPruningCallback(trial, "auc")
        model = lgb.LGBMClassifier(**param)
        model.fit(
            X_train,
            y_train,
            eval_set=[(X_test, y_test)],
            early_stopping_rounds=100,
            callbacks=[pruning_callback])
        preds = model.predict_proba(X_test)
        cv_scores[idx] = log_loss(y_test, preds)
        auc_scores[idx] = roc_auc_score(y_test, preds)
    return np.mean(cv_scores), np.mean(auc_scores)

study = optuna.create_study(directions=["minimize", "maximize"], study_name="LGBM Classifier")
func = lambda trial: objective(trial, X, y, group)
study.optimize(func, n_trials=2)
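A likely cause, for what it's worth: Optuna does not support pruning for multi-objective studies, and LightGBMPruningCallback reads study.direction internally; that property raises exactly this RuntimeError when the study was created with directions=[...]. If that is the culprit, a minimal sketch of a workaround is to fit without the pruning callback in the multi-objective case:

# sketch, assuming the pruning callback is the source of the error:
# multi-objective studies cannot be pruned, so drop the callback
model = lgb.LGBMClassifier(**param)
model.fit(
    X_train,
    y_train,
    eval_set=[(X_test, y_test)],
    early_stopping_rounds=100,
)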

VGG16 Custom Activation Function used in ResNet function

Here's my code:
import os
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "2"
import tensorflow as tf
from tensorflow import keras
from keras import layers
from keras.datasets import cifar10
from sklearn.model_selection import train_test_split
import numpy as np
import matplotlib.pyplot as plt

def main():
    # loading data and image augmentation
    (X_train, Y_train), (X_test, Y_test) = keras.datasets.cifar10.load_data()
    X_train, X_val, Y_train, Y_val = train_test_split(X_train, Y_train, test_size=0.20, random_state=42)
    X_test, X_train, X_val = X_test.astype("float32"), X_train.astype("float32"), X_val.astype("float32")
    Y_train, Y_test, Y_val = keras.utils.to_categorical(Y_train, 10), keras.utils.to_categorical(Y_test, 10), keras.utils.to_categorical(Y_val, 10)
    datagen = keras.preprocessing.image.ImageDataGenerator(rotation_range=15, width_shift_range=0.1, height_shift_range=0.1, horizontal_flip=True)
    datagen.fit(X_train)
    X_train = X_train.reshape(X_train.shape[0], 32, 32, 3)
    X_test = X_test.reshape(X_test.shape[0], 32, 32, 3)
    X_val = X_val.reshape(X_val.shape[0], 32, 32, 3)
    mean = np.mean(X_train)
    std = np.std(X_train)
    X_test = (X_test - mean) / std
    X_val = (X_val - mean) / std
    X_train = (X_train - mean) / std

    # constructing ResNet function
    def residual_module(layer_in, n_filters, kernel_size, padding, initializer, activation, regularizer, triple=False):
        activation2 = 'linear'
        filters2 = layer_in.shape[-1]
        size2 = 1
        conv1 = layers.Conv2D(n_filters, kernel_size, padding=padding, kernel_initializer=initializer, kernel_regularizer=regularizer)(layer_in)
        conv1 = layers.Activation(activation)(conv1)
        batch1 = layers.BatchNormalization()(conv1)
        conv2 = layers.Conv2D(filters2, size2, padding='same', kernel_regularizer=regularizer)(batch1)
        conv2 = layers.Activation(activation2)(conv2)
        batch2 = layers.BatchNormalization()(conv2)
        if triple == True:
            activation2 = activation
            filters2 = n_filters
            size2 = kernel_size
            conv3 = layers.Conv2D(layer_in.shape[-1], 1, padding='same', activation='linear', kernel_regularizer=regularizer)(batch2)
            batch3 = layers.BatchNormalization()(conv3)
            layer_out = layers.add([batch3, layer_in])
            layer_out = layers.Activation(activation)(layer_out)
        else:
            layer_out = layers.add([batch2, layer_in])
            layer_out = layers.Activation(activation)(layer_out)
        return layer_out

    # VGG16 model with SIREN
    weight_decay = 0.0005
    model = keras.Sequential()
    input_layer = layers.Input(shape=(32, 32, 3))
    model.add(residual_module(input_layer, n_filters=64, kernel_size=(3,3), padding='same', initializer="he_uniform", activation=tf.math.sin, regularizer=keras.regularizers.l2(weight_decay)))
    first = model.add(layers.MaxPooling2D(pool_size=(2, 2), strides=2))
    model.add(residual_module(first, 128, (3,3), 'same', None, tf.math.sin, keras.regularizers.l2(weight_decay)))
    second = model.add(layers.MaxPooling2D(pool_size=(2, 2), strides=2))
    model.add(residual_module(second, 256, (3,3), 'same', None, tf.math.sin, keras.regularizers.l2(weight_decay), triple=True))
    third = model.add(layers.MaxPooling2D(pool_size=(2, 2), strides=2))
    model.add(residual_module(third, 512, (3,3), 'same', None, tf.math.sin, keras.regularizers.l2(weight_decay), triple=True))
    model.add(layers.MaxPooling2D(pool_size=(2, 2), strides=2))
    model.add(layers.GlobalAveragePooling2D())
    model.add(layers.Dense(4096, activation="relu"))
    model.add(layers.Dropout(0.5))
    model.add(layers.Dense(4096, activation="relu"))
    model.add(layers.Dropout(0.5))
    model.add(layers.Dense(10))
    '''
    model.add(layers.Conv2D(64, (3, 3), padding='same', kernel_initializer="he_uniform", activation=tf.math.sin, input_shape=(32,32,3), kernel_regularizer=keras.regularizers.l2(weight_decay)))
    model.add(layers.BatchNormalization())
    model.add(layers.Conv2D(64, (3, 3), padding='same', activation=math.sin, kernel_regularizer=keras.regularizers.l2(weight_decay)))
    model.add(layers.BatchNormalization())
    model.add(layers.MaxPooling2D(pool_size=(2, 2), strides=2))
    model.add(layers.Conv2D(128, (3, 3), padding='same', activation=math.sin, kernel_regularizer=keras.regularizers.l2(weight_decay)))
    model.add(layers.BatchNormalization())
    model.add(layers.Conv2D(128, (3, 3), padding='same', activation=math.sin, kernel_regularizer=keras.regularizers.l2(weight_decay)))
    model.add(layers.BatchNormalization())
    model.add(layers.MaxPooling2D(pool_size=(2, 2), strides=2))
    model.add(layers.Conv2D(256, (3, 3), padding='same', activation=math.sin, kernel_regularizer=keras.regularizers.l2(weight_decay)))
    model.add(layers.BatchNormalization())
    model.add(layers.Conv2D(256, (3, 3), padding='same', activation=math.sin, kernel_regularizer=keras.regularizers.l2(weight_decay)))
    model.add(layers.BatchNormalization())
    model.add(layers.Conv2D(256, (3, 3), padding='same', activation=math.sin, kernel_regularizer=keras.regularizers.l2(weight_decay)))
    model.add(layers.BatchNormalization())
    model.add(layers.MaxPooling2D(pool_size=(2, 2), strides=2))
    model.add(layers.Conv2D(512, (3, 3), padding='same', activation=math.sin, kernel_regularizer=keras.regularizers.l2(weight_decay)))
    model.add(layers.BatchNormalization())
    model.add(layers.Conv2D(512, (3, 3), padding='same', activation=math.sin, kernel_regularizer=keras.regularizers.l2(weight_decay)))
    model.add(layers.BatchNormalization())
    model.add(layers.Conv2D(512, (3, 3), padding='same', activation=math.sin, kernel_regularizer=keras.regularizers.l2(weight_decay)))
    model.add(layers.BatchNormalization())
    model.add(layers.MaxPooling2D(pool_size=(2, 2), strides=2))
    model.add(layers.GlobalAveragePooling2D())
    model.add(layers.Dense(4096, activation="relu"))
    model.add(layers.Dropout(0.5))
    model.add(layers.Dense(4096, activation="relu"))
    model.add(layers.Dropout(0.5))
    model.add(layers.Dense(10))
    '''

    # training model
    lr = 0.001
    loss = keras.losses.CategoricalCrossentropy(from_logits=True)
    decayed_lr = tf.keras.optimizers.schedules.ExponentialDecay(lr, 10000, 0.85, True)
    optim = keras.optimizers.SGD(decayed_lr, momentum=0.9, nesterov=True)
    batch_size = 128
    #optim = keras.optimizers.Adam(decayed_lr, beta_1=0.9, beta_2=0.999, epsilon=1e-08)
    model.compile(loss=loss, optimizer=optim, metrics=["accuracy"])
    checkpoint_filepath = '/Users/JamesRONewton/Documents/Programming/MachineLearning/SIREN_projects/BrainTumor/checkpoint.hdf5'
    checkpoint = keras.callbacks.ModelCheckpoint(filepath=checkpoint_filepath, monitor='accuracy', verbose=2, save_best_only=True, save_weights_only=True, mode='auto', save_freq="epoch")
    try:
        model.load_weights(checkpoint_filepath, custom_objects={"sin": tf.math.sin})
    except Exception as e:
        pass
    model.fit(datagen.flow(X_train, Y_train, batch_size=batch_size), steps_per_epoch=len(X_train) / batch_size, epochs=25, callbacks=[checkpoint], validation_data=(X_val, Y_val))
    model.evaluate(X_test, Y_test, verbose=1)

    # saving model
    model.save("VGG16.h5")

if __name__ == '__main__':
    main()
And here's the error I keep getting:
TypeError: The added layer must be an instance of class Layer. Received: layer=KerasTensor(type_spec=TensorSpec(shape=(None, 32, 32, 3), dtype=tf.float32, name=None), name='activation_2/Sin:0', description="created by layer 'activation_2'") of type <class 'keras.engine.keras_tensor.KerasTensor'>.
The errors so far have mostly been about inputting a custom activation function into the ResNet function I created. For example,
TypeError: The added layer must be an instance of class Layer. Received: layer=KerasTensor(type_spec=TensorSpec(shape=(None, 32, 32, 3), dtype=tf.float32, name=None), name='activation/Sin:0', description="created by layer 'activation'") of type <class 'keras.engine.keras_tensor.KerasTensor'>.
So I thought maybe using
layers.Activation(activation)
instead of just putting the activation in the Conv2D layer would fix it, but that clearly did not work, as you can see. I've also tried defining the custom activation function as a class inheriting from layers.Layer, but that also did not work. I used this code to try that:
class Sin(layers.Layer):
    def __init__(self, **kwargs):
        super(Sin, self).__init__(**kwargs)

    def call(self, inputs):
        return tf.math.sin(inputs)
But alas, it did not work.
Update!
I tried using keras backend, but that failed. I also tried using a lambda layer in my ResNet function. Here's my most recent attempt, which combines both:
# custom sinusoidal activation function
def sin(x):
    return K.sin(x)

# constructing ResNet function
def residual_module(layer_in, n_filters, kernel_size, padding, initializer, regularizer):
    conv1 = layers.Conv2D(n_filters, kernel_size, padding=padding, kernel_initializer=initializer, kernel_regularizer=regularizer)(layer_in)
    conv1 = layers.Lambda(lambda x: sin(x))(conv1)
    batch1 = layers.BatchNormalization()(conv1)
    conv2 = layers.Conv2D(layer_in.shape[-1], 1, padding='same', activation='linear', kernel_regularizer=regularizer)(batch1)
    batch2 = layers.BatchNormalization()(conv2)
    layer_out = layers.add([batch2, layer_in])
    layer_out = layers.Lambda(lambda x: sin(x))(layer_out)
    return layer_out

def residual_module_triple(layer_in, n_filters, kernel_size, padding, initializer, regularizer):
    conv1 = layers.Conv2D(n_filters, kernel_size, padding=padding, kernel_initializer=initializer, kernel_regularizer=regularizer)(layer_in)
    conv1 = layers.Lambda(lambda x: sin(x))(conv1)
    batch1 = layers.BatchNormalization()(conv1)
    conv2 = layers.Conv2D(n_filters, kernel_size, padding='same', kernel_regularizer=regularizer)(batch1)
    conv2 = layers.Lambda(lambda x: sin(x))(conv2)
    batch2 = layers.BatchNormalization()(conv2)
    conv3 = layers.Conv2D(layer_in.shape[-1], 1, padding='same', activation='linear', kernel_regularizer=regularizer)(batch2)
    batch3 = layers.BatchNormalization()(conv3)
    layer_out = layers.add([batch3, layer_in])
    layer_out = layers.Lambda(lambda x: sin(x))(layer_out)
    return layer_out
I got it up and working so I can resume training!! Here's my fixed code:
def main():
    import os
    os.environ["TF_CPP_MIN_LOG_LEVEL"] = "2"
    import tensorflow as tf
    from tensorflow import keras
    from keras import layers
    from keras.datasets import cifar10
    from sklearn.model_selection import train_test_split
    import numpy as np
    import matplotlib.pyplot as plt
    from keras import backend as K

    # loading data and image augmentation
    (X_train, Y_train), (X_test, Y_test) = keras.datasets.cifar10.load_data()
    X_train, X_val, Y_train, Y_val = train_test_split(X_train, Y_train, test_size=0.20, random_state=42)
    X_test, X_train, X_val = X_test.astype("float32"), X_train.astype("float32"), X_val.astype("float32")
    Y_train, Y_test, Y_val = keras.utils.to_categorical(Y_train, 10), keras.utils.to_categorical(Y_test, 10), keras.utils.to_categorical(Y_val, 10)
    datagen = keras.preprocessing.image.ImageDataGenerator(rotation_range=15, width_shift_range=0.1, height_shift_range=0.1, horizontal_flip=True)
    datagen.fit(X_train)
    X_train = X_train.reshape(X_train.shape[0], 32, 32, 3)
    X_test = X_test.reshape(X_test.shape[0], 32, 32, 3)
    X_val = X_val.reshape(X_val.shape[0], 32, 32, 3)
    mean = np.mean(X_train)
    std = np.std(X_train)
    X_test = (X_test - mean) / std
    X_val = (X_val - mean) / std
    X_train = (X_train - mean) / std

    # custom sinusoidal activation function
    class sin(layers.Layer):
        def __init__(self, **kwargs):
            super(sin, self).__init__(**kwargs)

        def call(self, inputs, **kwargs):
            return K.sin(inputs)

    # constructing ResNet function
    def residual_module(layer_in, n_filters, kernel_size, padding, initializer, regularizer):
        inputs = layer_in
        conv1 = layers.Conv2D(n_filters, kernel_size, padding=padding, kernel_initializer=initializer, kernel_regularizer=regularizer)(layer_in)
        conv1 = layers.Lambda(lambda x: sin()(x))(conv1)
        batch1 = layers.BatchNormalization()(conv1)
        conv2 = layers.Conv2D(layer_in.shape[-1], 1, padding='same', activation='linear', kernel_regularizer=regularizer)(batch1)
        batch2 = layers.BatchNormalization()(conv2)
        layer_out = layers.add([batch2, inputs])
        layer_out = layers.Lambda(lambda x: sin()(x))(layer_out)
        return layer_out

    def residual_module_triple(layer_in, n_filters, kernel_size, padding, initializer, regularizer):
        inputs = layer_in
        conv1 = layers.Conv2D(n_filters, kernel_size, padding=padding, kernel_initializer=initializer, kernel_regularizer=regularizer)(layer_in)
        conv1 = layers.Lambda(lambda x: sin()(x))(conv1)
        batch1 = layers.BatchNormalization()(conv1, training=True)
        conv2 = layers.Conv2D(n_filters, kernel_size, padding='same', kernel_regularizer=regularizer)(batch1)
        conv2 = layers.Lambda(lambda x: sin()(x))(conv2)
        batch2 = layers.BatchNormalization()(conv2, training=True)
        conv3 = layers.Conv2D(layer_in.shape[-1], 1, padding='same', activation='linear', kernel_regularizer=regularizer)(batch2)
        batch3 = layers.BatchNormalization()(conv3, training=True)
        layer_out = layers.add([batch3, inputs])
        layer_out = layers.Lambda(lambda x: sin()(x))(layer_out)
        return layer_out

    # VGG16 model with SIREN
    weight_decay = 0.0005
    inputs = layers.Input(shape=(32, 32, 3))
    x = residual_module(inputs, n_filters=64, kernel_size=(3,3), padding='same', initializer="he_uniform", regularizer=keras.regularizers.l2(weight_decay))
    x = layers.MaxPooling2D(pool_size=(2, 2), strides=2)(x)
    x = residual_module(x, 128, (3,3), 'same', None, keras.regularizers.l2(weight_decay))
    x = layers.MaxPooling2D(pool_size=(2, 2), strides=2)(x)
    x = residual_module_triple(x, 256, (3,3), 'same', None, keras.regularizers.l2(weight_decay))
    x = layers.MaxPooling2D(pool_size=(2, 2), strides=2)(x)
    x = residual_module_triple(x, 512, (3,3), 'same', None, keras.regularizers.l2(weight_decay))
    x = layers.MaxPooling2D(pool_size=(2, 2), strides=2)(x)
    x = layers.GlobalAveragePooling2D()(x)
    x = layers.Dense(4096, activation="relu")(x)
    x = layers.Dropout(0.5)(x)
    x = layers.Dense(4096, activation="relu")(x)
    x = layers.Dropout(0.5)(x)
    outputs = layers.Dense(10)(x)
    model = keras.Model(inputs, outputs)

    # training model
    lr = 0.001
    loss = keras.losses.CategoricalCrossentropy(from_logits=True)
    decayed_lr = tf.keras.optimizers.schedules.ExponentialDecay(lr, 10000, 0.85, True)
    optim = keras.optimizers.SGD(decayed_lr, momentum=0.9, nesterov=True)
    batch_size = 128
    model.compile(loss=loss, optimizer=optim, metrics=["accuracy"])
    checkpoint_filepath = '/Users/JamesRONewton/Documents/Programming/MachineLearning/SIREN_projects/BrainTumor/checkpoint.hdf5'
    checkpoint = keras.callbacks.ModelCheckpoint(filepath=checkpoint_filepath, monitor='accuracy', verbose=2, save_best_only=True, save_weights_only=True, mode='auto', save_freq="epoch")
    try:
        model.load_weights(checkpoint_filepath)  # load_weights takes no custom_objects; that argument belongs to load_model
    except Exception as e:
        pass
    model.fit(datagen.flow(X_train, Y_train, batch_size=batch_size), steps_per_epoch=len(X_train) / batch_size, epochs=25, callbacks=[checkpoint], validation_data=(X_val, Y_val))
    model.evaluate(X_test, Y_test, verbose=1)

    # saving model
    model.save("VGG16.h5")

if __name__ == '__main__':
    main()
Two issues with my code were causing the errors:
I found out that Sequential.add() doesn't return anything (e.g. "first" was just None), so I switched to the functional API, using "x" as the output of each layer and chaining the layers together with the Model class.
I figured out how to modify the "sin" class I defined so that call() accepts multiple arguments. I also wrapped the class call in a Lambda layer instead of passing a plain function, because otherwise the Tensor returned from the function was passed along instead of the function itself.
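A small simplification, for what it's worth: since sin is already a layers.Layer subclass, it can be called on a tensor directly, so the Lambda wrapper is redundant. A minimal sketch (same sin class as above):

# inside residual_module, instead of layers.Lambda(lambda x: sin()(x))(conv1):
conv1 = sin()(conv1)          # a Layer instance is callable on tensors directly
layer_out = sin()(layer_out)  # likewise for the residual output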

RuntimeError: shape '[4, 98304]' is invalid for input of size 113216

I am learning to train a basic nn model for image classification; the error happened when I tried to feed image data into the model. I understand that I should input the correct size of image data: my images are 128x256 with 3 channels, there are 4 classes, and the batch size is 4. What I don't understand is where the size 113216 comes from. I checked all related parameters and image metadata, but didn't find a clue. Here is my code:
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(3, 6, 5)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.fc1 = nn.Linear(3*128*256, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = x.view(4, 3*128*256)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x

net = Net()

for epoch in range(2):  # loop over the dataset multiple times
    print('round start')
    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        # get the inputs; data is a list of [inputs, labels]
        inputs, labels = data
        # zero the parameter gradients
        optimizer.zero_grad()
        # forward + backward + optimize
        print(inputs.shape)
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        # print statistics
        running_loss += loss.item()
        if i % 2000 == 1999:  # print every 2000 mini-batches
            print('[%d, %5d] loss: %.3f' %
                  (epoch + 1, i + 1, running_loss / 2000))
            running_loss = 0.0

print('Finished Training')
Thanks for your help!
Shapes
Conv2d changes the width and height of the image when there is no padding. Rule of thumb, if you want to keep the same image size with stride=1 (the default): padding = kernel_size // 2
You are changing the number of channels along the way, yet your linear layer expects 3 channels at the full 128x256 resolution for some reason.
Use print(x.shape) after each step if you want to know how your tensor data is transformed!
Commented code
Fixed code with comments about shapes after each step:
class Net(torch.nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = torch.nn.Conv2d(3, 6, 5)
        self.pool = torch.nn.MaxPool2d(2, 2)
        self.conv2 = torch.nn.Conv2d(6, 16, 5)
        # Output shape from convolution is input shape to fc
        self.fc1 = torch.nn.Linear(16 * 29 * 61, 120)
        self.fc2 = torch.nn.Linear(120, 84)
        self.fc3 = torch.nn.Linear(84, 10)

    def forward(self, x):
        # In: (4, 3, 128, 256)
        x = F.relu(self.conv1(x))
        # (4, 6, 124, 252) because kernel_size=5 takes 2 pixels off each side
        x = self.pool(x)
        # (4, 6, 62, 126) because pooling halves width and height
        x = F.relu(self.conv2(x))
        # (4, 16, 58, 122) same reason as above
        x = self.pool(x)
        # (4, 16, 29, 61) because pooling halves width and height
        # Better to use torch.flatten(x, start_dim=1) so you don't have to hard-code the size here
        x = x.view(-1, 16 * 29 * 61)  # use -1 to be batch size independent
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x
Other things that might help
Try torch.nn.AdaptiveMaxPool2d(1) before the ReLU; it will make your network independent of input width and height (see the sketch after this list).
Use flatten (or a torch.nn.Flatten() layer) after this pooling.
If you do, pass the num_channels set in the last convolution as in_features for nn.Linear.
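A minimal sketch of those three points together (my illustration, not code from the question):

import torch
import torch.nn as nn
import torch.nn.functional as F

class NetAnySize(nn.Module):
    # Same convolutions as above, but the head no longer depends on input width/height
    def __init__(self, num_classes=10):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 6, 5)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.global_pool = nn.AdaptiveMaxPool2d(1)  # -> (N, 16, 1, 1) for any H, W
        self.fc = nn.Linear(16, num_classes)        # in_features = channels of last conv

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = self.global_pool(x)
        x = torch.flatten(x, start_dim=1)  # (N, 16)
        return self.fc(x)

logits = NetAnySize()(torch.randn(4, 3, 128, 256))
print(logits.shape)  # torch.Size([4, 10])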

How to set the validation data for a concatenated deep CNN models?

I have built a CNN model (keras 2.1.6) with two branches, each fed a different set of input data.
I am trying to use a validation set in the model fitting, but I couldn't work out the valid structure for validation_data, since I have two different sets of data to be tested:
validation_data = ([tvar_test_data, mfcc_test_data], mfcc_test_labels)
with tvar_test_data and mfcc_test_data having equal dimensions of (40754, 12, 96) (samples, height, width).
The model:
branch_tvar = Sequential()
branch_tvar.add(Conv2D(kernel_size=8, strides=1, filters=6, padding='same',
                       input_shape=(n, m, 1), activation='relu'))
branch_tvar.add(MaxPooling2D(pool_size=2, strides=2))
branch_tvar.add(Flatten())
branch_tvar.add(Dense(512, activation='relu'))
branch_tvar.add(Dropout(0.2))

branch_mfcc = Sequential()
branch_mfcc.add(Conv2D(kernel_size=16, strides=1, filters=5, padding='same',
                       input_shape=(n, m, 1), activation='relu'))
branch_mfcc.add(MaxPooling2D(pool_size=2, strides=2))
branch_mfcc.add(Dense(512, activation='relu'))
branch_mfcc.add(Dropout(0.2))
branch_mfcc.add(Dense(512, activation='relu'))
branch_mfcc.add(Dropout(0.2))

model = Sequential()
model.add(Concatenate([branch_tvar, branch_mfcc]))
model.add(Dense(number_of_classes, activation='softmax'))

optimizer = Adam(lr=0.000384305959)
model.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=['accuracy'])

seed(2017)
model.fit([tvar_train_data, mfcc_train_data], tvar_train_labels,
          batch_size=128, nb_epoch=10, verbose=1,
          validation_data=validation_data)
Problem solved. The labels dimension was not correct. Here is the working functional-API version:
from keras.layers import concatenate
from keras.layers import Dropout, Dense, Flatten, MaxPooling2D, Conv2D
from keras.models import Input, Model
from keras.optimizers import Adam

input_tvar = Input(shape=(n, m, 1))
tvar_branch = Conv2D(kernel_size=8, strides=1, filters=6, padding='same',
                     activation='relu')(input_tvar)
tvar_branch = MaxPooling2D(pool_size=2, strides=2)(tvar_branch)
tvar_branch = Flatten()(tvar_branch)
tvar_branch = Dense(512, activation='relu')(tvar_branch)
tvar_branch = Dropout(0.2)(tvar_branch)

input_mfcc = Input(shape=(n, m, 1))
mfcc_branch = Conv2D(kernel_size=16, strides=1, filters=5, padding='same',
                     activation='relu')(input_mfcc)  # this branch must consume its own Input, or the graph is disconnected
mfcc_branch = MaxPooling2D(pool_size=2, strides=2)(mfcc_branch)
mfcc_branch = Flatten()(mfcc_branch)  # flatten before the dense layers, as in the tvar branch
mfcc_branch = Dense(512, activation='relu')(mfcc_branch)
mfcc_branch = Dropout(0.2)(mfcc_branch)
mfcc_branch = Dense(512, activation='relu')(mfcc_branch)
mfcc_branch = Dropout(0.2)(mfcc_branch)

con = concatenate(inputs=[tvar_branch, mfcc_branch])  # merge the two branches
tvar_mfcc = Dense(50)(con)
tvar_mfcc = Dropout(0.3)(tvar_mfcc)
output = Dense(number_of_classes, activation='softmax')(tvar_mfcc)  # softmax output for classification, as in the original model

tvar_mfcc_net = Model(inputs=[input_tvar, input_mfcc], outputs=output)
optimizer = Adam(lr=0.000384305959)
tvar_mfcc_net.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['accuracy'])

#%%
seed(2017)
tvar_mfcc_net.fit([tvar_train_data, mfcc_train_data], tvar_train_labels,
                  batch_size=128, epochs=10, verbose=1,
                  validation_data=validation_data)
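For reference, the matching validation_data then mirrors the training inputs: a list with the two test arrays plus a single label array whose second dimension equals number_of_classes (shapes from the question; the name test_labels is mine):

# each test array: (num_test_samples, n, m, 1); labels: (num_test_samples, number_of_classes)
validation_data = ([tvar_test_data, mfcc_test_data], test_labels)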

CNN model for image classification, based on TensorFlow, is not converging

I am trying to train a CNN model, 2 classes, based on TensorFlow, to do image classification.
I have tried many modifications of the epochs, learning rate, batch size and CNN size, but nothing works.
about the data
86 (label: 0) + 63 (label: 1) images
shape: (128, 128)
about the current parameters
learning_rate = 0.00005 (I have tried from 0.00000001 to 0.8...)
batch size = 30 (I have also tried from 5 to 130)
epochs = 20
about the network
def weight_variable(shape):
    initial = tf.truncated_normal(shape, stddev=0.1, dtype=tf.float32)
    return tf.Variable(initial)

def bias_variable(shape):
    initial = tf.constant(0.1, shape=shape, dtype=tf.float32)
    return tf.Variable(initial)

def conv2d(x, W):
    # (input, filter, strides, padding)
    # [batch, height, width, in_channels]
    return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')

def max_pool_2x2(x):
    # (value, ksize, strides, padding)
    return tf.nn.max_pool(x, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')

def cnn_model():
    epochs = 20
    batch_size = 30
    learning_rate = 0.00005
    hidden = 2
    cap_c = 86
    cap_h = 63
    num = cap_c + cap_h
    image_size = 128
    label_size = 2
    print((num // batch_size) * epochs)
    train_loss = np.empty((num // batch_size) * epochs)
    train_acc = np.empty((num // batch_size) * epochs)

    x = tf.placeholder(tf.float32, shape=[None, image_size, image_size])
    y = tf.placeholder(tf.float32, shape=[None, label_size])
    weight_balance = tf.constant([0.1])
    X_train_ = tf.reshape(x, [-1, image_size, image_size, 1])

    # First layer
    W_conv1 = weight_variable([5, 5, 1, 4])
    b_conv1 = bias_variable([4])
    h_conv1 = tf.nn.relu(conv2d(X_train_, W_conv1) + b_conv1)
    h_pool1 = max_pool_2x2(h_conv1)

    # # Second layer
    # W_conv2 = weight_variable([5, 5, 4, 8])
    # b_conv2 = bias_variable([8])
    # h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2)
    # h_pool2 = max_pool_2x2(h_conv2)

    # # Third layer
    # W_conv3 = weight_variable([5, 5, 8, 16])
    # b_conv3 = bias_variable([16])
    # h_conv3 = tf.nn.relu(conv2d(h_pool2, W_conv3) + b_conv3)
    # h_pool3 = max_pool_2x2(h_conv3)

    # Fully connected layer
    W_fc1 = weight_variable([64 * 64 * 4, hidden])
    b_fc1 = bias_variable([hidden])
    h_pool2_flat = tf.reshape(h_pool1, [-1, 64 * 64 * 4])
    h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1) + b_fc1)

    keep_prob = tf.placeholder(tf.float32)
    h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)

    # Output_Softmax
    W_fc2 = weight_variable([hidden, label_size])
    b_fc2 = bias_variable([label_size])
    y_conv = tf.nn.softmax(tf.matmul(h_fc1_drop, W_fc2) + b_fc2)
    print(y_conv.shape)

    # Train
    loss = tf.reduce_mean(tf.nn.weighted_cross_entropy_with_logits(y, y_conv, weight_balance))
    optimize = tf.train.AdamOptimizer(learning_rate).minimize(loss)
    correct_prediction = tf.equal(tf.argmax(y_conv, 1), tf.argmax(y, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
about the result
Neither the loss nor the accuracy converges.
I don't know whether my CNN model is not suitable for my data, or whether the activation function and loss function of the network are not suitable.
Really thank you.
There are a couple of problems with the code:
You are applying softmax on the last layer and then calling tf.nn.weighted_cross_entropy_with_logits, which in turn applies a sigmoid activation, so you are applying an activation twice; the loss should instead be given the raw logits (see the sketch below).
For initialisation of the weights, use Xavier or variance scaling for faster convergence. It is also better to use the tf.layers API when implementing your model, as its default settings follow best practices.
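A minimal sketch of the first fix, keeping the question's TF1-style code and one-hot labels (if the class weighting is still wanted, tf.nn.weighted_cross_entropy_with_logits can likewise be called on the raw logits):

# keep the last layer linear and feed the raw logits to the loss
logits = tf.matmul(h_fc1_drop, W_fc2) + b_fc2  # no softmax here
y_conv = tf.nn.softmax(logits)                 # softmax only for predictions

loss = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits_v2(labels=y, logits=logits))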
