Predict with pytorch lightning when using BCEWithLogitsLoss for training - pytorch-lightning

I'm trying to see how my trained model would predict a single instance of y, and to get a list of predicted and actual y values.
It seems I'm missing a few steps, and I'm not sure how to implement predict_step. Here is what I currently have:
import torch
import torch.nn.functional as F
import pytorch_lightning as ptlight
import torch_geometric as ptgeom
from sklearn.model_selection import train_test_split
from torchmetrics.functional import accuracy

mutag = ptgeom.datasets.TUDataset(root='.', name='MUTAG')
train_idx, test_idx = train_test_split(range(len(mutag)), stratify=[m.y[0].item() for m in mutag], test_size=0.25)
train_loader = ptgeom.loader.DataLoader(mutag[train_idx], batch_size=32, shuffle=True)
test_loader = ptgeom.loader.DataLoader(mutag[test_idx], batch_size=32)

class MUTAGClassifier(ptlight.LightningModule):
    def __init__(self):
        # The model is just GCNConv --> GCNConv --> graph pooling --> Dropout --> Linear
        super().__init__()
        self.gc1 = ptgeom.nn.GCNConv(7, 256)
        self.gc2 = ptgeom.nn.GCNConv(256, 256)
        self.linear = torch.nn.Linear(256, 1)

    def forward(self, x, edge_index=None, batch=None, edge_weight=None):
        # Note: "edge_weight" is not used for training, but only for the explainability part
        if edge_index is None:
            x, edge_index, batch = x.x, x.edge_index, x.batch
        x = F.relu(self.gc1(x, edge_index, edge_weight))
        x = F.relu(self.gc2(x, edge_index, edge_weight))
        x = ptgeom.nn.global_mean_pool(x, batch)
        x = F.dropout(x, training=self.training)  # only apply dropout while training
        x = self.linear(x)
        return x

    def configure_optimizers(self):
        optimizer = torch.optim.Adam(self.parameters(), lr=1e-3)
        return optimizer

    def training_step(self, batch, _):
        y_hat = self.forward(batch.x, batch.edge_index, batch.batch)
        loss = F.binary_cross_entropy_with_logits(y_hat, batch.y.unsqueeze(1).float())
        self.log("train_loss", loss)
        self.log("train_accuracy", accuracy(y_hat, batch.y.unsqueeze(1)), prog_bar=True, batch_size=32)
        return loss

    def validation_step(self, batch, _):
        x, edge_index, batch_idx = batch.x, batch.edge_index, batch.batch
        y_hat = self.forward(x, edge_index, batch_idx)
        self.log("val_accuracy", accuracy(y_hat, batch.y.unsqueeze(1)), prog_bar=True, batch_size=32)

checkpoint_callback = ptlight.callbacks.ModelCheckpoint(
    dirpath='./checkpoints/',
    filename='gnn-{epoch:02d}',
    every_n_epochs=50,
    save_top_k=-1)

gnn = MUTAGClassifier()
trainer = ptlight.Trainer(max_epochs=200, callbacks=[checkpoint_callback])
trainer.fit(gnn, train_loader, test_loader)

The crux here is that you use F.binary_cross_entropy_with_logits in your training_step (presumably for numerical stability). This means a sigmoid has to be applied to your output in both validation_step and predict_step, since that operation is not part of forward(). Check this for more information. Note that you may also need to round (threshold) the predicted probabilities, depending on which accuracy method you are using, in order to get correct metric results.
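As a quick sanity check, not from the original answer: the logits-based loss matches plain BCE once a sigmoid is applied, which is exactly why the sigmoid must be reintroduced at inference time:

import torch
import torch.nn.functional as F

logits = torch.randn(4, 1)                       # raw model outputs
targets = torch.randint(0, 2, (4, 1)).float()

loss_logits = F.binary_cross_entropy_with_logits(logits, targets)
loss_probs = F.binary_cross_entropy(torch.sigmoid(logits), targets)
print(torch.allclose(loss_logits, loss_probs))   # True, up to floating-point error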
class MUTAGClassifier(ptlight.LightningModule):
    def __init__(self):
        # The model is just GCNConv --> GCNConv --> graph pooling --> Dropout --> Linear
        super().__init__()
        self.gc1 = ptgeom.nn.GCNConv(7, 256)
        self.gc2 = ptgeom.nn.GCNConv(256, 256)
        self.linear = torch.nn.Linear(256, 1)
        self.s = torch.nn.Sigmoid()

    def forward(self, x, edge_index=None, batch=None, edge_weight=None):
        # Note: "edge_weight" is not used for training, but only for the explainability part
        if edge_index is None:
            x, edge_index, batch = x.x, x.edge_index, x.batch
        x = F.relu(self.gc1(x, edge_index, edge_weight))
        x = F.relu(self.gc2(x, edge_index, edge_weight))
        x = ptgeom.nn.global_mean_pool(x, batch)
        x = F.dropout(x, training=self.training)  # only apply dropout while training
        x = self.linear(x)
        return x

    def configure_optimizers(self):
        optimizer = torch.optim.Adam(self.parameters(), lr=1e-3)
        return optimizer

    def training_step(self, batch, _):
        # the loss is computed on raw logits for numerical stability
        y_hat = self.forward(batch.x, batch.edge_index, batch.batch)
        loss = F.binary_cross_entropy_with_logits(y_hat, batch.y.unsqueeze(1).float())
        self.log("train_loss", loss)
        self.log("train_accuracy", accuracy(y_hat, batch.y.unsqueeze(1)), prog_bar=True, batch_size=32)
        return loss

    def validation_step(self, batch, _):
        x, edge_index, batch_idx = batch.x, batch.edge_index, batch.batch
        y_hat = self.forward(x, edge_index, batch_idx)
        y_hat = self.s(y_hat)                   # logits -> probabilities
        y_hat = torch.where(y_hat > 0.5, 1, 0)  # may be needed, depending on your accuracy metric
        self.log("val_accuracy", accuracy(y_hat, batch.y.unsqueeze(1)), prog_bar=True, batch_size=32)

    def predict_step(self, batch, _):
        x, edge_index, batch_idx = batch.x, batch.edge_index, batch.batch
        y_hat = self.forward(x, edge_index, batch_idx)
        y_hat = self.s(y_hat)                   # logits -> probabilities
        y_hat = torch.where(y_hat > 0.5, 1, 0)  # may be needed
        return y_hat
You could then do the following to get a list of predictions with their corresponding ground truth (trainer.predict expects a dataloader, so for a single batch it is simpler to call predict_step directly):
gnn.eval()  # disable dropout for inference
batch = next(iter(train_loader))  # get a batch
with torch.no_grad():
    y_hat = gnn.predict_step(batch, 0)
print(list(y_hat))
print(list(batch.y))
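To score the whole test set rather than a single batch, trainer.predict can be run on the dataloader itself; a minimal sketch, assuming the gnn, trainer, and test_loader defined above:

# trainer.predict returns one tensor per batch; concatenate, then compare to the labels
preds = torch.cat(trainer.predict(gnn, test_loader)).squeeze(1)
truth = torch.cat([b.y for b in test_loader])
print((preds == truth).float().mean())  # overall test accuracy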

Related

Pytorch model gradients not updating with some custom code

I have an object detection task that requires recognising multiple objects in an image. The idea is to define a simple network from scratch (i.e. without using ready-made implementations of YOLO or similar algorithms). Our approach was to define an architecture able to detect a single bounding box, and then to run several independent copies of the same model on the data in parallel. The architecture was defined as follows:
import torch
import torch.nn as nn
import torch.nn.functional as F

class MultiObjectNet(nn.Module):
    def __init__(self, image_width, image_height, num_classes=13, num_boxes=5, device=torch.device('mps')):
        super(MultiObjectNet, self).__init__()
        self.device = device
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.initial_conv1 = nn.Conv2d(in_channels=3, out_channels=64, kernel_size=4, stride=2, padding=1, device=self.device)
        self.initial_conv2 = nn.Conv2d(in_channels=64, out_channels=128, kernel_size=4, stride=2, padding=1, device=self.device)
        self.initial_conv3 = nn.Conv2d(in_channels=128, out_channels=256, kernel_size=2, stride=2, device=self.device)
        self.no_pools1 = nn.Conv2d(in_channels=256, out_channels=512, kernel_size=3, stride=2, device=self.device)
        self.no_pools2 = nn.Conv2d(in_channels=512, out_channels=1024, kernel_size=3, stride=3, device=self.device)
        self.fc1 = nn.Linear(in_features=1024, out_features=512, device=self.device)
        self.fc2 = nn.Linear(in_features=512, out_features=256, device=self.device)
        self.fc3 = nn.Linear(in_features=256, out_features=128, device=self.device)
        self.fc4 = nn.Linear(in_features=128, out_features=64, device=self.device)
        self.pc_layer = nn.Linear(in_features=64, out_features=1, device=self.device)
        self.box_layer = nn.Linear(in_features=64, out_features=4, device=self.device)
        self.category_layer = nn.Linear(in_features=64, out_features=num_classes, device=self.device)
        self.sigmoid = nn.Sigmoid()
        self.num_classes = num_classes
        self.num_boxes = num_boxes

    def __device__(self):
        return self.device

    def forward(self, x):
        x = self.initial_conv1(x)
        x = F.relu(x)
        x = self.pool(x)
        x = self.initial_conv2(x)
        x = F.relu(x)
        x = self.pool(x)
        x = self.initial_conv3(x)
        x = F.relu(x)
        x = self.pool(x)
        x = self.no_pools1(x)
        x = F.relu(x)
        x = self.no_pools2(x)
        x = F.relu(x)
        x = torch.transpose(x, 1, 3)[::, 0][:, 0]  # drop the 1x1 spatial dims, keeping (batch, 1024)
        x = self.fc1(x)
        x = F.relu(x)
        x = self.fc2(x)
        x = F.relu(x)
        x = self.fc3(x)
        x = F.relu(x)
        x = self.fc4(x)
        x = F.relu(x)
        return self.sigmoid(self.pc_layer(x)), self.box_layer(x), self.category_layer(x)
The network outputs a triple $(P_C, [x_{min}, y_{min}, x_{max}, y_{max}], CAT)$, where the inner list contains the coordinates of the bounding box and $CAT$ is a probability distribution over the classes of the dataset.
The training and loss calculation were done as follows:
import pandas as pd
import torch
from tqdm import tqdm

nets = [MultiObjectNet(resizing_width, resizing_height, device=torch.device('mps')) for _ in range(max_boxes)]
loss_df = pd.DataFrame(index=list(range(100)), columns=["loss"])
learning_rate = 1e-02
optims = [torch.optim.SGD(net.parameters(), lr=learning_rate, momentum=0.9) for net in nets]
bce_loss = torch.nn.BCELoss()
mse_loss = torch.nn.MSELoss()
xent_loss = torch.nn.CrossEntropyLoss()

for epoch in range(5):
    running_loss = 0.0
    for batch_number, batch in tqdm(enumerate(train_dataloader)):
        inputs, labels = batch
        batch_size = len(inputs)
        for i, (optim, net, label) in enumerate(zip(optims, nets, labels)):
            pred_pc, pred_box, pred_category = net(inputs)
            y_pc, y_box, y_category = label[0].reshape(batch_size, 1), label[1].reshape(batch_size, 4), label[2].reshape(batch_size, num_classes)
            optim.zero_grad()
            confidence_loss = bce_loss(pred_pc, y_pc)
            box_loss = mse_loss(pred_box, y_box)
            category_loss = xent_loss(pred_category, y_category)
            loss = confidence_loss + box_loss + category_loss
            loss.backward()
            optim.step()
My model fails to generalize and fixates on a single output no matter the input: it always produces very similar confidence probabilities, boxes, and category distributions. Looking at the first-layer parameters, I can see that the gradient is not None, so I suppose there is some kind of learning, but I can't understand what is happening.
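A non-None gradient does not by itself mean the weights are moving; a quick diagnostic, sketched here and not part of the original post, is to snapshot a parameter across one optimizer step:

import torch

# compare one layer's weights before and after a single training step
before = nets[0].initial_conv1.weight.detach().clone()
# ... run one iteration of the training loop above ...
after = nets[0].initial_conv1.weight.detach()
print((after - before).abs().max().item())  # near-zero means the updates are vanishingly small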

SVM to multiclass SVM

I have a code snippet implementing an SVM from scratch (a binary classifier).
This code wouldn't work on MNIST because the output predictions are only 0 or 1:
import numpy as np

class SVM:
    def __init__(self, learning_rate=1e-3, lambda_param=1e-2, n_iters=1000):
        self.lr = learning_rate
        self.lambda_param = lambda_param
        self.n_iters = n_iters
        self.w = None
        self.b = None

    def _init_weights_bias(self, X):
        n_features = X.shape[1]
        self.w = np.zeros(n_features)
        self.b = 0

    def _get_cls_map(self, y):
        return np.where(y <= 0, -1, 1)

    def _satisfy_constraint(self, x, idx):
        linear_model = np.dot(x, self.w) + self.b
        return self.cls_map[idx] * linear_model >= 1

    def _get_gradients(self, constrain, x, idx):
        if constrain:
            dw = self.lambda_param * self.w
            db = 0
            return dw, db
        dw = self.lambda_param * self.w - np.dot(self.cls_map[idx], x)
        db = -self.cls_map[idx]
        return dw, db

    def _update_weights_bias(self, dw, db):
        self.w -= self.lr * dw
        self.b -= self.lr * db

    def fit(self, X, y):
        self._init_weights_bias(X)
        self.cls_map = self._get_cls_map(y)
        for _ in range(self.n_iters):
            for idx, x in enumerate(X):
                constrain = self._satisfy_constraint(x, idx)
                dw, db = self._get_gradients(constrain, x, idx)
                self._update_weights_bias(dw, db)

    def predict(self, X):
        estimate = np.dot(X, self.w) + self.b
        prediction = np.sign(estimate)
        return np.where(prediction == -1, 0, 1)
What can I do to classify more than 20 classes?
In other words, how do I make it a multiclass SVM?
Any hints please?
SVM does not handle multiclass cases natively. Most implementations simply fit as many binary classifiers as there are classes (one-vs-rest) or as many as there are possible pairs of classes (one-vs-one).
A more elegant approach would be using Hamming codes (see e.g. https://github.com/christianversloot/machine-learning-articles/blob/main/using-error-correcting-output-codes-for-multiclass-svm-classification.md), though it sounds like quite a challenge to implement from scratch.
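For illustration, here is a minimal one-vs-rest sketch built on top of the SVM class from the question. Treat it as an assumption-laden sketch, not a reference implementation: it reads the learned w and b directly to form a raw decision score, since predict only returns 0/1.

import numpy as np

class OneVsRestSVM:
    # one binary SVM per class; predict by the largest raw decision score
    def __init__(self, n_classes, **svm_kwargs):
        self.models = [SVM(**svm_kwargs) for _ in range(n_classes)]

    def fit(self, X, y):
        for cls, model in enumerate(self.models):
            model.fit(X, (y == cls).astype(int))  # current class vs. everything else

    def predict(self, X):
        # raw scores w.x + b from each binary model, stacked as (n_samples, n_classes)
        scores = np.stack([X @ m.w + m.b for m in self.models], axis=1)
        return np.argmax(scores, axis=1)

One-vs-one would instead train a classifier for every pair of classes and take a majority vote: more models, but each one sees a smaller and more balanced subproblem.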

Fitting a Keras model for cat and dog image classification takes 50 minutes per epoch. Any way I can reduce the time?

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

image_size = (180, 180)
batch_size = 32

train_ds = tf.keras.preprocessing.image_dataset_from_directory(
    "PetImages",
    validation_split=0.2,
    subset="training",
    seed=1337,
    image_size=image_size,
    batch_size=batch_size,
)
val_ds = tf.keras.preprocessing.image_dataset_from_directory(
    "PetImages",
    validation_split=0.2,
    subset="validation",
    seed=1337,
    image_size=image_size,
    batch_size=batch_size,
)

data_augmentation = keras.Sequential(
    [
        layers.RandomFlip("horizontal"),
        layers.RandomRotation(0.1),
    ]
)

train_ds = train_ds.prefetch(buffer_size=32)
val_ds = val_ds.prefetch(buffer_size=32)

def make_model(input_shape, num_classes):
    inputs = keras.Input(shape=input_shape)
    # Image augmentation block
    x = data_augmentation(inputs)
    # Entry block
    x = layers.Rescaling(1.0 / 255)(x)
    x = layers.Conv2D(32, 3, strides=2, padding="same")(x)
    x = layers.BatchNormalization()(x)
    x = layers.Activation("relu")(x)
    x = layers.Conv2D(64, 3, padding="same")(x)
    x = layers.BatchNormalization()(x)
    x = layers.Activation("relu")(x)
    previous_block_activation = x  # Set aside residual
    for size in [128, 256, 512, 728]:
        x = layers.Activation("relu")(x)
        x = layers.SeparableConv2D(size, 3, padding="same")(x)
        x = layers.BatchNormalization()(x)
        x = layers.Activation("relu")(x)
        x = layers.SeparableConv2D(size, 3, padding="same")(x)
        x = layers.BatchNormalization()(x)
        x = layers.MaxPooling2D(3, strides=2, padding="same")(x)
        # Project residual
        residual = layers.Conv2D(size, 1, strides=2, padding="same")(
            previous_block_activation
        )
        x = layers.add([x, residual])  # Add back residual
        previous_block_activation = x  # Set aside next residual
    x = layers.SeparableConv2D(1024, 3, padding="same")(x)
    x = layers.BatchNormalization()(x)
    x = layers.Activation("relu")(x)
    x = layers.GlobalAveragePooling2D()(x)
    if num_classes == 2:
        activation = "sigmoid"
        units = 1
    else:
        activation = "softmax"
        units = num_classes
    x = layers.Dropout(0.5)(x)
    outputs = layers.Dense(units, activation=activation)(x)
    return keras.Model(inputs, outputs)

model = make_model(input_shape=image_size + (3,), num_classes=2)
keras.utils.plot_model(model, show_shapes=True)

epochs = 50
callbacks = [
    keras.callbacks.ModelCheckpoint("save_at_{epoch}.h5"),
]
model.compile(
    optimizer=keras.optimizers.Adam(1e-3),
    loss="binary_crossentropy",
    metrics=["accuracy"],
)
model.fit(
    train_ds, epochs=epochs, callbacks=callbacks, validation_data=val_ds,
)
So the strategy was to begin the model with the data_augmentation preprocessor, followed by a Rescaling layer, with a Dropout layer before the final classification layer, as shown in the make_model function.
For training, as you can see, I set epochs=50 and used buffered prefetching for the input data so it yields data from disk without I/O blocking. The rest of the parameters are fairly standard, nothing too complicated, but when I run the code each epoch takes approximately 40 minutes and I don't know why.
Any suggestions?
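One thing worth checking first, offered as a hedged suggestion rather than part of the original post: confirm TensorFlow actually sees a GPU (CPU-only training easily explains epochs this slow), and cache the decoded images so they are not re-read from disk every epoch:

import tensorflow as tf

print(tf.config.list_physical_devices('GPU'))  # an empty list means training runs on the CPU

# cache decoded images after the first epoch and let tf.data pick the prefetch buffer size
train_ds = train_ds.cache().prefetch(tf.data.AUTOTUNE)
val_ds = val_ds.cache().prefetch(tf.data.AUTOTUNE)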

I got an error in code to train a model - multivariate time series

I need help with multivariate time series forecasting using GRU/LSTM.
The dataset I am using has about 4000 rows and 7 columns.
I already used this for input shaping:
import numpy as np

def create_dataset(X, look_back=1):
    Xs, ys = [], []
    for i in range(len(X) - look_back):
        v = X[i:i + look_back]
        Xs.append(v)
        ys.append(X[i + look_back][0])
    return np.array(Xs), np.array(ys)

LOOK_BACK = 30
X_train, y_train = create_dataset(train_scaled, LOOK_BACK)
X_test, y_test = create_dataset(test_scaled, LOOK_BACK)
print('X_train.shape: ', X_train.shape)
print('y_train.shape: ', y_train.shape)
print('X_test.shape: ', X_test.shape)
print('y_test.shape: ', y_test.shape)
This part is for model creation:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import GRU, Dropout, Dense

def create_gru(units):
    model = Sequential()
    # Input layer
    model.add(GRU(units=units, return_sequences=True,
                  input_shape=[X_train.shape[1], X_train.shape[2]]))
    model.add(Dropout(0.2))
    # Hidden layer
    model.add(GRU(units=units))
    model.add(Dropout(0.2))
    model.add(Dense(units=1))
    return model
When I execute, I get an error saying that a non-broadcastable output operand with shape (854,1) doesn't match the broadcast shape (854,7).
The error happens when execution reaches this part:
y_train = scaler.inverse_transform(y_train)
y_test = scaler.inverse_transform(y_test)

def prediction(model):
    prediction = model.predict(X_test)
    prediction = scaler.inverse_transform(prediction)
    return prediction

prediction_gru = prediction(model_gru)
prediction_bilstm = prediction(model_bilstm)
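The shapes in the error message suggest the scaler was fit on all 7 columns while the targets and model outputs have only 1, so inverse_transform cannot broadcast. A common workaround, sketched under the assumption that the scaler is a scikit-learn MinMaxScaler and the target is the first column (variable names here are hypothetical), is to keep a separate scaler for the target:

from sklearn.preprocessing import MinMaxScaler

# one scaler for the full 7-column feature matrix, one for the target column only
feature_scaler = MinMaxScaler().fit(train)           # train: raw (rows, 7) array
target_scaler = MinMaxScaler().fit(train[:, [0]])    # target column, kept 2-D

train_scaled = feature_scaler.transform(train)
test_scaled = feature_scaler.transform(test)
# ... build X/y with create_dataset and fit the model as above ...

pred = model_gru.predict(X_test)                     # shape (n, 1)
pred = target_scaler.inverse_transform(pred)         # shapes now match
y_test_orig = target_scaler.inverse_transform(y_test.reshape(-1, 1))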

not enough values to unpack (expected 2, got 1) adaboost algorithm

import numpy as np

def adaboost(X_train, Y_train, X_test, Y_test, lamb=0.01, num_iterations=200, learning_rate=0.001):
    label_train = 2 * Y_train - 1
    label_test = 2 * Y_test - 1
    [n, p] = X_train.shape
    [ntest, ptest] = X_test.shape
    X_train_1 = np.concatenate((np.ones([n, 1]), X_train), axis=1)
    X_test_1 = np.concatenate((np.ones([ntest, 1]), X_test), axis=1)
    beta = np.zeros([p + 1])
    acc_train = []
    acc_test = []
    #margins = []
    for it in range(num_iterations):
        score = np.matmul(X_train_1, beta)
        error = (score * label_train < 1)
        dbeta = np.mean(X_train_1 * (error * label_train).reshape(-1, 1), axis=0)
        beta += learning_rate * dbeta
        beta[1:] -= lamb * beta[1:]
        #margins.append(np.min(score*label_train))
        # train
        predict = (np.sign(score) == label_train)
        acc = np.sum(predict) / n
        acc_train.append(acc)
        # test
        score_test = np.matmul(X_test_1, beta)
        predict = (np.sign(score_test) == label_test)
        acc = np.sum(predict) / ntest
        acc_test.append(acc)
    return beta, acc_train, acc_test
I am calling this function with:
_, train_acc, test_acc = adaboost(X_train, y_train, X_test, y_test)
and it gives the error in the title at line 68:
[ntest, ptest] = X_test.shape
Any idea how to stop getting this error?
Can someone explain what I am doing wrong?
Whatever X_test is, it currently has only a single dimension when it should be two-dimensional, i.e. shaped (n_samples, n_features).
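A sketch of the usual fix (hypothetical, since the actual X_test isn't shown): make sure X_test has a 2-D (n_samples, n_features) shape before calling the function:

import numpy as np

print(X_test.shape)  # the unpacking fails when this prints (n,) rather than (rows, cols)

# if X_test is a single sample, promote it to a 1-row matrix
X_test = X_test.reshape(1, -1)
# if it is a flattened matrix, restore the feature dimension instead:
# X_test = X_test.reshape(-1, X_train.shape[1])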