I have an object detection task that requires recognising multiple objects in an image. The idea is to define a simple network from scratch (i.e. without using ready-made implementations of YOLO or similar algorithms). Our approach was to define an architecture that is able to detect a single bounding box, and then to run several independent copies of the same model on the data in parallel. Our architecture is defined as follows:
class MultiObjectNet(nn.Module):
    """Single-box detector: five conv layers, a four-layer MLP trunk and three heads.

    Args:
        image_width: Accepted for API compatibility; not used by the network.
        image_height: Accepted for API compatibility; not used by the network.
        num_classes: Width of the category head.
        num_boxes: Stored on the instance; not used by ``forward``.
        device: Device on which every layer is created.

    ``forward`` returns a triple ``(confidence, box, category)``:
    ``confidence`` has one sigmoid-activated value per sample, ``box`` has
    four unbounded values per sample, and ``category`` has ``num_classes``
    raw scores per sample (no softmax is applied here).
    """

    def __init__(self, image_width, image_height, num_classes=13, num_boxes=5, device=torch.device('mps')):
        super().__init__()
        self.device = device
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)

        # Convolutions that are each followed by ReLU + 2x2 max pooling.
        self.initial_conv1 = nn.Conv2d(in_channels=3, out_channels=64, kernel_size=4, stride=2, padding=1, device=self.device)
        self.initial_conv2 = nn.Conv2d(in_channels=64, out_channels=128, kernel_size=4, stride=2, padding=1, device=self.device)
        self.initial_conv3 = nn.Conv2d(in_channels=128, out_channels=256, kernel_size=2, stride=2, device=self.device)

        # Strided convolutions that are followed by ReLU only (no pooling).
        self.no_pools1 = nn.Conv2d(in_channels=256, out_channels=512, kernel_size=3, stride=2, device=self.device)
        self.no_pools2 = nn.Conv2d(in_channels=512, out_channels=1024, kernel_size=3, stride=3, device=self.device)

        # Fully connected trunk shared by the three heads.
        self.fc1 = nn.Linear(in_features=1024, out_features=512, device=self.device)
        self.fc2 = nn.Linear(in_features=512, out_features=256, device=self.device)
        self.fc3 = nn.Linear(in_features=256, out_features=128, device=self.device)
        self.fc4 = nn.Linear(in_features=128, out_features=64, device=self.device)

        # Output heads.
        self.pc_layer = nn.Linear(in_features=64, out_features=1, device=self.device)
        self.box_layer = nn.Linear(in_features=64, out_features=4, device=self.device)
        self.category_layer = nn.Linear(in_features=64, out_features=num_classes, device=self.device)

        self.sigmoid = nn.Sigmoid()
        self.num_classes = num_classes
        self.num_boxes = num_boxes

    def __device__(self):
        """Return the device the layers were created on."""
        return self.device

    def forward(self, x):
        """Run the detector on a batch of 3-channel images.

        Args:
            x: Image batch laid out as (batch, 3, height, width).

        Returns:
            Tuple ``(confidence, box, category)`` as described on the class.
        """
        for conv in (self.initial_conv1, self.initial_conv2, self.initial_conv3):
            x = self.pool(F.relu(conv(x)))
        for conv in (self.no_pools1, self.no_pools2):
            x = F.relu(conv(x))

        # Collapse the spatial grid to one 1024-d vector per sample.  The
        # previous ``torch.transpose(x, 1, 3)[::, 0][:, 0]`` kept only the
        # feature at spatial position (0, 0) and silently discarded every
        # other cell when the map was larger than 1x1.  Global average
        # pooling gives the same result on a 1x1 map and uses all cells
        # otherwise.
        x = F.adaptive_avg_pool2d(x, 1).flatten(1)

        for fc in (self.fc1, self.fc2, self.fc3, self.fc4):
            x = F.relu(fc(x))

        return self.sigmoid(self.pc_layer(x)), self.box_layer(x), self.category_layer(x)
The network outputs a triple $(P_C, [x_{min}, y_{min}, x_{max}, y_{max}], CAT)$, where $P_C$ is the confidence that an object is present, the inner list contains the coordinates of the bounding box, and $CAT$ is a probability distribution over the classes of the dataset.
The training and loss computation were implemented as follows:
# One independent single-box detector (with its own optimizer) per box slot.
nets = [
    MultiObjectNet(resizing_width, resizing_height, device=torch.device('mps'))
    for _ in range(max_boxes)
]
loss_df = pd.DataFrame(index=list(range(100)), columns=["loss"])
learning_rate = 1e-02
optims = [torch.optim.SGD(net.parameters(), lr=learning_rate, momentum=0.9) for net in nets]
bce_loss = torch.nn.BCELoss()
mse_loss = torch.nn.MSELoss()
xent_loss = torch.nn.CrossEntropyLoss()

for epoch in range(5):
    running_loss = 0.0
    num_updates = 0
    # Wrap the dataloader (not the enumerate object) so tqdm can show the total.
    for batch_number, batch in enumerate(tqdm(train_dataloader)):
        inputs, labels = batch
        batch_size = len(inputs)
        # ``labels`` is iterated as one (confidence, box, category) triple per
        # box slot -- presumably the dataloader yields them in that layout;
        # verify against the dataset's collate function.
        for optim, net, label in zip(optims, nets, labels):
            y_pc = label[0].reshape(batch_size, 1)
            y_box = label[1].reshape(batch_size, 4)
            # Use the head width stored on the network rather than a
            # separate global, so the target shape always matches it.
            y_category = label[2].reshape(batch_size, net.num_classes)

            optim.zero_grad()
            pred_pc, pred_box, pred_category = net(inputs)
            confidence_loss = bce_loss(pred_pc, y_pc)
            box_loss = mse_loss(pred_box, y_box)
            category_loss = xent_loss(pred_category, y_category)
            loss = confidence_loss + box_loss + category_loss
            loss.backward()
            optim.step()

            running_loss += loss.item()
            num_updates += 1

    # Record the mean loss over every (batch, network) update of this epoch.
    if num_updates:
        loss_df.loc[epoch, "loss"] = running_loss / num_updates
My model fails to generalize and fixates on a single output, no matter what input is given to the network (it always outputs very similar confidence probabilities, boxes and category distributions). Looking at the first layer's parameters, I can see that the gradient is not None, so I suppose that some kind of learning is taking place, but I can't understand what is happening.
Related
I'm trying to see how my trained model would predict a single instance of y, and to get a list of predicted and actual y values.
It seems I'm missing a few steps and I'm not sure how to implement predict_step. Here is what I currently have:
# Load MUTAG from TUDataset and make a 75/25 split stratified on each graph's label.
mutag = ptgeom.datasets.TUDataset(name='MUTAG', root='.')
train_idx, test_idx = train_test_split(
    range(len(mutag)),
    test_size=0.25,
    stratify=[graph.y[0].item() for graph in mutag],
)

# Only the training loader shuffles; the test loader keeps dataset order.
train_loader = ptgeom.loader.DataLoader(mutag[train_idx], shuffle=True, batch_size=32)
test_loader = ptgeom.loader.DataLoader(mutag[test_idx], batch_size=32)
class MUTAGClassifier(ptlight.LightningModule):
    """Graph classifier: GCNConv -> GCNConv -> mean pooling -> Dropout -> Linear.

    ``forward`` returns one raw logit per graph; no sigmoid is applied.
    """

    def __init__(self):
        super().__init__()
        self.gc1 = ptgeom.nn.GCNConv(7, 256)
        self.gc2 = ptgeom.nn.GCNConv(256, 256)
        self.linear = torch.nn.Linear(256, 1)

    def forward(self, x, edge_index=None, batch=None, edge_weight=None):
        """Return one logit per graph.

        ``x`` may be a node-feature tensor (with ``edge_index`` and ``batch``
        given explicitly) or a batch object exposing ``.x``, ``.edge_index``
        and ``.batch``, in which case the other arguments are read from it.
        """
        # Note: "edge_weight" is not used for training, but only for the explainability part
        if edge_index is None:
            x, edge_index, batch = x.x, x.edge_index, x.batch
        x = F.relu(self.gc1(x, edge_index, edge_weight))
        x = F.relu(self.gc2(x, edge_index, edge_weight))
        x = ptgeom.nn.global_mean_pool(x, batch)
        # F.dropout defaults to training=True; tie it to the module's mode so
        # validation and prediction are deterministic.
        x = F.dropout(x, training=self.training)
        x = self.linear(x)
        return x

    def configure_optimizers(self):
        """Use Adam with a learning rate of 1e-3."""
        optimizer = torch.optim.Adam(self.parameters(), lr=1e-3)
        return optimizer

    def training_step(self, batch, _):
        """Compute the BCE-with-logits loss and log loss and accuracy."""
        y_hat = self.forward(batch.x, batch.edge_index, batch.batch)
        loss = F.binary_cross_entropy_with_logits(y_hat, batch.y.unsqueeze(1).float())
        self.log("train_loss", loss)
        self.log("train_accuracy", accuracy(y_hat, batch.y.unsqueeze(1)), prog_bar=True, batch_size=32)
        return loss

    def validation_step(self, batch, _):
        """Log the accuracy on a validation batch."""
        # ``batch_idx`` holds the node-to-graph assignment vector, not a batch number.
        x, edge_index, batch_idx = batch.x, batch.edge_index, batch.batch
        y_hat = self.forward(x, edge_index, batch_idx)
        self.log("val_accuracy", accuracy(y_hat, batch.y.unsqueeze(1)), prog_bar=True, batch_size=32)
# Keep every checkpoint (save_top_k=-1), writing one every 50 epochs.
checkpoint_callback = ptlight.callbacks.ModelCheckpoint(
    save_top_k=-1,
    every_n_epochs=50,
    filename='gnn-{epoch:02d}',
    dirpath='./checkpoints/',
)

trainer = ptlight.Trainer(callbacks=[checkpoint_callback], max_epochs=200)

# NOTE(review): ``gnn`` is not defined in the visible code -- presumably an
# instance of MUTAGClassifier; confirm.  The test loader doubles as the
# validation loader here.
trainer.fit(gnn, train_loader, test_loader)
The crux here is that you use F.binary_cross_entropy_with_logits in your training_step (for numerical stability I suppose). This means that nn.Sigmoid has to be applied to your output both in validation_step and predict_step as the operation is not part of forward(). Check this for more information. Notice that you may also need to round your predicted results depending on which accuracy method you are using in order to get correct metric results.
class MUTAGClassifier(ptlight.LightningModule):
    """Graph classifier: GCNConv -> GCNConv -> mean pooling -> Dropout -> Linear.

    ``forward`` returns one raw logit per graph because the loss is
    ``binary_cross_entropy_with_logits``.  The sigmoid and 0.5 threshold are
    applied only when hard 0/1 labels are needed (metrics and prediction).
    """

    def __init__(self):
        super().__init__()
        self.gc1 = ptgeom.nn.GCNConv(7, 256)
        self.gc2 = ptgeom.nn.GCNConv(256, 256)
        self.linear = torch.nn.Linear(256, 1)
        self.s = torch.nn.Sigmoid()

    def forward(self, x, edge_index=None, batch=None, edge_weight=None):
        """Return one logit per graph.

        ``x`` may be a node-feature tensor (with ``edge_index`` and ``batch``
        given explicitly) or a batch object exposing ``.x``, ``.edge_index``
        and ``.batch``, in which case the other arguments are read from it.
        """
        # Note: "edge_weight" is not used for training, but only for the explainability part
        if edge_index is None:
            x, edge_index, batch = x.x, x.edge_index, x.batch
        x = F.relu(self.gc1(x, edge_index, edge_weight))
        x = F.relu(self.gc2(x, edge_index, edge_weight))
        x = ptgeom.nn.global_mean_pool(x, batch)
        # F.dropout defaults to training=True; tie it to the module's mode so
        # validation and prediction are deterministic.
        x = F.dropout(x, training=self.training)
        return self.linear(x)

    def _hard_labels(self, logits):
        """Turn logits into 0/1 integer labels via sigmoid and a 0.5 threshold."""
        return (self.s(logits) > 0.5).long()

    def configure_optimizers(self):
        """Use Adam with a learning rate of 1e-3."""
        return torch.optim.Adam(self.parameters(), lr=1e-3)

    def training_step(self, batch, _):
        """Compute the BCE-with-logits loss and log loss and accuracy."""
        y_hat = self.forward(batch.x, batch.edge_index, batch.batch)
        loss = F.binary_cross_entropy_with_logits(y_hat, batch.y.unsqueeze(1).float())
        self.log("train_loss", loss)
        # Score hard labels, exactly as validation_step does, so the two
        # accuracies are comparable.
        self.log("train_accuracy", accuracy(self._hard_labels(y_hat), batch.y.unsqueeze(1)), prog_bar=True, batch_size=32)
        return loss

    def validation_step(self, batch, _):
        """Log the accuracy of thresholded predictions on a validation batch."""
        # ``batch_idx`` holds the node-to-graph assignment vector, not a batch number.
        x, edge_index, batch_idx = batch.x, batch.edge_index, batch.batch
        y_hat = self._hard_labels(self.forward(x, edge_index, batch_idx))
        self.log("val_accuracy", accuracy(y_hat, batch.y.unsqueeze(1)), prog_bar=True, batch_size=32)

    def predict_step(self, batch, _):
        """Return 0/1 predictions, one row per graph in ``batch``."""
        x, edge_index, batch_idx = batch.x, batch.edge_index, batch.batch
        return self._hard_labels(self.forward(x, edge_index, batch_idx))
You could then do the following in order to get a list of predictions with their corresponding ground truth:
# Trainer.predict expects a dataloader (not a single batch) and returns one
# output per batch.  Use the unshuffled test loader so that the predictions
# and the ground truth collected below line up element by element.
batch_predictions = trainer.predict(your_model, dataloaders=test_loader)
y_hat = torch.cat(batch_predictions).flatten()
y_true = torch.cat([batch.y for batch in test_loader])
print(y_hat.tolist())
print(y_true.tolist())
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
image_size = (180, 180)
batch_size = 32

# Build the training and validation splits from the same directory with the
# same seed (80/20), then prefetch each one so reading overlaps with compute.
train_ds, val_ds = (
    tf.keras.preprocessing.image_dataset_from_directory(
        "PetImages",
        validation_split=0.2,
        subset=subset,
        seed=1337,
        image_size=image_size,
        batch_size=batch_size,
    ).prefetch(buffer_size=32)
    for subset in ("training", "validation")
)

# Augmentation layers; make_model applies them to the model input.
data_augmentation = keras.Sequential(
    [layers.RandomFlip("horizontal"), layers.RandomRotation(0.1)]
)
def make_model(input_shape, num_classes):
    """Build the image classifier.

    Layout: augmentation -> rescaling -> two-conv stem -> four residual
    separable-conv stages (128, 256, 512, 728 filters) -> 1024-filter
    separable conv -> global average pooling -> dropout -> dense head.

    Args:
        input_shape: Shape of one input image, passed to ``keras.Input``.
        num_classes: Number of classes. Two classes give a single sigmoid
            unit; any other value gives a ``num_classes``-way softmax.

    Returns:
        An uncompiled ``keras.Model``.
    """
    inputs = keras.Input(shape=input_shape)

    # Augment, then scale pixel values by 1/255.
    net = data_augmentation(inputs)
    net = layers.Rescaling(1.0 / 255)(net)

    # Stem: Conv -> BatchNorm -> ReLU twice; only the first conv downsamples.
    for filters, strides in ((32, 2), (64, 1)):
        net = layers.Conv2D(filters, 3, strides=strides, padding="same")(net)
        net = layers.BatchNormalization()(net)
        net = layers.Activation("relu")(net)

    shortcut_source = net  # input of the next residual shortcut

    for filters in (128, 256, 512, 728):
        # Two ReLU -> SeparableConv -> BatchNorm units, then a stride-2 pool.
        for _ in range(2):
            net = layers.Activation("relu")(net)
            net = layers.SeparableConv2D(filters, 3, padding="same")(net)
            net = layers.BatchNormalization()(net)
        net = layers.MaxPooling2D(3, strides=2, padding="same")(net)

        # A 1x1 stride-2 conv projects the shortcut to the pooled shape.
        shortcut = layers.Conv2D(filters, 1, strides=2, padding="same")(shortcut_source)
        net = layers.add([net, shortcut])
        shortcut_source = net

    net = layers.SeparableConv2D(1024, 3, padding="same")(net)
    net = layers.BatchNormalization()(net)
    net = layers.Activation("relu")(net)
    net = layers.GlobalAveragePooling2D()(net)

    units, activation = (1, "sigmoid") if num_classes == 2 else (num_classes, "softmax")

    net = layers.Dropout(0.5)(net)
    outputs = layers.Dense(units, activation=activation)(net)
    return keras.Model(inputs, outputs)
# Binary classifier on 3-channel images of size ``image_size``.
model = make_model(num_classes=2, input_shape=image_size + (3,))
keras.utils.plot_model(model, show_shapes=True)

epochs = 50

# Write a checkpoint file after every epoch.
callbacks = [keras.callbacks.ModelCheckpoint("save_at_{epoch}.h5")]

model.compile(
    loss="binary_crossentropy",
    optimizer=keras.optimizers.Adam(1e-3),
    metrics=["accuracy"],
)
model.fit(
    train_ds,
    validation_data=val_ds,
    epochs=epochs,
    callbacks=callbacks,
)
So the strategy was to begin the model with the data_augmentation preprocessor, followed by a Rescaling layer, and to place a dropout layer before the final classification layer, as shown in the make_model function.
For training the model, as you can see, I set epochs=50 and used buffered prefetching for my input data, since it yields data from disk without blocking on I/O. As for the rest of the parameters, I think they are pretty standard. Nothing too complicated, but when I run my code each epoch takes approximately 40 minutes and I don't know why.
Any suggestions?
I need help with multivariate time series forecasting using GRU / LSTM.
The dataset I am using has about 4000 rows and 7 columns.
I have already used this for input shaping:
def create_dataset(X, look_back=1, target_col=0):
    """Cut a multivariate series into sliding windows and next-step targets.

    Args:
        X: Array-like with time along axis 0 and features along axis 1
            (nested lists and DataFrames are converted with ``np.asarray``).
        look_back: Number of consecutive rows in each input window.
        target_col: Column whose value in the row right after a window is
            used as that window's target. Defaults to the first column.

    Returns:
        Tuple ``(Xs, ys)``. ``Xs`` has shape
        ``(n_windows, look_back, n_features)`` and ``ys`` has shape
        ``(n_windows,)``, where ``n_windows = max(len(X) - look_back, 0)``.

    Raises:
        ValueError: If ``X`` has fewer than two dimensions or ``look_back``
            is negative.
    """
    data = np.asarray(X)
    if data.ndim < 2:
        raise ValueError("X must have at least two dimensions (time, features)")
    if look_back < 0:
        raise ValueError("look_back must be non-negative")

    n_windows = max(len(data) - look_back, 0)
    if n_windows == 0:
        # Return correctly shaped empty arrays rather than shape-(0,) ones so
        # downstream shape checks still work.
        empty_windows = np.empty((0, look_back) + data.shape[1:], dtype=data.dtype)
        empty_targets = np.empty((0,) + data.shape[2:], dtype=data.dtype)
        return empty_windows, empty_targets

    Xs = np.stack([data[start:start + look_back] for start in range(n_windows)])
    # The target of window i is row i + look_back, i.e. every row from
    # ``look_back`` onwards.
    ys = data[look_back:, target_col].copy()
    return Xs, ys
LOOK_BACK = 30

# Window the scaled training and test series with the same look-back.
(X_train, y_train), (X_test, y_test) = (
    create_dataset(split, LOOK_BACK) for split in (train_scaled, test_scaled)
)

# Report the shape of each resulting array, one per line.
print("\n".join(
    f"{name}.shape:  {array.shape}"
    for name, array in (
        ("X_train", X_train),
        ("y_train", y_train),
        ("X_test", X_test),
        ("y_test", y_test),
    )
))
This part is for model creation:
def create_gru(units):
    """Build a two-layer GRU regressor with a single output value.

    The input shape (time steps, features) is read from the module-level
    ``X_train``.

    Args:
        units: Number of units in each GRU layer.

    Returns:
        The ``Sequential`` model. It is returned uncompiled, so call
        ``model.compile(...)`` before fitting.
    """
    model = Sequential()
    # Input layer: returns the full sequence so the next GRU can consume it.
    model.add(GRU(units=units, return_sequences=True,
                  input_shape=[X_train.shape[1], X_train.shape[2]]))
    model.add(Dropout(0.2))
    # Hidden layer: returns only the last step's output.
    model.add(GRU(units=units))
    model.add(Dropout(0.2))
    model.add(Dense(units=1))
    # The builder previously fell off the end and returned None.
    return model
When I execute it, I get an error saying: non-broadcastable output operand with shape (854,1) doesn't match the broadcast shape (854,7)
This error happens when the execution reaches this part:
def _inverse_target(values, target_col=0):
    """Undo the scaling of one column using the multi-column ``scaler``.

    ``scaler`` was fitted on every feature column, so ``inverse_transform``
    rejects a one-column array (the "non-broadcastable output operand"
    error). The values are placed in column ``target_col`` of a zero-filled
    array of the fitted width, inverse-transformed, and read back out.

    Assumes ``scaler`` is a scikit-learn scaler that works column by column
    (e.g. MinMaxScaler or StandardScaler) -- TODO confirm.

    Returns:
        Array of shape ``(n_samples, 1)`` in the original units.
    """
    flat = np.asarray(values, dtype=float).reshape(-1)
    padded = np.zeros((flat.shape[0], scaler.n_features_in_))
    padded[:, target_col] = flat
    return scaler.inverse_transform(padded)[:, [target_col]]


# The targets built by create_dataset come from column 0 of the scaled data.
y_train = _inverse_target(y_train)
y_test = _inverse_target(y_test)


def prediction(model):
    """Predict on ``X_test`` and return the result in the original units."""
    return _inverse_target(model.predict(X_test))


prediction_gru = prediction(model_gru)
prediction_bilstm = prediction(model_bilstm)
I tried using SGD, Adadelta, Adabound, Adam. Everything gives me fluctuations in validation accuracy. I tried all the activation functions in keras, but still, I'm getting fluctuations in val_acc.
Training samples: 1352
Validation Samples: 339
Validation Accuracy
# Convolutional feature extractor. Each stage is one or two
# CONV => SWISH => BATCHNORM units followed by a max pool. The original
# script had Dropout(0.25) commented out after every pool; it stays disabled.
inpt = Input(shape=input_shape)
x = inpt
for filters, n_convs, pool_size in ((32, 1, (3, 3)), (64, 2, (2, 2)), (128, 2, (2, 2))):
    for _ in range(n_convs):
        x = Conv2D(filters, (3, 3), padding="same")(x)
        x = Activation("swish")(x)
        x = BatchNormalization(axis=channel_dim)(x)
    x = MaxPooling2D(pool_size=pool_size)(x)

# Classifier head: four swish Dense layers, each followed by BatchNorm.
x = Flatten()(x)  # Change to GlobalMaxPooling2D
x = Dense(256, activation='swish')(x)
x = BatchNormalization(axis=channel_dim)(x)  # only this head BN passes an explicit axis
x = Dropout(0.4)(x)
for units, drop_rate in ((128, 0.4), (64, 0.3), (32, None)):
    x = Dense(units, activation='swish')(x)
    x = BatchNormalization()(x)
    if drop_rate is not None:  # the last Dense layer has no dropout after it
        x = Dropout(drop_rate)(x)

x = Dense(nc, activation='softmax')(x)

model = Model(inputs=inpt, outputs=x)
model.compile(optimizer='sgd', loss='categorical_crossentropy', metrics=['accuracy'])
Your model may be too noise-sensitive; see this answer.
Based on the answer in the link and what I see from your model, your network may be too deep for the amount of data you have (large model and not enough data ==> overfitting ==> noise sensitivity). I suggest using a simpler model as a sanity check.
The learning rate could also be a possible reason (as stated by Neb). You are using the default learning rate of sgd (which is 0.01, maybe too high). Try with 1e-3 or below.
I have a very simple question. I recently started working with Python.
Here is the R code for H2O AutoML:
# Run H2O AutoML without cross-validation (nfolds = 0). `test` is used both
# as the validation frame and as the leaderboard frame.
aml <- h2o.automl(
  x = x,
  y = y,
  training_frame = train,
  validation_frame = test,
  leaderboard_frame = test,
  nfolds = 0,
  max_models = 1000,
  max_runtime_secs = 99,
  max_runtime_secs_per_model = 3600,
  exclude_algos = c("GLM", "DeepLearning", "GBM", "DRF", "StackedEnsemble"),
  project_name = gtp,
  seed = 22
)
How can I write these in Python?
# The excluded algorithms must be passed as one list. Bare comma-separated
# strings after a keyword argument are a syntax error.
aml = H2OAutoML(
    max_runtime_secs=600,
    exclude_algos=["GLM", "DeepLearning", "GBM", "DRF", "StackedEnsemble"],
    seed=42,
    # NOTE(review): ``gtp`` must be a str variable defined earlier; use a
    # literal such as "gtp" otherwise.
    project_name=gtp,
)
aml.train(
    x=X,
    y=y,
    training_frame=hf_train,
    validation_frame=hf_v,  # a comma was missing after this argument
    leaderboard_frame=hf_test,
)
# Python equivalent of the R ``h2o.automl`` call: constructor options go to
# H2OAutoML(...), while the frames and column names go to ``train``.
aml = H2OAutoML(
    max_runtime_secs=600,
    exclude_algos=["GLM", "DeepLearning", "GBM", "DRF", "StackedEnsemble"],
    # Mirrors ``nfolds = 0`` in the R call; H2O only uses validation_frame
    # when cross-validation is turned off.
    nfolds=0,
    seed=42,
    project_name='gtp',
)
aml.train(
    x=X,
    y=y,
    training_frame=hf_train,
    validation_frame=hf_v,  # a comma was missing after this argument
    leaderboard_frame=hf_test,
)