H2O AutoML in R and Python

I have a very simple question. I recently started working in Python.
Here is the R code for H2O AutoML:
aml <- h2o.automl(x = x, y = y, project_name = "gtp",
                  max_runtime_secs = 99, max_runtime_secs_per_model = 3600,
                  leaderboard_frame = test,
                  training_frame = train, validation_frame = test,
                  nfolds = 0,
                  max_models = 1000,
                  exclude_algos = c("GLM", "DeepLearning", "GBM", "DRF", "StackedEnsemble"),
                  seed = 22)
How can I write this in Python? Here is my attempt so far:
aml = H2OAutoML(max_runtime_secs = 600,
                exclude_algos = ["GLM", "DeepLearning", "GBM", "DRF", "StackedEnsemble"],
                seed = 42,
                project_name = 'gtp')
aml.train(x = X, y = y,
          training_frame = hf_train,
          validation_frame = hf_v,
          leaderboard_frame = hf_test)
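The attempt above also drops several arguments from the R call (max_runtime_secs_per_model, nfolds, max_models, and it uses 600 seconds rather than 99). A closer one-to-one translation might look like the sketch below. This is only a sketch: it assumes a reasonably recent h2o release where these constructor arguments exist, and it assumes x, y, train and test are already defined as in the R code (a list of predictor column names, the response name, and H2OFrames).

# Sketch of a one-to-one translation of the R call (unverified assumption:
# these arguments exist in your installed h2o version).
# x, y, train and test are assumed to be defined as in the R code.
from h2o.automl import H2OAutoML

aml = H2OAutoML(project_name = "gtp",
                max_runtime_secs = 99,
                max_runtime_secs_per_model = 3600,
                nfolds = 0,
                max_models = 1000,
                exclude_algos = ["GLM", "DeepLearning", "GBM", "DRF", "StackedEnsemble"],
                seed = 22)
aml.train(x = x, y = y,
          training_frame = train,
          validation_frame = test,
          leaderboard_frame = test)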

Related

PyTorch model gradients not updating with some custom code

I have an object detection task that requires recognising multiple objects in an image. The idea is to define a simple network from scratch (i.e. without using ready-made implementations of YOLO or similar algorithms). Our approach was to define an architecture able to detect a single bounding box, and then to run several independent copies of the same model on the data in parallel. Our architecture was defined as follows:
class MultiObjectNet(nn.Module):
    def __init__(self, image_width, image_height, num_classes=13, num_boxes=5, device=torch.device('mps')):
        super(MultiObjectNet, self).__init__()
        self.device = device
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.initial_conv1 = nn.Conv2d(in_channels=3, out_channels=64, kernel_size=4, stride=2, padding=1, device=self.device)
        self.initial_conv2 = nn.Conv2d(in_channels=64, out_channels=128, kernel_size=4, stride=2, padding=1, device=self.device)
        self.initial_conv3 = nn.Conv2d(in_channels=128, out_channels=256, kernel_size=2, stride=2, device=self.device)
        self.no_pools1 = nn.Conv2d(in_channels=256, out_channels=512, kernel_size=3, stride=2, device=self.device)
        self.no_pools2 = nn.Conv2d(in_channels=512, out_channels=1024, kernel_size=3, stride=3, device=self.device)
        self.fc1 = nn.Linear(in_features=1024, out_features=512, device=self.device)
        self.fc2 = nn.Linear(in_features=512, out_features=256, device=self.device)
        self.fc3 = nn.Linear(in_features=256, out_features=128, device=self.device)
        self.fc4 = nn.Linear(in_features=128, out_features=64, device=self.device)
        self.pc_layer = nn.Linear(in_features=64, out_features=1, device=self.device)
        self.box_layer = nn.Linear(in_features=64, out_features=4, device=self.device)
        self.category_layer = nn.Linear(in_features=64, out_features=num_classes, device=self.device)
        self.sigmoid = nn.Sigmoid()
        self.num_classes = num_classes
        self.num_boxes = num_boxes

    def __device__(self):
        return self.device

    def forward(self, x):
        x = self.initial_conv1(x)
        x = F.relu(x)
        x = self.pool(x)
        x = self.initial_conv2(x)
        x = F.relu(x)
        x = self.pool(x)
        x = self.initial_conv3(x)
        x = F.relu(x)
        x = self.pool(x)
        x = self.no_pools1(x)
        x = F.relu(x)
        x = self.no_pools2(x)
        x = F.relu(x)
        x = torch.transpose(x, 1, 3)[::, 0][:, 0]
        x = self.fc1(x)
        x = F.relu(x)
        x = self.fc2(x)
        x = F.relu(x)
        x = self.fc3(x)
        x = F.relu(x)
        x = self.fc4(x)
        x = F.relu(x)
        return self.sigmoid(self.pc_layer(x)), self.box_layer(x), self.category_layer(x)
The network outputs a triple $(P_C, [x_{min}, y_{min}, x_{max}, y_{max}], CAT)$, where the inner list contains the coordinates of the bounding box and $CAT$ is a probability distribution over the classes of the dataset.
The training and loss calculation were done as follows:
nets = [MultiObjectNet(resizing_width, resizing_height, device=torch.device('mps')) for _ in range(max_boxes)]
loss_df = pd.DataFrame(index=list(range(100)), columns=["loss"])
learning_rate = 1e-02
optims = [torch.optim.SGD(net.parameters(), lr=learning_rate, momentum=0.9) for net in nets]
bce_loss = torch.nn.BCELoss()
mse_loss = torch.nn.MSELoss()
xent_loss = torch.nn.CrossEntropyLoss()

for epoch in range(5):
    running_loss = 0.0
    for batch_number, batch in tqdm(enumerate(train_dataloader)):
        inputs, labels = batch
        batch_size = len(inputs)
        for i, (optim, net, label) in enumerate(zip(optims, nets, labels)):
            pred_pc, pred_box, pred_category = net(inputs)
            y_pc, y_box, y_category = label[0].reshape(batch_size, 1), label[1].reshape(batch_size, 4), label[2].reshape(batch_size, num_classes)
            optim.zero_grad()
            confidence_loss = bce_loss(pred_pc, y_pc)
            box_loss = mse_loss(pred_box, y_box)
            category_loss = xent_loss(pred_category, y_category)
            loss = confidence_loss + box_loss + category_loss
            loss.backward()
            optim.step()
My model fails to generalize and fixates on a single output no matter the input: it always produces very similar confidence probabilities, boxes and category distributions. By looking at the first layer's parameters I can see that the gradient is not None, so I suppose there is some kind of learning, but I can't understand what is happening.
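Since the post only checks that .grad is not None, one further sanity check (not part of the original code) is to confirm that the parameters themselves change across an optimizer step. The sketch below only assumes that the names from the snippet above (nets, optims, bce_loss, inputs, labels, batch_size) are in scope:

# Illustrative sanity check, not from the original post: verify that the first
# conv layer's weights actually change after one optimizer step.
before = nets[0].initial_conv1.weight.detach().clone()

pred_pc, pred_box, pred_category = nets[0](inputs)
y_pc = labels[0][0].reshape(batch_size, 1)
optims[0].zero_grad()
bce_loss(pred_pc, y_pc).backward()
optims[0].step()

after = nets[0].initial_conv1.weight.detach()
print("max absolute weight change:", (after - before).abs().max().item())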

Data generation for cluster analysis

I don't know which R commands can be used to generate datasets with a combination of parameters (dimension, cluster proportions, model). I use these commands:
library(mvtnorm)  # for rmvnorm

formmatrix = function(a, b, c, d, e, f, p){
  m1 = c(0, 8, rep(0, p-2))
  m2 = c(8, 0, rep(0, p-2))
  m3 = c(-8, -8, rep(0, p-2))
  sig1 = diag(c(1, a, rep(1, p-2)))
  sig2 = diag(c(b, c, rep(1, p-2)))
  sig3 = matrix(c(d, e, e, f), 2, 2, byrow = T)
  sig3 = cbind(sig3, matrix(rep(0, 2*(p-2)), 2))
  sig3 = rbind(sig3, cbind(t(matrix(rep(0, 2*(p-2)), 2)), diag(rep(1, p-2))))
  return(list(m1 = m1, m2 = m2, m3 = m3, sig1 = sig1, sig2 = sig2, sig3 = sig3))
}

p = 2
sim1 = formmatrix(1, 1, 1, 1, 0, 1, p)
sim2 = formmatrix(5, 1, 5, 1, 0, 5, p)
sim3 = formmatrix(5, 5, 1, 3, -2, 3, p)
sim4 = formmatrix(1, 20, 5, 15, -10, 15, p)
sim5 = formmatrix(1, 45, 30, 15, -10, 15, p)

mixt1 <- rbind(rmvnorm(200, mean = sim1$m1, sigma = sim1$sig1),
               rmvnorm(300, mean = sim1$m2, sigma = sim1$sig2),
               rmvnorm(500, mean = sim1$m3, sigma = sim1$sig3))

Fitting a Keras model for cat and dog image classification takes 50 minutes per epoch. Is there any way I can reduce the time?

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

image_size = (180, 180)
batch_size = 32

train_ds = tf.keras.preprocessing.image_dataset_from_directory(
    "PetImages",
    validation_split=0.2,
    subset="training",
    seed=1337,
    image_size=image_size,
    batch_size=batch_size,
)
val_ds = tf.keras.preprocessing.image_dataset_from_directory(
    "PetImages",
    validation_split=0.2,
    subset="validation",
    seed=1337,
    image_size=image_size,
    batch_size=batch_size,
)

data_augmentation = keras.Sequential(
    [
        layers.RandomFlip("horizontal"),
        layers.RandomRotation(0.1),
    ]
)

train_ds = train_ds.prefetch(buffer_size=32)
val_ds = val_ds.prefetch(buffer_size=32)

def make_model(input_shape, num_classes):
    inputs = keras.Input(shape=input_shape)
    # Image augmentation block
    x = data_augmentation(inputs)
    # Entry block
    x = layers.Rescaling(1.0 / 255)(x)
    x = layers.Conv2D(32, 3, strides=2, padding="same")(x)
    x = layers.BatchNormalization()(x)
    x = layers.Activation("relu")(x)
    x = layers.Conv2D(64, 3, padding="same")(x)
    x = layers.BatchNormalization()(x)
    x = layers.Activation("relu")(x)

    previous_block_activation = x  # Set aside residual

    for size in [128, 256, 512, 728]:
        x = layers.Activation("relu")(x)
        x = layers.SeparableConv2D(size, 3, padding="same")(x)
        x = layers.BatchNormalization()(x)
        x = layers.Activation("relu")(x)
        x = layers.SeparableConv2D(size, 3, padding="same")(x)
        x = layers.BatchNormalization()(x)
        x = layers.MaxPooling2D(3, strides=2, padding="same")(x)
        # Project residual
        residual = layers.Conv2D(size, 1, strides=2, padding="same")(
            previous_block_activation
        )
        x = layers.add([x, residual])  # Add back residual
        previous_block_activation = x  # Set aside next residual

    x = layers.SeparableConv2D(1024, 3, padding="same")(x)
    x = layers.BatchNormalization()(x)
    x = layers.Activation("relu")(x)
    x = layers.GlobalAveragePooling2D()(x)
    if num_classes == 2:
        activation = "sigmoid"
        units = 1
    else:
        activation = "softmax"
        units = num_classes

    x = layers.Dropout(0.5)(x)
    outputs = layers.Dense(units, activation=activation)(x)
    return keras.Model(inputs, outputs)

model = make_model(input_shape=image_size + (3,), num_classes=2)
keras.utils.plot_model(model, show_shapes=True)

epochs = 50
callbacks = [
    keras.callbacks.ModelCheckpoint("save_at_{epoch}.h5"),
]
model.compile(
    optimizer=keras.optimizers.Adam(1e-3),
    loss="binary_crossentropy",
    metrics=["accuracy"],
)
model.fit(
    train_ds, epochs=epochs, callbacks=callbacks, validation_data=val_ds,
)
So the strategy was to begin the model with the data_augmentation preprocessor, followed by a Rescaling layer, and to put a Dropout layer before the final classification layer, as shown in the make_model function.
For training, as you can see, I set epochs=50 and used buffered prefetching for my input data so that data can be yielded from disk without I/O blocking. As for the rest of the parameters I think it was pretty standard, nothing too complicated, but when I run my code each epoch takes approximately 40 minutes and I don't know why.
Any suggestions?
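Not part of the original post, but two things that commonly explain epoch times of this magnitude are training on CPU rather than GPU, and re-reading and re-decoding every image from disk on every epoch. A quick, hedged check might look like the sketch below (the cache() call assumes the decoded dataset fits in memory or on local disk, and would replace the plain prefetch calls above):

# Illustrative checks, not from the original post.
import tensorflow as tf

# 1) Is a GPU visible to TensorFlow at all?
print(tf.config.list_physical_devices("GPU"))

# 2) Cache decoded images and let tf.data tune prefetching, so epochs after
#    the first do not re-read and re-decode every file from disk.
train_ds = train_ds.cache().prefetch(tf.data.AUTOTUNE)
val_ds = val_ds.cache().prefetch(tf.data.AUTOTUNE)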

I ran into this error: object 'lambda' not found in R2jags

Hi, I ran into this error even though I already defined lambda. I really don't know where it went wrong. Does anybody know how to solve this?
Here is my code:
binomial.model.JAGS = function(){
  y ~ dbin(p, n)
  p <- lambda*mu + rho*(1-mu)
  lambda ~ dunif(min = 0.2, max = 1.4)
  mu ~ dunif(min = 0, max = 1)
  rho ~ dunif(min = 0.1, max = 1.7)
}

n = 100000
y = 30000
data.JAGS = list(y = y, n = n)
inits.JAGS = list(list(lambda = 0.8, mu = 0.5, rho = 0.9))
para.JAGS = c('p', 'lambda', 'mu', 'rho')

fit.JAGS = jags(data = data.JAGS, inits = inits.JAGS,
                parameters.to.save = para.JAGS,
                n.chains = 1,
                n.iter = 9000,
                n.burnin = 1000,
                model.file = binomial.model.JAGS())

Why do I get max_weights = 1 after a gradient check?

I set up a 3-layer neural network with 2 hidden layers, but when I try to implement a gradient check I get max_weights = 1, which means I have some error in my backprop. Here is my backprop function; I really need some help.
Is there something wrong with my code?
Thanks!
def loss(self, X, y, reg=0.0):
    # forward prop
    W1, b1 = self.params['W1'], self.params['b1']
    W2, b2 = self.params['W2'], self.params['b2']
    W3, b3 = self.params['W3'], self.params['b3']
    N, D = X.shape

    H1out = np.maximum(0, X.dot(W1) + b1)      # H1out (N,H1)
    H2out = np.maximum(0, H1out.dot(W2) + b2)  # H2out (N,H2)
    scores = None
    scores = H2out.dot(W3) + b3

    scores_shift = scores - np.max(scores, axis=1).reshape(-1, 1)
    softmaxout = np.exp(scores_shift) / np.sum(np.exp(scores_shift), axis=1).reshape(-1, 1)
    loss_main = None
    loss = None
    loss_main = -np.sum(np.log(softmaxout[range(N), list(y)]))
    loss = loss_main/N + reg*np.sum(W1*W1)*np.sum(W2*W2) + np.sum(W3*W3)

    # backward prop
    dscores = softmaxout.copy()   # dscores (N,C)
    dscores[range(N), list(y)] -= 1
    dscores /= N
    dW3 = H2out.T.dot(dscores)
    db3 = np.sum(dscores, axis=0)
    dh2 = dscores.dot(W3.T)       # dh2 (N,H2)
    dh_Relu2 = (H2out > 0) * dh2  # dh_ReLu2 (N,H2)
    dW2 = H1out.T.dot(dh_Relu2)
    db2 = np.sum(dh_Relu2, axis=0)
    dh1 = dh_Relu2.dot(W2.T)      # dh1 (N,H1)
    dh_Relu1 = (H1out > 0) * dh1
    dW1 = X.T.dot(dh_Relu1)
    db1 = np.sum(dh_Relu1, axis=0)

    grad = {}
    grad['W1'] = dW1
    grad['b1'] = db1
    grad['W2'] = dW2
    grad['b2'] = db2
    grad['W3'] = dW3
    grad['b3'] = db3

    return loss, grad
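For reference, a centered-difference spot check of the analytic gradients is a common way to locate which parameter's backprop is wrong. The sketch below is illustrative only; it assumes net.loss(X, y, reg) is the method above returning (loss, grad), and that net, X and y are already defined:

# Illustrative numerical gradient check, not from the original post.
import numpy as np

def grad_check_sparse(f, w, analytic_grad, num_checks=5, h=1e-5):
    # f() re-evaluates the loss with the current (perturbed) value of w in place.
    for _ in range(num_checks):
        ix = tuple(np.random.randint(d) for d in w.shape)
        old = w[ix]
        w[ix] = old + h; fxph = f()
        w[ix] = old - h; fxmh = f()
        w[ix] = old
        num = (fxph - fxmh) / (2 * h)
        ana = analytic_grad[ix]
        rel = abs(num - ana) / max(1e-12, abs(num) + abs(ana))
        print("numerical %f  analytic %f  relative error %e" % (num, ana, rel))

# Usage sketch: compare the analytic gradient of W1 against numerical estimates.
loss_val, grads = net.loss(X, y, reg=0.05)
grad_check_sparse(lambda: net.loss(X, y, reg=0.05)[0], net.params['W1'], grads['W1'])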
