Good training and validation accuracy but poor confusion matrix

I have been training my model to classify chest X-rays as normal vs. pneumonia. My dataset is set up as listed below:
train_batch = ImageDataGenerator(preprocessing_function=tf.keras.applications.vgg16.preprocess_input) \
    .flow_from_directory(directory=train_path, target_size=(224, 224), classes=['NORMAL', 'PNEUMONIA'],
                         batch_size=32, class_mode='categorical')
val_batch = ImageDataGenerator(preprocessing_function=tf.keras.applications.vgg16.preprocess_input) \
    .flow_from_directory(directory=val_path, target_size=(224, 224), classes=['NORMAL', 'PNEUMONIA'],
                         batch_size=32, class_mode='categorical')
test_batch = ImageDataGenerator(preprocessing_function=tf.keras.applications.vgg16.preprocess_input) \
    .flow_from_directory(directory=test_path, target_size=(224, 224), classes=['NORMAL', 'PNEUMONIA'],
                         batch_size=16, class_mode='categorical', shuffle=False)
Found 3616 images belonging to 2 classes. #training
Found 1616 images belonging to 2 classes. #validation
Found 624 images belonging to 2 classes. #test
My model consists of 5 CNN layers on input images of shape (224, 224, 3), with 16 feature maps in the first layer and then 32, 64, 128, and 256. Batch normalization, max pooling, and dropout are added after every CNN layer; the last dense layer is as follows:
model.add(Dense(units=2, activation='softmax'))
optim = Adam(lr=0.001)
model.compile(optimizer=optim, loss='categorical_crossentropy', metrics=['accuracy'])
history = model.fit_generator(train_batch,
                              steps_per_epoch=113,  # 3616/32 = 113
                              epochs=25,
                              validation_data=val_batch,
                              validation_steps=51  # 1616/32 = 51
                              # verbose=2
                              # callbacks=callbacks  # remove to chk
                              )
As can be seen in the graph, my training and validation accuracy and loss look good, but when I plot the confusion matrix it does not look good. Why?
prediction = model.predict_generator(test_batch, steps=stepss)  # , verbose=0
prediction1 = np.argmax(prediction, axis=1)
cm = confusion_matrix(test_batch.classes, prediction1)
print(cm)
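For reference, here is a minimal sketch (not the code from the original post) of how the number of test steps is typically derived so that the predictions line up one-to-one with test_batch.classes; stepss above is assumed to play this role:
import math
import numpy as np
from sklearn.metrics import confusion_matrix

# cover all 624 test images: ceil(624 / 16) = 39 steps, and shuffle=False keeps
# the labels in the same file order as test_batch.classes
test_steps = math.ceil(test_batch.samples / test_batch.batch_size)
prediction = model.predict_generator(test_batch, steps=test_steps)
predicted_classes = np.argmax(prediction, axis=1)

cm = confusion_matrix(test_batch.classes, predicted_classes)
print(cm)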
My confusion matrix is shown below, followed by my accuracy/loss graph.
After that I fine-tuned VGG16 by replacing the last dense layer with my own dense layer with two outputs; here are the resulting graph and confusion matrix:
I do not understand why my test results are still poor, even with the VGG16 model, as you can see from the results. Please give me your suggestions. Thanks!

Related

Binary Image Classification - Validation loss is much higher than training loss

I'm facing strange behaviour that I can't figure out. While training, I get a really high loss (BinaryCrossentropy) on my validation batches, around 20 or even higher. But after training, when I run a prediction on the test set, I get a loss lower than 1. Why is that? I went through my code over and over and can't find the problem.
I'm doing binary image classification for brain tumors on a dataset provided via Kaggle (link).
And you can find my notebook here: Google-Colab Notebook
My data is loaded this way:
batch_size = 20

train_ds = tf.keras.utils.image_dataset_from_directory(
    train_data_path,
    subset='training',
    seed=42,
    color_mode='grayscale',
    batch_size=batch_size,
    validation_split=0.30
)
valid_ds = tf.keras.utils.image_dataset_from_directory(
    train_data_path,
    subset='validation',
    seed=42,
    batch_size=batch_size,
    color_mode='grayscale',
    validation_split=0.30
)
test_ds = tf.keras.utils.image_dataset_from_directory(
    test_data_path,
    color_mode='grayscale',
    batch_size=batch_size,
    shuffle=False
)
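As a quick sanity check (not in the original notebook), one can confirm that both splits yield the same element structure and look at the raw pixel range of a training batch before fitting:
print(train_ds.element_spec)
print(valid_ds.element_spec)

# peek at one training batch: grayscale images, still in the raw 0-255 range here
images, labels = next(iter(train_ds))
print("pixel range:", float(tf.reduce_min(images)), float(tf.reduce_max(images)))
print("label batch:", labels.numpy())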
This is my model structure:
input_shape = image_batch[0].shape

# set up the model structure
model = tf.keras.Sequential([
    layers.Conv2D(32, (3, 3), activation='relu', input_shape=input_shape),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Dropout(0.3),
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Dropout(0.3),
    layers.Flatten(),
    tf.keras.layers.Dense(32, activation="relu"),
    layers.Dropout(0.3),
    layers.Dense(1, activation="sigmoid")
])
model.summary()
This is my callback function which returns the plots during training:
class PlotLearning(tf.keras.callbacks.Callback):
    """
    Callback to plot the learning curves of the model during training.
    """
    def on_train_begin(self, logs={}):
        self.metrics = {}
        for metric in logs:
            self.metrics[metric] = []

    def on_epoch_end(self, epoch, logs={}):
        # Storing metrics
        print(logs)
        for metric in logs:
            if metric in self.metrics:
                self.metrics[metric].append(logs.get(metric))
            else:
                self.metrics[metric] = [logs.get(metric)]

        # Plotting
        metrics = [x for x in logs if 'val' not in x]
        f, axs = plt.subplots(1, len(metrics), figsize=(15, 5))
        clear_output(wait=True)

        for i, metric in enumerate(metrics):
            axs[i].plot(range(1, epoch + 2),
                        self.metrics[metric],
                        label=metric)
            if logs['val_' + metric]:
                axs[i].plot(range(1, epoch + 2),
                            self.metrics['val_' + metric],
                            label='val_' + metric)
            axs[i].legend()
            axs[i].grid()

        plt.tight_layout()
        plt.show()


callbacks_list = [PlotLearning()]
And this is the part where I start the training:
# compile model
optimizer = tf.keras.optimizers.Adam(learning_rate=0.0001)
model.compile(optimizer=optimizer,
              loss=tf.keras.losses.BinaryCrossentropy(from_logits=False),
              metrics=['accuracy'])

# fit model
history = model.fit(prep_train_ds,
                    epochs=30,
                    validation_data=valid_ds,
                    callbacks=callbacks_list)
This is the output of the callback function after the last epoch:
As you can see, the validation loss is really high, oscillating around 20, so I guess it is overfitting.
But as mentioned above, here is what I get when I run a prediction on the test set and calculate the binary cross-entropy myself: the loss is again less than 1, at least in the range of the training loss.
I tried many things, like changing the batch size (because not enough samples of one class might end up in one batch). I also wanted to see if it is overfitting, so I changed the number of filters, applied dropout, etc., but I couldn't get the validation loss down. I'm quite new to image classification and maybe I'm overlooking something.
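For comparison, here is a minimal sketch (assuming the model, valid_ds, and test_ds defined above) of checking the validation loss via model.evaluate against a manually computed binary cross-entropy on the test predictions:
import numpy as np
import tensorflow as tf

# loss Keras reports on the validation split
val_loss, val_acc = model.evaluate(valid_ds, verbose=0)
print("validation loss from model.evaluate:", val_loss)

# manual binary cross-entropy on the (unshuffled) test set
y_true = np.concatenate([y.numpy() for _, y in test_ds]).astype("float32")
y_pred = model.predict(test_ds, verbose=0).ravel()
bce = tf.keras.losses.BinaryCrossentropy()(y_true, y_pred).numpy()
print("manually computed test BCE:", bce)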

How to reduce tensorflow dataset input pipeline host device (cpu) time (currently ~40%)?

I am trying to replicate the ResNet-18 paper. Before running this on the full ImageNet dataset on disk, I'm doing some evaluation runs with the publicly available imagenette/320px dataset from TFDS (a much smaller subset of ImageNet with 10 classes, already in .tfrecord format).
Note: the full notebook for training and tracing is available here: resnet18_baseline.ipynb. Just switch to a GPU runtime and run all the cells; it's already set up with TensorBoard profiling on the second batch. (You can use a TPU as well, but some keras.layers.experimental.preprocessing layers do not support TPU ops yet and you have to enable soft device placement. Please use a GPU.)
Input Operations
Read images from the input dataset. These images usually have different dimensions, and we need a crop function because input tensors cannot have different dimensions within a batch. Therefore, for training I use a random crop and for the testing/validation datasets a center crop.
random_crop_layer = keras.layers.experimental.preprocessing.RandomCrop(224, 224)
center_crop_layer = keras.layers.experimental.preprocessing.CenterCrop(224, 224)

@tf.function(experimental_relax_shapes=True)  # avoid retracing
def train_crop_fn(x, y):
    return random_crop_layer(x), y

@tf.function(experimental_relax_shapes=True)
def eval_crop_fn(x, y):
    return center_crop_layer(x), y
Perform some simple preprocessing/augmentations on the input data. These include rescaling to 0-1 and standardizing by the mean and standard deviation of the RGB channels on ImageNet.
rescaling_layer = keras.layers.experimental.preprocessing.Rescaling(1./255)

train_preproc = keras.Sequential([
    rescaling_layer
])

# from https://github.com/tensorflow/models/blob/master/official/vision/image_classification/preprocessing.py
# Calculated from the ImageNet training set
MEAN_RGB = (0.485, 0.456, 0.406)
STDDEV_RGB = (0.229, 0.224, 0.225)

@tf.function
def z_score_scale(x):
    return (x - MEAN_RGB) / STDDEV_RGB

@tf.function
def train_preproc_fn(x, y):
    return z_score_scale(train_preproc(x)), y

@tf.function
def eval_preproc_fn(x, y):
    return z_score_scale(eval_preproc(x)), y
Input Pipeline
def get_input_pipeline(input_ds, bs, crop_fn, augmentation_fn):
    ret_ds = (
        input_ds
        .batch(1)  # pre-crop images have different dimensions and can't be batched
        .map(crop_fn,
             num_parallel_calls=tf.data.experimental.AUTOTUNE)
        .unbatch()
        .batch(bs)
        .map(augmentation_fn,  # augmentations can be batched though
             num_parallel_calls=tf.data.experimental.AUTOTUNE)
    )
    return ret_ds
# dataset loading
def load_imagenette():
    train_ds, ds_info = tfds.load('imagenette/320px', split='train', as_supervised=True, with_info=True)
    valid_ds = tfds.load('imagenette/320px', split='validation', as_supervised=True)
    return train_ds, valid_ds, valid_ds, ds_info.features['label'].num_classes

# pipeline construction
train_ds, valid_ds, test_ds, num_classes = load_imagenette()

# datasets used for training (notice that I use prefetch here)
train_samples = get_input_pipeline(train_ds, BS, train_crop_fn, train_preproc_fn).prefetch(tf.data.experimental.AUTOTUNE)
valid_samples = get_input_pipeline(valid_ds, BS, eval_crop_fn, eval_preproc_fn).prefetch(tf.data.experimental.AUTOTUNE)
test_samples = get_input_pipeline(test_ds, BS, eval_crop_fn, eval_preproc_fn).prefetch(tf.data.experimental.AUTOTUNE)
Training and Profiling
I use the TensorBoard profiler to trace the second batch, and I get a warning that the step is highly input-bound, with about 40% of the processing time spent waiting on inputs.
For a classic ResNet-18 model, you can push the batch size up to 768 without getting an OOM error, which is what I use. A single step with batch size 256 takes about 2-3 seconds.
I also get a warning that on_train_batch_end is slow, at around ~1.5 seconds, compared to the ~1 s batch time.
The model training code is very simple Keras:
model.fit(
    train_samples,
    validation_data=valid_samples,
    epochs=100,
    batch_size=BS,
    use_multiprocessing=True,
    callbacks=[tensorboard_callback, model_checkpoint_callback, early_stop_callback, reduce_lr_callback]
)
and the callbacks are specified as:
log_dir = os.path.join(os.getcwd(), 'logs')
tensorboard_callback = TensorBoard(log_dir=log_dir, update_freq="epoch", profile_batch=2)
reduce_lr_callback = tf.keras.callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=5, min_lr=0.001, verbose=1)
model_checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(filepath='model.{epoch:02d}-{val_loss:.4f}.h5',
                                                               monitor='val_loss',
                                                               verbose=1,
                                                               save_best_only=True)
early_stop_callback = keras.callbacks.EarlyStopping(monitor='val_loss', patience=15)
Lastly, here are some sample TensorBoard profiling screenshots. I can't figure out how to make this run faster.
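One thing worth trying (a sketch based on the pipeline above, not a measured fix): crop each example directly inside map() instead of the batch(1) -> map -> unbatch round trip, since in recent TF versions RandomCrop and CenterCrop also accept unbatched (H, W, C) images. Whether this actually reduces the input-bound time would need to be confirmed with the profiler.
def get_input_pipeline_v2(input_ds, bs, crop_fn, augmentation_fn):
    # per-example crop, then batch, then batched augmentations, then prefetch
    return (
        input_ds
        .map(crop_fn,
             num_parallel_calls=tf.data.experimental.AUTOTUNE)
        .batch(bs)
        .map(augmentation_fn,
             num_parallel_calls=tf.data.experimental.AUTOTUNE)
        .prefetch(tf.data.experimental.AUTOTUNE)
    )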

Tensorflow 1 vs Tensorflow 2 Keras Inference Speed Differ by 2+ times

I'm trying to figure out the reason behind the speed difference between two different models.
An LSTM RNN model built using TensorFlow 1.x:
self.input_placeholder = tf.placeholder(
    tf.int32, shape=[self.config.batch_size, self.config.num_steps], name='Input')
self.labels_placeholder = tf.placeholder(
    tf.int32, shape=[self.config.batch_size, self.config.num_steps], name='Target')

embedding = tf.get_variable(
    'Embedding', initializer=self.embedding_matrix, trainable=False)
inputs = tf.nn.embedding_lookup(embedding, self.input_placeholder)
inputs = [tf.squeeze(x, axis=1) for x in tf.split(inputs, self.config.num_steps, axis=1)]

self.initial_state = tf.zeros([self.config.batch_size, self.config.hidden_size])
lstm_cell = tf.contrib.rnn.BasicLSTMCell(self.config.hidden_size)
rnn_outputs, _ = tf.contrib.rnn.static_rnn(
    lstm_cell, inputs, dtype=tf.float32,
    sequence_length=[self.config.num_steps] * self.config.batch_size)

with tf.variable_scope('Projection'):
    proj_U = tf.get_variable('Matrix', [self.config.hidden_size, self.config.vocab_size])
    proj_b = tf.get_variable('Bias', [self.config.vocab_size])
    outputs = [tf.matmul(o, proj_U) + proj_b for o in rnn_outputs]
The same model (at least to my understanding) built using TensorFlow 2.0 Keras:
def setup_model():
    model = Sequential()
    model.add(Embedding(input_dim=vocab_size,
                        output_dim=embedding_dim,
                        weights=[embedding_matrix],
                        input_length=4,
                        trainable=False))
    model.add(LSTM(config.hidden_size, activation="tanh"))
    model.add(Dense(vocab_size, activation="softmax"))
    return model
The architecture is:
Model: "sequential"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
embedding (Embedding) (None, 4, 100) 55400
_________________________________________________________________
lstm (LSTM) (None, 100) 80400
_________________________________________________________________
dense (Dense) (None, 554) 55954
=================================================================
Total params: 191,754
Trainable params: 136,354
Non-trainable params: 55,400
_________________________________________________________________
I was expecting similar inference runtimes, but the model built with TensorFlow 1.x is much faster. I tried to convert the TensorFlow 1.x model to TensorFlow 2 using only native TensorFlow functions, but I had trouble converting it because of the big changes from 1.x to 2, and I was only able to recreate it with tf.keras.
In terms of speed, since I'm using both for generating text sequences plus getting word probabilities, I don't have a single-inference time comparison (I can't modify the existing API of the TensorFlow 1.x model to measure it). But in general I'm seeing at least a 2x difference in time across my use cases.
What could be the possible reasons behind this difference in inference speed? I'm happy to provide more information if needed.
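As a side note (not part of the original question), one common way to make the comparison more like-for-like is to benchmark a tf.function-compiled forward pass against model.predict, since eager predict() calls carry per-call Python overhead that a TF1 session.run() does not. The sketch below assumes the variables from the snippet above (vocab_size, embedding_dim, embedding_matrix, config):
import time
import numpy as np
import tensorflow as tf

model = setup_model()
x = np.random.randint(0, vocab_size, size=(1, 4))

@tf.function
def fast_infer(inputs):
    return model(inputs, training=False)

fast_infer(tf.constant(x))  # trace once so compilation time is not measured

t0 = time.perf_counter()
for _ in range(100):
    model.predict(x, verbose=0)
print("predict()  :", time.perf_counter() - t0)

t0 = time.perf_counter()
for _ in range(100):
    fast_infer(tf.constant(x))
print("tf.function:", time.perf_counter() - t0)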

Keras Inception-v3 fine-tuning workaround

I am trying to fine-tune Inception-v3, but no matter which layer I choose to freeze I get random predictions. I found that other people are having the same problem: https://github.com/keras-team/keras/issues/9214 . It seems that the problem comes from setting the BN layer to not trainable.
Now I am trying to get the output of the last layer I want to freeze and use it as an input to the following layers, which I will then train:
train_generator = train_datagen.flow_from_directory(
    os.path.join(directory, "train_data"),
    target_size=size,
    interpolation="bilinear",
    classes=["a", "b", "c", "d"],
    batch_size=1,
    shuffle=False)

base_model = InceptionV3(weights='imagenet', include_top=True, input_shape=(299, 299, 3))
model_features = Model(inputs=base_model.input,
                       outputs=base_model.get_layer(self.Inception_Fine_Tune_Layers[layer_freeze]).output)

# I want to use this as input
values_train = model_features.predict_generator(train_generator, verbose=1)
However, I get a memory error like the one below, although I have 12 GB, which is more than I should need:
....
I tensorflow/core/common_runtime/bfc_allocator.cc:696] 1 Chunks of size 3268864 totalling 3.12MiB
I tensorflow/core/common_runtime/bfc_allocator.cc:696] 1 Chunks of size 3489024 totalling 3.33MiB
I tensorflow/core/common_runtime/bfc_allocator.cc:696] 1 Chunks of size 4211968 totalling 4.02MiB
I tensorflow/core/common_runtime/bfc_allocator.cc:696] 1 Chunks of size 5129472 totalling 4.89MiB
I tensorflow/core/common_runtime/bfc_allocator.cc:700] Sum Total of in-use chunks: 3.62GiB
I tensorflow/core/common_runtime/bfc_allocator.cc:702] Stats:
Limit: 68719476736
InUse: 3886957312
MaxInUse: 3889054464
NumAllocs: 3709
MaxAllocSize: 8388608
Any suggestion on how to fix that, or another workaround to fine-tune Inception, would be very helpful.
I can't tell if you're preprocessing your input properly from what you've provided. However, Keras provides functions for preprocessing that are specific to the pre-trained net, in this case Inception V3.
from keras.applications.inception_v3 import preprocess_input
Try adding this to your data generator as the preprocessing function like so...
# the preprocessing function belongs on the ImageDataGenerator itself,
# not on flow_from_directory
train_datagen = ImageDataGenerator(preprocessing_function=preprocess_input)  # <---

train_generator = train_datagen.flow_from_directory(
    os.path.join(directory, "train_data"),
    target_size=size,
    interpolation="bilinear",
    classes=["a", "b", "c", "d"],
    batch_size=1,
    shuffle=False)
You should then be able to unfreeze all of the layers, or the select few that you want to train.
Hope that helps!
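For completeness, a small sketch (not from the original answer, and with an arbitrary cutoff of 30 layers) of what the "unfreeze only a select few layers" step usually looks like with a custom head for the four classes above:
from keras.applications.inception_v3 import InceptionV3
from keras.layers import Dense, GlobalAveragePooling2D
from keras.models import Model

base_model = InceptionV3(weights='imagenet', include_top=False, input_shape=(299, 299, 3))
x = GlobalAveragePooling2D()(base_model.output)
predictions = Dense(4, activation='softmax')(x)  # classes "a", "b", "c", "d"
model = Model(inputs=base_model.input, outputs=predictions)

# freeze everything except the last ~30 layers (the cutoff is just an example)
for layer in base_model.layers[:-30]:
    layer.trainable = False
for layer in base_model.layers[-30:]:
    layer.trainable = True

# recompile so the new trainable flags take effect
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])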

Pymc and binomials: How to fit 7 binomials to data

I have this problem: I have a cohort of individuals grouped into 5 age groups. Initially all of them are susceptible; then they develop the disease and finally cancer. I have information about the age-group distribution of the susceptible individuals and of the cancer carriers. Between the susceptible state and cancer they pass through 7 stages, each with the same transition rate.
I'm trying to create a model that simulates each transition as a binomial draw and fits the data I have.
I tried something, but when I analyse the traces nothing works.
You can see the code below.
Where am I going wrong?
Thanks for any help.
from pylab import *
from pymc import *
from pymc.Matplot import plot as plt
#susceptible_data = array([647,1814,8838,9949,1920])
susceptible_data = array([130,398,1415,1303,206])
infected_data_100000 = array([0,197,302,776,927])
infected_data = array([0,7,38,90,17])
prior_values=np.zeros(len(infected_data))
for i in range(0,len(infected_data)):
prior_values[i]=infected_data[i]/susceptible_data[i]
# stochastic priors
beta1 = Uniform('beta1', 0., 1.)
lambda_0_temp=susceptible_data[0]
lambda_0_1=pymc.Binomial("lambda_0_1",lambda_0_temp,pow(beta1,1))
lambda_0_2=pymc.Binomial("lambda_0_2",lambda_0_1.value,pow(beta1,1))
lambda_0_3=pymc.Binomial("lambda_0_3",lambda_0_2.value,pow(beta1,1))
lambda_0_4=pymc.Binomial("lambda_0_4",lambda_0_3.value,pow(beta1,1))
lambda_0_5=pymc.Binomial("lambda_0_5",lambda_0_4.value,pow(beta1,1))
lambda_0_6=pymc.Binomial("lambda_0_6",lambda_0_5.value,pow(beta1,1))
lambda_0_7=pymc.Binomial("lambda_0_7",n=lambda_0_6.value,p=pow(beta1,1),value=infected_data[0],observed=True)
lambda_1_temp=susceptible_data[1]
lambda_1_1=pymc.Binomial("lambda_1_1",lambda_1_temp,pow(beta1,1))
lambda_1_2=pymc.Binomial("lambda_1_2",lambda_1_1.value,pow(beta1,1))
lambda_1_3=pymc.Binomial("lambda_1_3",lambda_1_2.value,pow(beta1,1))
lambda_1_4=pymc.Binomial("lambda_1_4",lambda_1_3.value,pow(beta1,1))
lambda_1_5=pymc.Binomial("lambda_1_5",lambda_1_4.value,pow(beta1,1))
lambda_1_6=pymc.Binomial("lambda_1_6",lambda_1_5.value,pow(beta1,1))
lambda_1_7=pymc.Binomial("lambda_1_7",n=lambda_1_6.value,p=pow(beta1,1),value=infected_data[1],observed=True)
lambda_2_temp=susceptible_data[2]
lambda_2_1=pymc.Binomial("lambda_2_1",lambda_2_temp,pow(beta1,1))
lambda_2_2=pymc.Binomial("lambda_2_2",lambda_2_1.value,pow(beta1,1))
lambda_2_3=pymc.Binomial("lambda_2_3",lambda_2_2.value,pow(beta1,1))
lambda_2_4=pymc.Binomial("lambda_2_4",lambda_2_3.value,pow(beta1,1))
lambda_2_5=pymc.Binomial("lambda_2_5",lambda_2_4.value,pow(beta1,1))
lambda_2_6=pymc.Binomial("lambda_2_6",lambda_2_5.value,pow(beta1,1))
lambda_2_7=pymc.Binomial("lambda_2_7",n=lambda_2_6.value,p=pow(beta1,1),value=infected_data[2],observed=True)
lambda_3_temp=susceptible_data[3]
lambda_3_1=pymc.Binomial("lambda_3_1",lambda_3_temp,pow(beta1,1))
lambda_3_2=pymc.Binomial("lambda_3_2",lambda_3_1.value,pow(beta1,1))
lambda_3_3=pymc.Binomial("lambda_3_3",lambda_3_2.value,pow(beta1,1))
lambda_3_4=pymc.Binomial("lambda_3_4",lambda_3_3.value,pow(beta1,1))
lambda_3_5=pymc.Binomial("lambda_3_5",lambda_3_4.value,pow(beta1,1))
lambda_3_6=pymc.Binomial("lambda_3_6",lambda_3_5.value,pow(beta1,1))
lambda_3_7=pymc.Binomial("lambda_3_7",n=lambda_3_6.value,p=pow(beta1,1),value=infected_data[3],observed=True)
lambda_4_temp=susceptible_data[4]
lambda_4_1=pymc.Binomial("lambda_4_1",lambda_4_temp,pow(beta1,1))
lambda_4_2=pymc.Binomial("lambda_4_2",lambda_4_1.value,pow(beta1,1))
lambda_4_3=pymc.Binomial("lambda_4_3",lambda_4_2.value,pow(beta1,1))
lambda_4_4=pymc.Binomial("lambda_4_4",lambda_4_3.value,pow(beta1,1))
lambda_4_5=pymc.Binomial("lambda_4_5",lambda_4_4.value,pow(beta1,1))
lambda_4_6=pymc.Binomial("lambda_4_6",lambda_4_5.value,pow(beta1,1))
lambda_4_7=pymc.Binomial("lambda_4_7",n=lambda_4_6.value,p=pow(beta1,1),value=infected_data[4],observed=True)
model=pymc.Model([lambda_0_7,lambda_1_7,lambda_2_7,lambda_3_7,lambda_4_7,beta1])
mcmc =pymc.MCMC(model)
mcmc.sample(iter=100000, burn=50000, thin=10, verbose=1)
lambda_0_samples=mcmc.trace('lambda_0_7')[:]
lambda_1_samples=mcmc.trace('lambda_1_7')[:]
lambda_2_samples=mcmc.trace('lambda_2_7')[:]
lambda_3_samples=mcmc.trace('lambda_3_7')[:]
lambda_4_samples=mcmc.trace('lambda_4_7')[:]
beta1_samples=mcmc.trace('beta1')[:]
What you have implemented above only associates data with the 7th distribution in each set; the others are seemingly-redundant hierarchies on the binomial probability. I would think you want data informing each stage. I'm not sure there is information to inform what the values of p should be at each stage, based on what is provided.
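As an illustration of that point (a minimal PyMC2 sketch, not the original poster's model): chaining seven Binomial thinning steps that share the same per-stage probability beta1 is equivalent to a single Binomial whose success probability is beta1**7, so the observed counts can be made to inform beta1 directly:
import numpy as np
import pymc

susceptible_data = np.array([130, 398, 1415, 1303, 206])
infected_data = np.array([0, 7, 38, 90, 17])

# single per-stage transition probability shared by all 7 stages
beta1 = pymc.Uniform('beta1', 0., 1.)

# end-to-end retention after 7 stages is beta1**7; the observed counts for the
# 5 age groups are modelled jointly as Binomial(n_susceptible, beta1**7)
obs = pymc.Binomial('obs', n=susceptible_data, p=pow(beta1, 7),
                    value=infected_data, observed=True)

model = pymc.Model([beta1, obs])
mcmc = pymc.MCMC(model)
mcmc.sample(iter=100000, burn=50000, thin=10)

beta1_samples = mcmc.trace('beta1')[:]
print(beta1_samples.mean())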
