I got error in code to train model - multivariant timeseries - multivariate-time-series

I need help with multivariant time series forecasting using gru / lstm.
The dataset I am using about 4000 rows and 7 columns.
I already used this for input shaping
def create_dataset (X, look_back = 1):
Xs, ys = [], []
for i in range(len(X)-look_back):
v = X[i:i+look_back]
Xs.append(v)
ys.append(X[i+look_back][0])
return np.array(Xs), np.array(ys)
LOOK_BACK = 30
X_train, y_train = create_dataset(train_scaled,LOOK_BACK)
X_test, y_test = create_dataset(test_scaled,LOOK_BACK)
print('X_train.shape: ', X_train.shape)
print('y_train.shape: ', y_train.shape)
print('X_test.shape: ', X_test.shape)
print('y_test.shape: ', y_test.shape)
this part for model creation
def create_gru(units):
model = Sequential()
# Input layer
model.add(GRU (units = units, return_sequences = True,
input_shape = [X_train.shape[1], X_train.shape[2]]))
model.add(Dropout(0.2))
# Hidden layer
model.add(GRU(units = units))
model.add(Dropout(0.2))
model.add(Dense(units = 1))
When I execute I got an error says that non-broadcastable output operand with shape (854,1) doesn't match the broadcast shape (854,7)
this error happen when the execution each this part
y_train = scaler.inverse_transform(y_train)
y_test = scaler.inverse_transform(y_test)
def prediction(model):
prediction = model.predict(X_test)
prediction = scaler.inverse_transform(prediction)
return prediction
prediction_gru = prediction(model_gru)
prediction_bilstm = prediction(model_bilstm)

Related

fitting keras model for cat and dog image classification takes 50 minutes at each epoch. any way i can reduce time?

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
image_size = (180, 180)
batch_size = 32
train_ds = tf.keras.preprocessing.image_dataset_from_directory(
"PetImages",
validation_split=0.2,
subset="training",
seed=1337,
image_size=image_size,
batch_size=batch_size,
)
val_ds = tf.keras.preprocessing.image_dataset_from_directory(
"PetImages",
validation_split=0.2,
subset="validation",
seed=1337,
image_size=image_size,
batch_size=batch_size,
)
data_augmentation = keras.Sequential(
[
layers.RandomFlip("horizontal"),
layers.RandomRotation(0.1),
]
)
train_ds = train_ds.prefetch(buffer_size=32)
val_ds = val_ds.prefetch(buffer_size=32)
def make_model(input_shape, num_classes):
inputs = keras.Input(shape=input_shape)
# Image augmentation block
x = data_augmentation(inputs)
# Entry block
x = layers.Rescaling(1.0 / 255)(x)
x = layers.Conv2D(32, 3, strides=2, padding="same")(x)
x = layers.BatchNormalization()(x)
x = layers.Activation("relu")(x)
x = layers.Conv2D(64, 3, padding="same")(x)
x = layers.BatchNormalization()(x)
x = layers.Activation("relu")(x)
previous_block_activation = x # Set aside residual
for size in [128, 256, 512, 728]:
x = layers.Activation("relu")(x)
x = layers.SeparableConv2D(size, 3, padding="same")(x)
x = layers.BatchNormalization()(x)
x = layers.Activation("relu")(x)
x = layers.SeparableConv2D(size, 3, padding="same")(x)
x = layers.BatchNormalization()(x)
x = layers.MaxPooling2D(3, strides=2, padding="same")(x)
# Project residual
residual = layers.Conv2D(size, 1, strides=2, padding="same")(
previous_block_activation
)
x = layers.add([x, residual]) # Add back residual
previous_block_activation = x # Set aside next residual
x = layers.SeparableConv2D(1024, 3, padding="same")(x)
x = layers.BatchNormalization()(x)
x = layers.Activation("relu")(x)
x = layers.GlobalAveragePooling2D()(x)
if num_classes == 2:
activation = "sigmoid"
units = 1
else:
activation = "softmax"
units = num_classes
x = layers.Dropout(0.5)(x)
outputs = layers.Dense(units, activation=activation)(x)
return keras.Model(inputs, outputs)
model = make_model(input_shape=image_size + (3,), num_classes=2)
keras.utils.plot_model(model, show_shapes=True)
epochs = 50
callbacks = [
keras.callbacks.ModelCheckpoint("save_at_{epoch}.h5"),
]
model.compile(
optimizer=keras.optimizers.Adam(1e-3),
loss="binary_crossentropy",
metrics=["accuracy"],
)
model.fit(
train_ds, epochs=epochs, callbacks=callbacks, validation_data=val_ds,
)
So the strategy was to begin the model with the data_augmentation preprocessor, followed by a Rescaling layer and a dropout layer before the final classification layer as shown in the make_model function
for training the model as you can see I set epochs=50 and used buffered prefetching for my input data as it would yield data from disk without having I/O blocking. As for the rest of the parameters I think it was pretty standard. nothing too complicated but when I run my code each epoch is taking approximately 40 minutes and I don't know why.
Any suggestions?

Fit Amplitude (Frequency response) of a capacitor with lmfit

I am trying to fit measured data with lmfit.
My goal is to get the parameters of the capacitor with an equivalent circuit diagram.
So, I want to create a model with parameters (C, R1, L1,...) and fit it to the measured data.
I know that the resonance frequency is at the global minimum and there must also be R1. Also known is C.
So I could fix the parameter C and R1. With the resonance frequency I could calculate L1 too.
I created the model, but the fit doesn't work right.
Maybe someone could help me with this.
Thanks in advance.
from lmfit import minimize, Parameters
from lmfit import report_fit
params = Parameters()
params.add('C', value = 220e-9, vary = False)
params.add('L1', value = 0.00001, min = 0, max = 0.1)
params.add('R1', value = globalmin, vary = False)
params.add('Rp', value = 10000, min = 0, max = 10e20)
params.add('Cp', value = 0.1, min = 0, max = 0.1)
def get_elements(params, freq, data):
C = params['C'].value
L1 = params['L1'].value
R1 = params['R1'].value
Rp = params['Rp'].value
Cp = params['Cp'].value
XC = 1/(1j*2*np.pi*freq*C)
XL = 1j*2*np.pi*freq*L1
XP = 1/(1j*2*np.pi*freq*Cp)
Z1 = R1 + XC*Rp/(XC+Rp) + XL
real = np.real(Z1*XP/(Z1+XP))
imag = np.imag(Z1*XP/(Z1+XP))
model = np.sqrt(real**2 + imag**2)
#model = np.sqrt(R1**2 + ((2*np.pi*freq*L1 - 1/(2*np.pi*freq*C))**2))
#model = (np.arctan((2*np.pi*freq*L1 - 1/(2*np.pi*freq*C))/R1)) * 360/((2*np.pi))
return data - model
out = minimize(get_elements, params , args=(freq, data))
report_fit(out)
#make reconstruction for plotting
C = out.params['C'].value
L1 = out.params['L1'].value
R1 = out.params['R1'].value
Rp = out.params['Rp'].value
Cp = out.params['Cp'].value
XC = 1/(1j*2*np.pi*freq*C)
XL = 1j*2*np.pi*freq*L1
XP = 1/(1j*2*np.pi*freq*Cp)
Z1 = R1 + XC*Rp/(XC+Rp) + XL
real = np.real(Z1*XP/(Z1+XP))
imag = np.imag(Z1*XP/(Z1+XP))
reconst = np.sqrt(real**2 + imag**2)
reconst_phase = np.arctan(imag/real)* 360/(2*np.pi)
'''
PLOTTING
'''
#plot of filtred signal vs measered data (AMPLITUDE)
fig = plt.figure(figsize=(40,15))
file_title = 'Measured Data'
plt.subplot(311)
plt.xscale('log')
plt.yscale('log')
plt.xlim([min(freq), max(freq)])
plt.ylabel('Amplitude')
plt.xlabel('Frequency in Hz')
plt.grid(True, which="both")
plt.plot(freq, z12_fac, 'g', alpha = 0.7, label = 'data')
#Plot Impedance of model in magenta
plt.plot(freq, reconst, 'm', label='Reconstruction (Model)')
plt.legend()
#(PHASE)
plt.subplot(312)
plt.xscale('log')
plt.xlim([min(freq), max(freq)])
plt.ylabel('Phase in °')
plt.xlabel('Frequency in Hz')
plt.grid(True, which="both")
plt.plot(freq, z12_deg, 'g', alpha = 0.7, label = 'data')
#Plot Phase of model in magenta
plt.plot(freq, reconst_phase, 'm', label='Reconstruction (Model)')
plt.legend()
plt.savefig(file_title)
plt.close(fig)
measured data
equivalent circuit diagram (model)
Edit 1:
Fit-Report:
[[Fit Statistics]]
# fitting method = leastsq
# function evals = 28
# data points = 4001
# variables = 3
chi-square = 1197180.70
reduced chi-square = 299.444897
Akaike info crit = 22816.4225
Bayesian info crit = 22835.3054
## Warning: uncertainties could not be estimated:
L1: at initial value
Rp: at boundary
Cp: at initial value
Cp: at boundary
[[Variables]]
C: 2.2e-07 (fixed)
L1: 1.0000e-05 (init = 1e-05)
R1: 0.06375191 (fixed)
Rp: 0.00000000 (init = 10000)
Cp: 0.10000000 (init = 0.1)
Edit 2:
Data can be found here:
https://1drv.ms/u/s!AsLKp-1R8HlZhcdlJER5T7qjmvfmnw?e=r8G2nN
Edit 3:
I now have simplified my model to a simple RLC-series. With a another set of data this works pretty good. see here the plot with another set of data
def get_elements(params, freq, data):
C = params['C'].value
L1 = params['L1'].value
R1 = params['R1'].value
#Rp = params['Rp'].value
#Cp = params['Cp'].value
#k = params['k'].value
#freq = np.log10(freq)
XC = 1/(1j*2*np.pi*freq*C)
XL = 1j*2*np.pi*freq*L1
# XP = 1/(1j*2*np.pi*freq*Cp)
# Z1 = R1*k + XC*Rp/(XC+Rp) + XL
# real = np.real(Z1*XP/(Z1+XP))
# imag = np.imag(Z1*XP/(Z1+XP))
Z1 = R1 + XC + XL
real = np.real(Z1)
imag= np.imag(Z1)
model = np.sqrt(real**2 + imag**2)
return np.sqrt(np.real(data)**2+np.imag(data)**2) - model
out = minimize(get_elements, params , args=(freq, data))
Report:
Chi-Square is really high...
[[Fit Statistics]]
# fitting method = leastsq
# function evals = 25
# data points = 4001
# variables = 2
chi-square = 5.0375e+08
reduced chi-square = 125968.118
Akaike info crit = 46988.8798
Bayesian info crit = 47001.4684
[[Variables]]
C: 3.3e-09 (fixed)
L1: 5.2066e-09 +/- 1.3906e-08 (267.09%) (init = 1e-05)
R1: 0.40753691 +/- 24.5685882 (6028.56%) (init = 0.05)
[[Correlations]] (unreported correlations are < 0.100)
C(L1, R1) = -0.174
With my originally set of data I get this:
plot original data (complex)
Which is not bad, but also not good. That's why I want to make my model more detailed, so I can fit also in higher frequency regions...
Report of this one:
[[Fit Statistics]]
# fitting method = leastsq
# function evals = 25
# data points = 4001
# variables = 2
chi-square = 109156.170
reduced chi-square = 27.2958664
Akaike info crit = 13232.2473
Bayesian info crit = 13244.8359
[[Variables]]
C: 2.2e-07 (fixed)
L1: 2.3344e-08 +/- 1.9987e-10 (0.86%) (init = 1e-05)
R1: 0.17444702 +/- 0.29660571 (170.03%) (init = 0.05)
Please note: I also have changed the input data of the model. Now I give the model complex values and then I calculate the Amplitude. Find this also here: https://1drv.ms/u/s!AsLKp-1R8HlZhcdlJER5T7qjmvfmnw?e=qnrZk1

Emotion detection using facial landmarks

I plan on using scikit svm for class prediction.
I have been trying this :
Get images from a webcam
Detect Facial Landmarks
Train a machine learning algorithm (we will use a linear SVM)
Predict emotions
I have a problem in this line : clf.fit(npar_train, training_labels)
also I have a problem in site-packages\sklearn\svm_base.py and in site-packages\sklearn\utils\validation.py
How can I remove this error?
thank you in advance
python script
emotions = ['neutral', 'sad', 'happy', 'anger']
data={}
detector = dlib.get_frontal_face_detector()
predictor = dlib.shape_predictor('shape_predictor_68_face_landmarks.dat')
clf = SVC(kernel='linear', probability=True, tol=1e-3)
def get_files(emotion):
files = glob.glob('img\\datasets\\%s\\*' %emotion)
random.shuffle(files)
training = files[:int(len(files)*0.8)]
prediction = files[-int(len(files)*0.2)]
return training, prediction
def get_landmarks(image):
detections = detector(image, 1)
for k, d in enumerate(detections): # For all detected face instances individually
shape = predictor(image, d) # Draw Facial Landmarks with the predictor class
xlist = []
ylist = []
for i in range(1, 68): # Store X and Y coordinates in two lists
xlist.append(float(shape.part(i).x))
ylist.append(float(shape.part(i).y))
xmean = np.mean(xlist)
ymean = np.mean(ylist)
xcentral = [(x - xmean) for x in xlist]
ycentral = [(y - ymean) for y in ylist]
landmarks_vectorised = []
for x, y, w, z in zip(xcentral, ycentral, xlist, ylist):
landmarks_vectorised.append(w)
landmarks_vectorised.append(z)
meannp = np.asarray((ymean, xmean))
coornp = np.asarray((z, w))
dist = np.linalg.norm(coornp - meannp)
landmarks_vectorised.append(dist)
landmarks_vectorised.append((math.atan2(y, x) * 360) / (2 * math.pi))
data['landmarks_vectorised'] = landmarks_vectorised
if len(detections) < 1:
data['landmarks_vestorised'] = "error"
def make_sets():
training_data = []
training_labels = []
prediction_data = []
prediction_labels = []
for emotion in emotions:
print("Working on %s emotion" %emotion)
training, prediction = get_files(emotion)
for item in training:
image = cv2.imread(item)
try:
image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
except:
print()
clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
clahe_image = clahe.apply(image)
get_landmarks(clahe_image)
if data['landmarks_vectorised'] == "error":
print("no face detected on this one")
else:
training_data.append(data['landmarks_vectorised']) # append image array to training data list
training_labels.append(emotions.index(emotion))
for item in prediction:
image = cv2.imread(item)
try:
image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
except:
print()
clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
clahe_image = clahe.apply(image)
get_landmarks(clahe_image)
if data['landmarks_vectorised'] == "error":
print("no face detected on this one")
else:
prediction_data.append(data['landmarks_vectorised'])
prediction_labels.append(emotions.index(emotion))
return training_data, training_labels, prediction_data, prediction_labels
accur_lin = []
for i in range(0,10):
print("Making sets %s" % i) # Make sets by random sampling 80/20%
training_data, training_labels, prediction_data, prediction_labels = make_sets()
npar_train = np.array(training_data)
npar_trainlabs = np.array(training_labels)
print("training SVM linear %s" % i) # train SVM
clf.fit(npar_train, training_labels)
print("getting accuracies %s" % i)
npar_pred = np.array(prediction_data)
pred_lin = clf.score(npar_pred, prediction_labels)
print("Mean value lin svm: %s" % np.mean(accur_lin))

not enough values to unpack (expected 2, got 1) adaboost algorithm

def adaboost(X_train, Y_train, X_test, Y_test, lamb=0.01, num_iterations=200, learning_rate=0.001):
label_train = 2*Y_train -1
label_test = 2*Y_test -1
[n,p] = X_train.shape
[ntest, ptest] = X_test.shape
X_train_1 = np.concatenate((np.ones([n,1]), X_train), axis=1)
X_test_1 = np.concatenate((np.ones([ntest,1]), X_test), axis=1)
beta = np.zeros([p+1])
acc_train = []
acc_test = []
#margins = []
for it in range(num_iterations):
score = np.matmul(X_train_1, beta)
error = (score*label_train < 1)
dbeta = np.mean(X_train_1 * (error * label_train).reshape(-1,1), axis=0)
beta += learning_rate * dbeta
beta[1:] -= lamb * beta[1:]
#margins.append(np.min(score*label_train))
# train
predict = (np.sign(score) == label_train)
acc = np.sum(predict)/n
acc_train.append(acc)
# test
score_test = np.matmul(X_test_1, beta)
predict = (np.sign(score_test) == label_test)
acc = np.sum(predict)/ntest
acc_test.append(acc)
return beta, acc_train, acc_test
I am calling this function by:
_, train_acc, test_acc = adaboost(X_train, y_train, X_test, y_test)
and it is giving the error provided in title:
for line 68 '''[ntest, ptest] = X_test.shape'''
Any idea how to stop getting this error?
Can someone explain what I am doing wrong??
Whatever X_test is, it must have only a single dimension when it should be two dimensional

Tensorflow/Keras: volatile validation loss

I've been training a U-Net for single class small lesion segmentation, and have been getting consistently volatile validation loss. I have about 20k images split 70/30 between training and validation sets-so I don't think the issue is too little data. I've tried shuffling and resplitting the sets a few times with no change in volatility-so I don't think the validation set is unrepresentative. I have tried lowering the learning rate with no effect on volatility. And I have tried a few loss functions (dice coefficient, focal tversky, weighted binary cross-entropy). I'm using a decent amount of augmentation so as to avoid overfitting. I've also run through all my data (512x512 float64s with corresponding 512x512 int64 masks--both stored as numpy arrays) do double check that the value range, dtypes, etc. aren't screwy...and I even removed any ROIs in the masks under 35 pixels in area which I thought might be artifact and messing with loss.
I'm using keras ImageDataGen.flow_from_directory...I was initially using zca_whitening and brightness_range augmentation but I think this causes issues with flow_from_directory and the link between mask and image being lost.. so I skipped this.
I've tried validation generators with and without shuffle=True. Batch size is 8.
Here's some of my code, happy to include more if it would help:
# loss
from keras.losses import binary_crossentropy
import keras.backend as K
import tensorflow as tf
epsilon = 1e-5
smooth = 1
def dsc(y_true, y_pred):
smooth = 1.
y_true_f = K.flatten(y_true)
y_pred_f = K.flatten(y_pred)
intersection = K.sum(y_true_f * y_pred_f)
score = (2. * intersection + smooth) / (K.sum(y_true_f) + K.sum(y_pred_f) + smooth)
return score
def dice_loss(y_true, y_pred):
loss = 1 - dsc(y_true, y_pred)
return loss
def bce_dice_loss(y_true, y_pred):
loss = binary_crossentropy(y_true, y_pred) + dice_loss(y_true, y_pred)
return loss
def confusion(y_true, y_pred):
smooth=1
y_pred_pos = K.clip(y_pred, 0, 1)
y_pred_neg = 1 - y_pred_pos
y_pos = K.clip(y_true, 0, 1)
y_neg = 1 - y_pos
tp = K.sum(y_pos * y_pred_pos)
fp = K.sum(y_neg * y_pred_pos)
fn = K.sum(y_pos * y_pred_neg)
prec = (tp + smooth)/(tp+fp+smooth)
recall = (tp+smooth)/(tp+fn+smooth)
return prec, recall
def tp(y_true, y_pred):
smooth = 1
y_pred_pos = K.round(K.clip(y_pred, 0, 1))
y_pos = K.round(K.clip(y_true, 0, 1))
tp = (K.sum(y_pos * y_pred_pos) + smooth)/ (K.sum(y_pos) + smooth)
return tp
def tn(y_true, y_pred):
smooth = 1
y_pred_pos = K.round(K.clip(y_pred, 0, 1))
y_pred_neg = 1 - y_pred_pos
y_pos = K.round(K.clip(y_true, 0, 1))
y_neg = 1 - y_pos
tn = (K.sum(y_neg * y_pred_neg) + smooth) / (K.sum(y_neg) + smooth )
return tn
def tversky(y_true, y_pred):
y_true_pos = K.flatten(y_true)
y_pred_pos = K.flatten(y_pred)
true_pos = K.sum(y_true_pos * y_pred_pos)
false_neg = K.sum(y_true_pos * (1-y_pred_pos))
false_pos = K.sum((1-y_true_pos)*y_pred_pos)
alpha = 0.7
return (true_pos + smooth)/(true_pos + alpha*false_neg + (1-alpha)*false_pos + smooth)
def tversky_loss(y_true, y_pred):
return 1 - tversky(y_true,y_pred)
def focal_tversky(y_true,y_pred):
pt_1 = tversky(y_true, y_pred)
gamma = 0.75
return K.pow((1-pt_1), gamma)
model = BlockModel((len(os.listdir(os.path.join(imageroot,'train_ct','train'))), 512, 512, 1),filt_num=16,numBlocks=4)
#model.compile(optimizer=Adam(learning_rate=0.001), loss=weighted_cross_entropy)
#model.compile(optimizer=Adam(learning_rate=0.001), loss=dice_coef_loss)
model.compile(optimizer=Adam(learning_rate=0.001), loss=focal_tversky)
train_mask = os.path.join(imageroot,'train_masks')
val_mask = os.path.join(imageroot,'val_masks')
model.load_weights(model_weights_path) #I'm initializing with some pre-trained weights from a similar model
data_gen_args_mask = dict(
rotation_range=10,
shear_range=20,
width_shift_range=0.1,
height_shift_range=0.1,
zoom_range=[0.8,1.2],
horizontal_flip=True,
#vertical_flip=True,
fill_mode='nearest',
data_format='channels_last'
)
data_gen_args = dict(
**data_gen_args_mask
)
image_datagen_train = ImageDataGenerator(**data_gen_args)
mask_datagen_train = ImageDataGenerator(**data_gen_args)#_mask)
image_datagen_val = ImageDataGenerator()
mask_datagen_val = ImageDataGenerator()
seed = 1
BS = 8
steps = int(np.floor((len(os.listdir(os.path.join(train_ct,'train'))))/BS))
print(steps)
val_steps = int(np.floor((len(os.listdir(os.path.join(val_ct,'val'))))/BS))
print(val_steps)
train_image_generator = image_datagen_train.flow_from_directory(
train_ct,
target_size = (512, 512),
color_mode = ("grayscale"),
classes=None,
class_mode=None,
seed = seed,
shuffle = True,
batch_size = BS)
train_mask_generator = mask_datagen_train.flow_from_directory(
train_mask,
target_size = (512, 512),
color_mode = ("grayscale"),
classes=None,
class_mode=None,
seed = seed,
shuffle = True,
batch_size = BS)
val_image_generator = image_datagen_val.flow_from_directory(
val_ct,
target_size = (512, 512),
color_mode = ("grayscale"),
classes=None,
class_mode=None,
seed = seed,
shuffle = True,
batch_size = BS)
val_mask_generator = mask_datagen_val.flow_from_directory(
val_mask,
target_size = (512, 512),
color_mode = ("grayscale"),
classes=None,
class_mode=None,
seed = seed,
shuffle = True,
batch_size = BS)
train_generator = zip(train_image_generator, train_mask_generator)
val_generator = zip(val_image_generator, val_mask_generator)
# make callback for checkpointing
plot_losses = PlotLossesCallback(skip_first=0,plot_extrema=False)
%matplotlib inline
filepath = os.path.join(versionPath, model_version + "_saved-model-{epoch:02d}-{val_loss:.2f}.hdf5")
if reduce:
cb_check = [ModelCheckpoint(filepath,monitor='val_loss',
verbose=1,save_best_only=False,
save_weights_only=True,mode='auto',period=1),
reduce_lr,
plot_losses]
else:
cb_check = [ModelCheckpoint(filepath,monitor='val_loss',
verbose=1,save_best_only=False,
save_weights_only=True,mode='auto',period=1),
plot_losses]
# train model
history = model.fit_generator(train_generator, epochs=numEp,
steps_per_epoch=steps,
validation_data=val_generator,
validation_steps=val_steps,
verbose=1,
callbacks=cb_check,
use_multiprocessing = False
)
And here's how my loss looks:
Another potentially relevant thing: I tweaked the flow_from_directory code a bit (added npy to the white list). But training loss looks fine so assuming the issue isnt here
Two suggestions:
Switch to the classic validation data format (i.e. numpy array) instead of using a generator -- this will ensure you always use the exactly same validation data every time. If you see a different validation curve, then there is something "random" in the validation generator giving you different data at different epochs.
Use a fixed set of samples (100 or 1000 should be enough w/o any data augmentation) for both training and validation. If everything goes well, you should see your network quickly overfit to this dataset and your training and validation curves should very much similar. If not, debug your network.

Resources