I'm making a model for image denoising and use ImageDataGenerator.flow_from_directory to load the dataset. It is structured in two folders, one with noisy input images and one with the corresponding clean images. I want the generator to use the images in the first folder as inputs and the other folder as "labels"/ground truth.
With the method I'm using right now all images in both folders are treated as inputs with the folder name as label. I can extract the images manually by selecting specific batches and train on that, but it's inconvenient and probably wasn't intended to be used that way.
What is the proper way of doing this? There probably is a function for this but I can't find it.
I had a similar problem and found it necessary to create a custom generator to feed the images into model.fit. The (rather lengthy) code is posted below.
import os
import pandas as pd
import numpy as np
import glob
import cv2
from sklearn.model_selection import train_test_split
def create_df(image_dir, label_dir, shuffle=True):
    # sort both lists so corresponding image/label files line up
    image_file_paths = sorted(glob.glob(image_dir + '/**/*', recursive=True))
    label_file_paths = sorted(glob.glob(label_dir + '/**/*', recursive=True))
    # run a check and make sure the filenames (without extensions) match
    df = pd.DataFrame({'image': image_file_paths, 'label': label_file_paths}).astype(str)
    if shuffle:
        df = df.sample(frac=1.0, replace=False, weights=None, random_state=123, axis=0).reset_index(drop=True)
    return df
class jpgen():
    def __init__(self, df, train_split=None, test_split=None):
        self.train_split = train_split  # float between 0 and 1: fraction of images used for training
        self.test_split = test_split
        self.df = df.copy()  # work on a copy of the dataframe
        if self.train_split is not None:  # split the df to create a training df
            self.train_df, dummy_df = train_test_split(self.df, train_size=self.train_split, shuffle=False)
            if self.test_split is not None:  # create a test set and a validation set
                t_split = self.test_split / (1.0 - self.train_split)
                self.test_df, self.valid_df = train_test_split(dummy_df, train_size=t_split, shuffle=False)
                self.valid_gen_len = len(self.valid_df['image'].unique())  # number of samples in the validation generator
                self.valid_gen_filenames = list(self.valid_df['image'])  # list of jpg file names in the validation generator
            else:
                self.test_df = dummy_df
            self.test_gen_len = len(self.test_df['image'].unique())  # number of test samples
            self.test_gen_filenames = list(self.test_df['image'])  # list of jpg file paths in the test generator
        else:
            self.train_df = self.df
        self.tr_gen_len = len(self.train_df['image'].unique())  # number of samples in the train generator

    def flow(self, batch_size=32, image_shape=None, rescale=None, shuffle=True, subset=None):
        # flows batches of input images and label images to model.fit
        self.batch_size = batch_size
        self.image_shape = image_shape
        self.shuffle = shuffle
        self.subset = subset
        self.rescale = rescale
        # op_df will be the train, test or valid dataframe depending on subset
        if self.subset == 'training' or self.train_split is None:
            op_df = self.train_df
        elif self.subset == 'test':
            op_df = self.test_df
        else:
            op_df = self.valid_df
        if self.shuffle:  # shuffle op_df, then reset the index
            op_df = op_df.sample(frac=1.0, replace=False, weights=None, random_state=123, axis=0).reset_index(drop=True)
        sample_count = len(op_df['image'])
        batch_index = 0  # local counter, so the train/valid/test generators do not interfere with each other
        # develop the batches of data
        while True:
            image_batch_list = []  # holds one batch of input images
            label_batch_list = []  # holds one batch of label images
            start = batch_index * self.batch_size  # start index of this batch
            end = start + self.batch_size  # end index, yielding one batch of length batch_size
            for i in range(start, end):  # iterate over one batch of data
                j = i % sample_count  # roll back to the front when the end of the dataframe is reached
                path_to_image = op_df.iloc[j]['image']
                path_to_label = op_df.iloc[j]['label']
                label_image = cv2.imread(path_to_label, -1)  # read unchanged to preserve a 4th channel if present
                label_image = cv2.cvtColor(label_image, cv2.COLOR_BGR2RGB)
                image_image = cv2.imread(path_to_image)
                image_image = cv2.cvtColor(image_image, cv2.COLOR_BGR2RGB)
                label_image = cv2.resize(label_image, self.image_shape)
                image_image = cv2.resize(image_image, self.image_shape)
                if self.rescale is not None:
                    label_image = label_image * self.rescale
                    image_image = image_image * self.rescale
                label_batch_list.append(label_image)
                image_batch_list.append(image_image)
            image_array = np.array(image_batch_list)
            label_array = np.array(label_batch_list)
            batch_index += 1
            yield (image_array, label_array)
The code below shows how to use the functions above to make the generators for model.fit.
image_dir=r'C:\Temp\gen_test\images' # directory with the noisy input images
label_dir=r'C:\Temp\gen_test\labels' # directory with the clean target images; file names match those in the input directory
shuffle=False # if True, shuffles the dataframe
df=create_df(image_dir, label_dir, shuffle) # create a dataframe with columns 'image', 'label',
                                            # where the labels are the clean target images
train_split=.8 # use 80% of files for training
test_split=.1 # use 10% for test, automatically sets validation split at 1-train_split-test_split
batch_size=32 # set batch_size
height=224 # set image height for generator output images and labels
width=224 # set image width for generator output images and labels
channels=3 # set the number of channels in the images
image_shape=(height, width)
rescale=1/255 # set value to rescale image pixels
gen=jpgen(df, train_split=train_split, test_split=test_split) # create instance of generator class
tr_gen_len=gen.tr_gen_len
test_gen_len= gen.test_gen_len
valid_gen_len=gen.valid_gen_len
test_filenames=gen.test_gen_filenames # names of the test file paths, used for testing
train_steps=tr_gen_len//batch_size # use this value for steps_per_epoch in model.fit
valid_steps=valid_gen_len//batch_size # use this value for validation_steps in model.fit
test_steps=test_gen_len//batch_size # use this value for steps in model.predict
# instantiate generators
train_gen=gen.flow(batch_size=batch_size, image_shape=image_shape, rescale=rescale, shuffle=False, subset='training')
valid_gen=gen.flow(batch_size=batch_size, image_shape=image_shape, rescale=rescale, shuffle=False, subset='valid')
test_gen=gen.flow(batch_size=batch_size, image_shape=image_shape, rescale=rescale, shuffle=False, subset='test')
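You can sanity-check the pairing by pulling a single batch and printing the shapes; with the settings above both arrays should come out as (32, 224, 224, 3):

images, labels = next(train_gen)
print(images.shape, labels.shape)  # expect (32, 224, 224, 3) for both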
Build your model, then use:
history=model.fit(train_gen, epochs=epochs, steps_per_epoch=train_steps,validation_data=valid_gen,
validation_steps=valid_steps, verbose=1, shuffle=True)
predictions=model.predict(test_gen, steps=test_steps)
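As an aside, if you would rather avoid a custom generator altogether, a common pattern is to create two flow_from_directory generators with class_mode=None and the same seed, then zip them so they yield (noisy, clean) pairs. A minimal sketch (directory names are placeholders; each directory needs a single subfolder, since flow_from_directory expects class subdirectories, and matching filenames so the alphanumeric ordering lines the pairs up):

from tensorflow.keras.preprocessing.image import ImageDataGenerator

datagen = ImageDataGenerator(rescale=1./255)
seed = 42  # the same seed gives both generators the same shuffle order
noisy_gen = datagen.flow_from_directory('noisy_dir', class_mode=None, seed=seed,
                                        target_size=(224, 224), batch_size=32)
clean_gen = datagen.flow_from_directory('clean_dir', class_mode=None, seed=seed,
                                        target_size=(224, 224), batch_size=32)
pair_gen = zip(noisy_gen, clean_gen)  # yields (input_batch, target_batch) tuples
# model.fit(pair_gen, steps_per_epoch=len(noisy_gen), epochs=...)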
I have some folders containing many jpg pictures (the number depends on the folder).
I would like, for instance, to combine every 4 pictures** into one image, together with the title of each picture.
(In case there are not exactly 4 images left for the last sequence, I should get however many pictures remain, such as 3, 2 or 1.)
**Ideally I could change that number to other numbers like 5, 6 or 10 (the number I chose would depend on the context), and I could also choose the number of columns (I used 2 columns in my example).
How can I perform this with a Linux command or any free/open-source Linux software?
As I did not find what I wanted, I created my own Python code to solve this (it's probably not the most perfect script of the century, but it works):
"""
Prints a collage according to desired number of column and rows with title of file
Instruction
1. Put all jpg picture in same folder [tested sucessfully on 12mb per pict]
2. select desired columns in NO_COL
3. select desired rowsin in NO_ROW
4. run the script which will output the collage with <cur_date>_export.png files
"""
#import libraries
import time
import os
import imageio as iio
from matplotlib import pyplot as plt
def render_collage(pict_file_name_list):
""" create one collage """
fig = plt.figure(figsize=(40, 28)) #change if needed
cnt = 1
for cur_img_name in pict_file_name_list:
img_var = iio.imread(cur_img_name)
fig.add_subplot(NO_COL, NO_ROW, cnt)
plt.imshow(img_var)
plt.axis('off')
plt.title(cur_img_name, fontsize = 30) #change if needed
cnt = cnt + 1
cur_date = time.strftime("%Y-%m-%d--%H-%M-%s")
fig.savefig(cur_date+'_export.png')
NO_COL = 3
NO_ROW = 3
NBR_IMG_COLLAGE = NO_COL * NO_ROW
img_list_name = [elem for elem in os.listdir() if 'jpg' in elem] #keep only file having .jpg
while len(img_list_name) >= 1:
sub_list = img_list_name[:NBR_IMG_COLLAGE]
render_collage(sub_list)
del img_list_name[:NBR_IMG_COLLAGE]
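Side note: ImageMagick's montage tool (with its -label, -tile and -geometry options) can build similar labelled grids directly from the command line, which may cover the "Linux command" part of the question.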
I have a convolutional neural network, and I want to train it on images from the training set, but first they should be wrapped with my function change(tensor, float), which takes in a tensor/image of the form [height, width, 3] and a float.
batch_size = 4
Loading the data:
trainset = torchvision.datasets.CIFAR10(root='./data', train=True,
                                        download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size,
                                          shuffle=True, num_workers=2)
CNN architecture (omitted). The training loop:
for epoch in range(2):  # loop over the dataset multiple times
    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        # get the inputs; data is a list of [inputs, labels]
        inputs, labels = data
        # size of inputs [4, 3, 32, 32]
        # size of labels [4]
        inputs = change(inputs, 0.1)  # <----------------------------
        # zero the parameter gradients
        optimizer.zero_grad()
        # forward + backward + optimize
        outputs = net(inputs)  # [4, 10]
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        # print statistics
        running_loss += loss.item()
        if i % 2000 == 1999:  # print every 2000 mini-batches
            print(f'[{epoch + 1}, {i + 1:5d}] loss: {running_loss / 2000:.3f}')
            running_loss = 0.0
print('Finished Training')
I am trying to apply the image function change, but it gives an object error. Is there a quick way to fix it?
change is implemented as a Julia function, but it works completely fine with other objects. Error message:
JULIA: MethodError: no method matching copy(::PyObject)
Closest candidates are:
copy(!Matched::T) where T<:SHA.SHA3_CTX at /opt/julia-1.7.2/share/julia/stdlib/v1.7/SHA/src/types.jl:213
copy(!Matched::T) where T<:SHA.SHA2_CTX at /opt/julia-1.7.2/share/julia/stdlib/v1.7/SHA/src/types.jl:212
copy(!Matched::Number) at /opt/julia-1.7.2/share/julia/base/number.jl:113
I would recommend putting the change function into the transforms list, so the data change happens at the transformation stage.
partial from functools will help you fix the number of arguments, like this:
from functools import partial

def change(input, float):
    pass

# Use partial to fix the float parameter, such that change accepts only the input
change_partial = partial(change, float=pass_float_value_here)

# Add change_partial to the list of transforms, before or after converting to tensors
transforms = Compose([
    RandomResizedCrop(img_size),  # example
    # add change_partial here if it operates on PIL images
    change_partial,
    ToTensor(),  # convert to tensor
    # add change_partial here if it operates on torch tensors
    change_partial,
])
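For completeness, a sketch of how this could be wired into the CIFAR-10 loading code from the question. The permutes are my assumption, since change expects a [height, width, 3] tensor while ToTensor produces [3, height, width]; change itself is the question's function:

from functools import partial
import torchvision
import torchvision.transforms as T

def change_hwc(x, amount):
    x = x.permute(1, 2, 0)       # [3, H, W] -> [H, W, 3], the layout change expects
    x = change(x, amount)        # the question's function
    return x.permute(2, 0, 1)    # back to [3, H, W] for the network

transform = T.Compose([
    T.ToTensor(),
    partial(change_hwc, amount=0.1),
])
trainset = torchvision.datasets.CIFAR10(root='./data', train=True,
                                        download=True, transform=transform)

trainset then goes into the DataLoader exactly as before, and the inputs = change(inputs, 0.1) line can be removed from the training loop.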
I have a JPEG image. I would like to convert the image to a TIF with JPEG compression (i.e. a "JPEG-in-TIF"). I am using GDAL to do this.
I require that the array representations of the original JPEG and converted TIF are identical.
I have found a method to do this but I am trying to understand a.) why it appears to work and b.) whether it will fail or have significant additional downsides. (If there is an alternative, sure-fire method for accomplishing this task then that would also be a great answer.)
What I have found so far
I found that doing the following command results in a TIF where perhaps 5% of pixels are different (full code sample below):
gdal_translate in.jpg out.tif -co COMPRESS=JPEG
The GDAL docs say that this command will result in a "lossless" conversion. I take it that "lossless" here is not the same thing as a guarantee that the arrays will be identical?
Specifying the block sizes to be equal to the size of the image appears to work?
If I additionally specify block size arguments in the conversion command, then the resulting arrays are identical (in my sample images):
# Suppose that in.jpg is 400x245
gdal_translate in.jpg out.tif -co COMPRESS=JPEG -co BLOCKXSIZE=400 -co BLOCKYSIZE=245
My (very fuzzy) best-guess intuition right now is that specifying this large block size prevents compression of the individual blocks? However,
The docs state that lossless compression will occur "without decompression and compression cycles." So no compression should actually be occurring?
I am confused as to why gdal_translate is happy to accept BLOCKYSIZE=245 when 245 is not a multiple of 8?
The resulting out.tif has 400x245 block(s) for each band. This appears to negatively affect read time in the resulting image.
Block size is only necessary when SOURCE_COLOR_SPACE is not RGB?
In further experiments (see demo code) it appears that - if an image has RGB color space - then it is sufficient to specify -co COMPRESS=JPEG and nothing further in the conversion command.
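For reference, both the source color space and the resulting block layout can be inspected from Python; a small sketch (file names follow the demo code below, and the metadata key is only reported when the driver knows it):

import gdal  # or: from osgeo import gdal

src = gdal.Open("sample_file.jpg")
print(src.GetMetadata("IMAGE_STRUCTURE"))  # may report e.g. {'SOURCE_COLOR_SPACE': 'YCbCr', ...}

dst = gdal.Open("sample_tif_bs.tif")
print(dst.GetRasterBand(1).GetBlockSize())  # e.g. [400, 245] with the explicit BLOCK*SIZE flags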
Demo code
Here is some python code to demonstrate actual commands:
import os
from typing import Tuple

import requests
import numpy as np
import gdal

def read_image_gdal(filepath: str) -> np.ndarray:
    try:
        f = gdal.Open(filepath)
        arr = f.ReadAsArray()
        return arr
    finally:
        f = None
        del f

def get_image_size(filepath: str) -> Tuple[int, int]:
    try:
        f = gdal.Open(filepath)
        return f.RasterXSize, f.RasterYSize
    finally:
        f = None
        del f
# Ex. 1 Medium-sized image (400x245) with YCbCr color space ~> TIF_0 is different
SAMPLE_JPEG_URL= 'https://jpeg.org/images/about.jpg'
# # Ex. 2 Small (50x50) image with RGB Colorspace ~> TIF_0 is the SAME
# SAMPLE_JPEG_URL = 'https://raw.githubusercontent.com/OSGeo/gdal/0402b86928e09860e6d24215b6f5611c31fa3c69/autotest/gdrivers/data/jpeg/rgbsmall_rgb.jpg'
# # Ex. 3 Small (50x50) image but has CMYK color space. ~> TIF_0 is different (But see e.g. https://gdal.org/drivers/raster/gtiff.html#raw-mode)
# SAMPLE_JPEG_URL = 'https://raw.githubusercontent.com/OSGeo/gdal/master/autotest/gdrivers/data/jpeg/rgb_ntf_cmyk.jpg'
SAMPLE_JPEG_FILEPATH = "sample_file.jpg"
# Download sample JPEG file from GDAL github
res = requests.get(SAMPLE_JPEG_URL)
with open(SAMPLE_JPEG_FILEPATH, "wb") as file:
file.write(res.content)
# ###########################
# EXPERIMENT: Convert JPEG to TIF with JPEG compression + settings.
# GOAL: TIF's array is same as original JPEG.
# ###########################
# Create a JPEG-in-TIF
TIF_0_FILEPATH = "sample_tif_0.tif"
os.system(f"gdal_translate -co COMPRESS=JPEG {SAMPLE_JPEG_FILEPATH} {TIF_0_FILEPATH}")
# JPEG-in-TIF BUT specify block size explicitly as the size of the image.
TIF_BS_FILEPATH = "sample_tif_bs.tif"
xs, ys = get_image_size(SAMPLE_JPEG_FILEPATH)
print(f"Size of image: {xs}x{ys}")
os.system(f"gdal_translate -co COMPRESS=JPEG -co BLOCKXSIZE={xs} -co BLOCKYSIZE={ys} {SAMPLE_JPEG_FILEPATH} {TIF_BS_FILEPATH}")
# Read and compare resulting arrays
arr = read_image_gdal(SAMPLE_JPEG_FILEPATH)
arr_0 = read_image_gdal(TIF_0_FILEPATH)
arr_bs = read_image_gdal(TIF_BS_FILEPATH)
print("Share Pixels different")
print(" JPEG-in-TIF: ", (arr != arr_0).mean())
print(" Explicit block size: ", (arr != arr_bs).mean())
Output is:
Size of image: 400x245
Share Pixels different
JPEG-in-TIF: 0.051727891156462584
Explicit block size: 0.0
The two additional SAMPLE_JPEG_URLs try to suss out whether the SOURCE_COLOR_SPACE matters. (An alternative explanation is that example #2 is a small image and that this affects the results.)
I am new to programming and used Google OR-tools to create my VRP model. In my current model, I have included a general time window and a capacity constraint per vehicle, creating a capacitated vehicle routing problem with time windows. I followed the OR-tools guides, which include a maximum travel duration for each vehicle.
However, I want to include a maximum travel duration for the sum of all routes, whereas the maximum travel duration for each individual vehicle does not matter (so I set it to 100,000). Accordingly, I want to create something in the model/solution printer that tells me how many addresses could not be visited due to the constraint on the maximum travel duration for the sum of all routes. From the examples I have seen, I think it would be fairly easy, but my knowledge of programming is limited, so my attempts had no success. Can anyone help me?
import pandas as pd
import openpyxl
import numpy as np
import math
from random import sample
from ortools.constraint_solver import routing_enums_pb2
from ortools.constraint_solver import pywrapcp
from scipy.spatial.distance import squareform, pdist
from haversine import haversine
#STEP - create data
# import/read excel file
data = pd.read_excel(r'C:\Users\Jean-Paul\Documents\Thesis\OR TOOLS\Data.xlsx', engine = 'openpyxl')
df = pd.DataFrame(data, columns= ['number','lat','lng']) # create dataframe with 10805 addresses + address of the depot
#print (df)
# randomly sample X addresses from the dataframe and their corresponding number/latitude/longtitude
df_sample = df.sample(n=100)
#print (df_data)
# read first row of the excel file (= coordinates of the depot)
df_depot = pd.DataFrame(data, columns= ['number','lat','lng']).iloc[0:1]
#print (df_depot)
# combine dataframe of depot and sample into one dataframe
df_data = pd.concat([df_depot, df_sample], ignore_index=True, sort=False)
#print (df_data)
#STEP - create distance matrix data
# determine distance between latitude and longtitude
df_data.set_index('number', inplace=True)
matrix_distance = pd.DataFrame(squareform(pdist(df_data, metric=haversine)), index=df_data.index, columns=df_data.index)
matrix_list = np.array(matrix_distance)
#print (matrix_distance) # create table of distances between addresses including headers
#print (matrix_list) # converting table to list of lists and exclude headers
#STEP - create time matrix data
travel_time = matrix_list / 15 * 60 # divide distance by the travel speed of 15 km/h and multiply by 60 to get minutes
#print (travel_time) # converting the distance matrix to a travel time matrix
#STEP - create time window data
# create a list for each sample - couriers have to visit each address within 0-X minutes, using a list of lists
window_range = []
for i in range(len(df_data)):
    window_range.append([0, 240]) # time window range for each address
#print (window_range)
#STEP - create demand data
# create a list for each sample - all addresses demand 1 parcel, except the depot
demand_range = []
for i in range(len(df_data.iloc[0:1])):
    demand_range.append(0) # the depot demands nothing
for j in range(len(df_data.iloc[1:])):
    demand_range.append(1) # each address demands 1 parcel
#print (demand_range)
#STEP - create fleet size data # amount of vehicles in the fleet
fleet_size = 6
#print (fleet_size)
#STEP - create capacity data for each vehicle
fleet_capacity = []
for i in range(fleet_size): # capacity per vehicle
    fleet_capacity.append(20)
#print (fleet_capacity)
#STEP - create data model that stores all data for the problem
def create_data_model():
    data = {}
    data['time_matrix'] = travel_time
    data['time_windows'] = window_range
    data['num_vehicles'] = fleet_size
    data['depot'] = 0 # index of the depot
    data['demands'] = demand_range
    data['vehicle_capacities'] = fleet_capacity
    return data
#STEP - creating the solution printer
def print_solution(data, manager, routing, solution):
    """Prints solution on console."""
    print(f'Objective: {solution.ObjectiveValue()}')
    time_dimension = routing.GetDimensionOrDie('Time')
    total_time = 0
    for vehicle_id in range(data['num_vehicles']):
        index = routing.Start(vehicle_id)
        plan_output = 'Route for vehicle {}:\n'.format(vehicle_id)
        while not routing.IsEnd(index):
            time_var = time_dimension.CumulVar(index)
            plan_output += '{0} Time({1},{2}) -> '.format(
                manager.IndexToNode(index), solution.Min(time_var),
                solution.Max(time_var))
            index = solution.Value(routing.NextVar(index))
        time_var = time_dimension.CumulVar(index)
        plan_output += '{0} Time({1},{2})\n'.format(manager.IndexToNode(index),
                                                    solution.Min(time_var),
                                                    solution.Max(time_var))
        plan_output += 'Time of the route: {}min\n'.format(
            solution.Min(time_var))
        print(plan_output)
        total_time += solution.Min(time_var)
    print('Total time of all routes: {}min'.format(total_time))
#STEP - create the VRP solver
def main():
    # instantiate the data problem
    data = create_data_model()
    # create the routing index manager
    manager = pywrapcp.RoutingIndexManager(len(data['time_matrix']),
                                           data['num_vehicles'], data['depot'])
    # create the routing model
    routing = pywrapcp.RoutingModel(manager)

    #STEP - create demand callback and dimension for capacity
    # create and register a transit callback
    def demand_callback(from_index):
        """Returns the demand of the node."""
        # convert from routing variable Index to demands NodeIndex
        from_node = manager.IndexToNode(from_index)
        return data['demands'][from_node]

    demand_callback_index = routing.RegisterUnaryTransitCallback(
        demand_callback)
    routing.AddDimensionWithVehicleCapacity(
        demand_callback_index,
        0, # null capacity slack
        data['vehicle_capacities'], # vehicle maximum capacities
        True, # start cumul to zero
        'Capacity')

    #STEP - create time callback
    # create and register a transit callback
    def time_callback(from_index, to_index):
        """Returns the travel time between the two nodes."""
        # convert from routing variable Index to time matrix NodeIndex
        from_node = manager.IndexToNode(from_index)
        to_node = manager.IndexToNode(to_index)
        return int(data['time_matrix'][from_node][to_node]) # OR-tools expects integer transit values

    transit_callback_index = routing.RegisterTransitCallback(time_callback)
    # define the cost of each arc (costs in terms of travel time)
    routing.SetArcCostEvaluatorOfAllVehicles(transit_callback_index)

    #STEP - create a dimension for the travel time (TIMEWINDOW) - a dimension keeps track of quantities that accumulate over a vehicle's route
    # add the time windows constraint
    time = 'Time'
    routing.AddDimension(
        transit_callback_index,
        2, # allow waiting time (does not have an influence in this model)
        100000, # maximum total route length in minutes per vehicle (does not have an influence because of the capacity constraint)
        False, # do not force start cumul to zero
        time)
    time_dimension = routing.GetDimensionOrDie(time)
    # add time window constraints for each location except the depot
    for location_idx, time_window in enumerate(data['time_windows']):
        if location_idx == data['depot']:
            continue
        index = manager.NodeToIndex(location_idx)
        time_dimension.CumulVar(index).SetRange(time_window[0], time_window[1])
    # add a time window constraint for each vehicle start node
    depot_idx = data['depot']
    for vehicle_id in range(data['num_vehicles']):
        index = routing.Start(vehicle_id)
        time_dimension.CumulVar(index).SetRange(
            data['time_windows'][depot_idx][0],
            data['time_windows'][depot_idx][1])

    #STEP - instantiate route start and end times to produce feasible times
    for i in range(data['num_vehicles']):
        routing.AddVariableMinimizedByFinalizer(
            time_dimension.CumulVar(routing.Start(i)))
        routing.AddVariableMinimizedByFinalizer(
            time_dimension.CumulVar(routing.End(i)))

    #STEP - set default search parameters and a heuristic method for finding the first solution
    search_parameters = pywrapcp.DefaultRoutingSearchParameters()
    search_parameters.first_solution_strategy = (
        routing_enums_pb2.FirstSolutionStrategy.PATH_CHEAPEST_ARC)

    #STEP - solve the problem with the search parameters and print the solution
    solution = routing.SolveWithParameters(search_parameters)
    if solution:
        print_solution(data, manager, routing, solution)

if __name__ == '__main__':
    main()
See Mizux's answer, which goes under the hood of the solver to build a summation cost over all vehicle route lengths:
https://stackoverflow.com/a/68756570/13773745
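In outline, that approach looks like the sketch below (untested against the code above; the 1,000,000 penalty and the 3,000-minute global limit are made-up values): make every address optional via AddDisjunction, constrain the sum of all route durations through the underlying solver, then count the dropped addresses after solving.

# make every address (except the depot) optional, at a dropping penalty
penalty = 1000000  # must outweigh arc costs, or everything gets dropped
for node in range(1, len(data['time_matrix'])):
    routing.AddDisjunction([manager.NodeToIndex(node)], penalty)

# bound the SUM of route durations instead of each vehicle's duration
solver = routing.solver()
route_durations = [
    time_dimension.CumulVar(routing.End(v)) - time_dimension.CumulVar(routing.Start(v))
    for v in range(data['num_vehicles'])
]
solver.Add(solver.Sum(route_durations) <= 3000)

# after solving, count the addresses that could not be visited
dropped = 0
for index in range(routing.Size()):
    if routing.IsStart(index) or routing.IsEnd(index):
        continue
    if solution.Value(routing.NextVar(index)) == index:
        dropped += 1
print('Addresses not visited:', dropped)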
I am trying to replicate the ResNet-18 paper. Before running this on the full ImageNet dataset on disk, I'm doing some evaluation runs with the publicly available imagenette/320px dataset from TFDS (a much, much smaller subset of ImageNet with 10 classes, already in .tfrecord format).
Note: the full notebook to do training and tracing is available here: resnet18_baseline.ipynb. Just switch to a GPU runtime and run all the cells. It's already set up with tensorboard profiling on the second batch. (You can use a TPU as well, but some keras.layers.experimental.preprocessing layers do not support TPU ops yet and you have to enable soft device placement. Please use a GPU.)
Input Operations
Read images from the input dataset. These images usually have different dimensions, and input tensors cannot have different dimensions within a batch, so we need some crop function. Therefore, for training I use a random crop, and for the testing/validation datasets a center crop.
random_crop_layer = keras.layers.experimental.preprocessing.RandomCrop(224, 224)
center_crop_layer = keras.layers.experimental.preprocessing.CenterCrop(224, 224)

@tf.function(experimental_relax_shapes=True)  # avoid retracing
def train_crop_fn(x, y):
    return random_crop_layer(x), y

@tf.function(experimental_relax_shapes=True)
def eval_crop_fn(x, y):
    return center_crop_layer(x), y
Perform some simple preprocessing/augmentations on the input data. These include rescaling to 0-1 and scaling based on the mean and stdev of the RGB colours on ImageNet. Also, random
rescaling_layer = keras.layers.experimental.preprocessing.Rescaling(1./255)

train_preproc = keras.Sequential([
    rescaling_layer
])

eval_preproc = keras.Sequential([  # not shown in the original snippet; assumed analogous to train_preproc
    rescaling_layer
])

# from https://github.com/tensorflow/models/blob/master/official/vision/image_classification/preprocessing.py
# Calculated from the ImageNet training set
MEAN_RGB = (0.485, 0.456, 0.406)
STDDEV_RGB = (0.229, 0.224, 0.225)

@tf.function
def z_score_scale(x):
    return (x - MEAN_RGB) / STDDEV_RGB

@tf.function
def train_preproc_fn(x, y):
    return z_score_scale(train_preproc(x)), y

@tf.function
def eval_preproc_fn(x, y):
    return z_score_scale(eval_preproc(x)), y
Input Pipeline
def get_input_pipeline(input_ds, bs, crop_fn, augmentation_fn):
    ret_ds = (
        input_ds
        .batch(1)  # pre-crop images have different dimensions and can't be batched
        .map(crop_fn,
             num_parallel_calls=tf.data.experimental.AUTOTUNE)
        .unbatch()
        .batch(bs)
        .map(augmentation_fn,  # augmentations can be batched, though
             num_parallel_calls=tf.data.experimental.AUTOTUNE)
    )
    return ret_ds
# dataset loading
def load_imagenette():
    train_ds, ds_info = tfds.load('imagenette/320px', split='train', as_supervised=True, with_info=True)
    valid_ds = tfds.load('imagenette/320px', split='validation', as_supervised=True)
    # the validation split doubles as the test set here
    return train_ds, valid_ds, valid_ds, ds_info.features['label'].num_classes
# pipeline construction
train_ds, valid_ds, test_ds, num_classes = load_imagenette()
# datasets used for training (notice that I use prefetch here)
train_samples = get_input_pipeline(train_ds, BS, train_crop_fn, train_preproc_fn).prefetch(tf.data.experimental.AUTOTUNE)
valid_samples = get_input_pipeline(valid_ds, BS, eval_crop_fn, eval_preproc_fn).prefetch(tf.data.experimental.AUTOTUNE)
test_samples = get_input_pipeline(test_ds, BS, eval_crop_fn, eval_preproc_fn).prefetch(tf.data.experimental.AUTOTUNE)
Training and Profiling
I use the tensorboard profiler to check the second batch, and I get a warning that the run is highly input bound, with about 40% of processing wasted on inputs.
For a classic resnet18 model, you can drive the batch size up to 768 without getting an OOM error, which is what I use. A single step with bs 256 takes about 2-3 seconds.
I also get a warning that on_train_batch_end is slow, at around ~1.5 seconds, compared to the ~1 second batch time.
The model training code is very simple Keras:
model.fit(
    train_samples,
    validation_data=valid_samples,
    epochs=100,
    batch_size=BS,
    use_multiprocessing=True,
    callbacks=[tensorboard_callback, model_checkpoint_callback, early_stop_callback, reduce_lr_callback]
)
and the callbacks are specified as:
log_dir = os.path.join(os.getcwd(), 'logs')
tensorboard_callback = TensorBoard(log_dir=log_dir, update_freq="epoch", profile_batch=2)
reduce_lr_callback = tf.keras.callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=5, min_lr=0.001, verbose=1)
model_checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(filepath='model.{epoch:02d}-{val_loss:.4f}.h5',
                                                               monitor='val_loss',
                                                               verbose=1,
                                                               save_best_only=True)
early_stop_callback = keras.callbacks.EarlyStopping(monitor='val_loss', patience=15)
Lastly, I have some sample tensorboard profiling screenshots (not reproduced here). I can't figure out how to make this run faster.
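One idea I have considered but not yet profiled: dropping the batch(1)/map/unbatch round-trip and cropping per sample with tf.image, which should cut the per-element overhead. A sketch, assuming 3-channel inputs at least 224 px on each side (true for imagenette/320px):

@tf.function
def train_crop_fn_v2(x, y):
    # per-sample random crop, no batch(1)/unbatch round-trip
    return tf.image.random_crop(x, size=(224, 224, 3)), y

@tf.function
def eval_crop_fn_v2(x, y):
    # deterministic center crop (pads if an image were smaller than 224)
    return tf.image.resize_with_crop_or_pad(x, 224, 224), y

def get_input_pipeline_v2(input_ds, bs, crop_fn, augmentation_fn):
    return (
        input_ds
        .map(crop_fn, num_parallel_calls=tf.data.experimental.AUTOTUNE)
        .batch(bs)
        .map(augmentation_fn, num_parallel_calls=tf.data.experimental.AUTOTUNE)
    )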