Randomly select a percentage of the dataset for each block of epochs

I have a model that should be trained on 25,000 samples for 50,000 epochs.
I want to train on a fraction of the dataset for a fraction of the epochs at a time:
for example, train on 1,000 random samples for the first 10 epochs, then on another 1,000 random samples for the next 10 epochs, and so on.
The data-loader part of my source code is as follows.
class DataModule(pl.LightningDataModule):
    """Lightning data module that serves pre-built train and validation datasets.

    Both loaders share the same batch size, collate function, worker count
    and pinned-memory setting; only the training loader shuffles.
    """

    def __init__(self, train_dataset, val_dataset, batch_size = 2):
        super().__init__()
        self.train_dataset = train_dataset
        self.val_dataset = val_dataset
        self.batch_size = batch_size

    def train_dataloader(self):
        """Return a shuffling loader over the training dataset."""
        loader_options = dict(
            batch_size=self.batch_size,
            collate_fn=collate_fn,
            num_workers=2,
            pin_memory=True,
        )
        return DataLoader(self.train_dataset, shuffle=True, **loader_options)

    def val_dataloader(self):
        """Return a fixed-order loader over the validation dataset."""
        loader_options = dict(
            batch_size=self.batch_size,
            collate_fn=collate_fn,
            num_workers=2,
            pin_memory=True,
        )
        return DataLoader(self.val_dataset, shuffle=False, **loader_options)
I understand that the code below can select a random fraction of the dataset, but I also want to train on the remaining data in later epochs.
df_fraction= df_mydataset.sample(frac=0.04)
I also understand that the code below can select a random subset, but I don't know how to make it work here, because I need to change the subset every 10 epochs.
train_sampler = SubsetRandomSampler(train_indices)
train_loader = torch.utils.data.DataLoader(dataset, batch_size=2, sampler=train_sampler)
How can I do that with batch_size=2?

Related

Major performance differences when using the tokenizer in huggingface

I'm using the dataset
from transformers import AutoTokenizer, AutoModelForTokenClassification, set_seed, AutoModelForMultipleChoice, AutoModelForMaskedLM, BertForSequenceClassification, AutoTokenizer, AutoModel
from datasets import load_dataset
mrpc = load_dataset('glue', 'mrpc')
mrpc_test = mrpc['test']
and the following tokenizer and model
baseline_model_name = 'Intel/bert-base-uncased-mrpc'
baseline_model = BertForSequenceClassification.from_pretrained(baseline_model_name).to(device)
baseline_tokenizer = AutoTokenizer.from_pretrained(baseline_model_name, truncation=True)
The issue is that I get one accuracy when using
successes = 0
batch_size = 15
# Walk the test split in fixed-size batches, feeding the two sentences to the
# tokenizer as a (text, text_pair) pair.
for sample in range(0, len(mrpc_test), batch_size):
    batch = mrpc_test[sample:sample + batch_size]
    tok = baseline_tokenizer(batch['sentence1'], batch['sentence2'], return_tensors="pt", padding=True)
    logits = baseline_model(**tok.to(device)).logits
    results_softmax = torch.softmax(logits, dim=1)
    prediction = results_softmax.argmax(dim=1)
    labels = batch['label']
    # Count how many predicted classes match the gold labels in this batch.
    successes += torch.sum(torch.tensor(labels).to(device) == prediction)
print(f'final accuracy: {successes/len(mrpc_test)}')
>>>final accuracy: 0.8307245969772339
But I get a different accuracy if I run
successes = 0
batch_size = 15
# Same evaluation loop, but each sentence pair is first glued into ONE string
# with a literal ' [SEP] ' and tokenized as a single text.
# NOTE(review): a single-text call presumably gives every token segment id 0,
# whereas the (text, text_pair) call marks the second sentence with segment
# id 1 -- likely the reason for the accuracy gap; confirm by comparing
# tok['token_type_ids'] between the two variants.
for sample in range(0, len(mrpc_test), batch_size):
    batch = mrpc_test[sample:sample + batch_size]
    sentences = [f'{s1} [SEP] {s2}' for s1, s2 in zip(batch['sentence1'], batch['sentence2'])]
    tok = baseline_tokenizer(sentences, return_tensors="pt", padding=True)
    logits = baseline_model(**tok.to(device)).logits
    results_softmax = torch.softmax(logits, dim=1)
    prediction = results_softmax.argmax(dim=1)
    labels = batch['label']
    successes += torch.sum(torch.tensor(labels).to(device) == prediction)
print(f'final accuracy: {successes/len(mrpc_test)}')
>>>final accuracy: 0.49159419536590576
Note that the only difference is that, instead of passing the two sentences to the tokenizer as separate arguments, I join them into a single string:
sentences = [f'{s1} [SEP] {s2}' for s1, s2 in zip(mrpc_test[sample:sample+batch_size]['sentence1'],mrpc_test[sample:sample+batch_size]['sentence2'])]

I get an error in my code when training a model - multivariate time series

I need help with multivariate time series forecasting using GRU / LSTM.
The dataset I am using has about 4000 rows and 7 columns.
I have already used this for input shaping:
def create_dataset(X, look_back=1, target_col=0):
    """Slice a series into overlapping look-back windows and next-step targets.

    Args:
        X: Indexable 2-D sequence (rows are time steps, columns are features).
        look_back: Number of consecutive rows in each input window.
        target_col: Column whose value at the step right after the window is
            used as the target. Defaults to 0, the column the original code
            hard-coded.

    Returns:
        A ``(windows, targets)`` pair of NumPy arrays with one entry per
        window; ``len(X) - look_back`` windows are produced.
    """
    Xs, ys = [], []
    for start in range(len(X) - look_back):
        stop = start + look_back
        Xs.append(X[start:stop])
        # The target is taken from the row immediately following the window.
        ys.append(X[stop][target_col])
    return np.array(Xs), np.array(ys)
LOOK_BACK = 30
X_train, y_train = create_dataset(train_scaled,LOOK_BACK)
X_test, y_test = create_dataset(test_scaled,LOOK_BACK)
print('X_train.shape: ', X_train.shape)
print('y_train.shape: ', y_train.shape)
print('X_test.shape: ', X_test.shape)
print('y_test.shape: ', y_test.shape)
This part is for model creation:
def create_gru(units):
    """Build a two-layer stacked GRU network with a single-unit output.

    Args:
        units: Number of units in each GRU layer.

    Returns:
        The assembled ``Sequential`` model. It is not compiled here, so the
        caller must call ``model.compile(...)`` before fitting.
    """
    model = Sequential()
    # Input layer: window length and feature count are read from X_train.
    model.add(GRU(units=units, return_sequences=True,
                  input_shape=[X_train.shape[1], X_train.shape[2]]))
    model.add(Dropout(0.2))
    # Hidden layer
    model.add(GRU(units=units))
    model.add(Dropout(0.2))
    model.add(Dense(units=1))
    # The original fell off the end of the function, handing callers None.
    return model
When I execute it, I get an error saying that a non-broadcastable output operand with shape (854,1) doesn't match the broadcast shape (854,7).
This error happens when execution reaches this part:
y_train = scaler.inverse_transform(y_train)
y_test = scaler.inverse_transform(y_test)
def prediction(model):
    """Predict on ``X_test`` and map the result back to the original scale.

    The model emits one column, while the windows in ``X_test`` carry every
    feature column. Passing the single-column output straight to
    ``scaler.inverse_transform`` is what produced the "non-broadcastable
    output operand" error, so the prediction is first placed into a
    full-width array.

    Args:
        model: Fitted model exposing ``predict``.

    Returns:
        Array of shape ``(n_samples, 1)`` with the inverse-transformed
        predictions.
    """
    scaled = np.asarray(model.predict(X_test)).reshape(-1, 1)
    # assumes the scaler was fitted on the same feature columns that make up
    # the last axis of X_test, and scales each column independently
    # (e.g. MinMaxScaler / StandardScaler) -- TODO confirm
    n_features = X_test.shape[-1]
    padded = np.zeros((scaled.shape[0], n_features))
    # Column 0 is the target column that create_dataset() extracts.
    padded[:, 0] = scaled[:, 0]
    return scaler.inverse_transform(padded)[:, [0]]
prediction_gru = prediction(model_gru)
prediction_bilstm = prediction(model_bilstm)

Tensorflow/Keras: volatile validation loss

I've been training a U-Net for single-class small-lesion segmentation, and have been getting consistently volatile validation loss. I have about 20k images split 70/30 between training and validation sets, so I don't think the issue is too little data. I've tried shuffling and resplitting the sets a few times with no change in volatility, so I don't think the validation set is unrepresentative. I have tried lowering the learning rate with no effect on volatility. And I have tried a few loss functions (dice coefficient, focal Tversky, weighted binary cross-entropy). I'm using a decent amount of augmentation so as to avoid overfitting. I've also run through all my data (512x512 float64s with corresponding 512x512 int64 masks, both stored as numpy arrays) to double-check that the value range, dtypes, etc. aren't screwy... and I even removed any ROIs in the masks under 35 pixels in area, which I thought might be artifacts messing with the loss.
I'm using keras ImageDataGen.flow_from_directory...I was initially using zca_whitening and brightness_range augmentation but I think this causes issues with flow_from_directory and the link between mask and image being lost.. so I skipped this.
I've tried validation generators with and without shuffle=True. Batch size is 8.
Here's some of my code, happy to include more if it would help:
# loss
from keras.losses import binary_crossentropy
import keras.backend as K
import tensorflow as tf
epsilon = 1e-5
smooth = 1
def dsc(y_true, y_pred):
    """Return the smoothed Dice coefficient of the flattened inputs.

    Computed as (2 * sum(t * p) + 1) / (sum(t) + sum(p) + 1).
    """
    smoothing = 1.
    truth = K.flatten(y_true)
    pred = K.flatten(y_pred)
    overlap = K.sum(truth * pred)
    denominator = K.sum(truth) + K.sum(pred) + smoothing
    return (2. * overlap + smoothing) / denominator
def dice_loss(y_true, y_pred):
    """Return one minus the Dice coefficient computed by ``dsc``."""
    return 1 - dsc(y_true, y_pred)
def bce_dice_loss(y_true, y_pred):
    """Return binary cross-entropy plus the Dice loss for the same inputs."""
    cross_entropy = binary_crossentropy(y_true, y_pred)
    dice_term = dice_loss(y_true, y_pred)
    return cross_entropy + dice_term
def confusion(y_true, y_pred):
    """Return smoothed ``(precision, recall)`` from clipped, unrounded inputs.

    Both inputs are clipped to [0, 1]; the counts are sums of products, so
    they are soft counts rather than thresholded ones.
    """
    smoothing = 1
    pred_pos = K.clip(y_pred, 0, 1)
    true_pos = K.clip(y_true, 0, 1)
    pred_neg = 1 - pred_pos
    true_neg = 1 - true_pos
    hits = K.sum(true_pos * pred_pos)
    false_alarms = K.sum(true_neg * pred_pos)
    misses = K.sum(true_pos * pred_neg)
    prec = (hits + smoothing) / (hits + false_alarms + smoothing)
    recall = (hits + smoothing) / (hits + misses + smoothing)
    return prec, recall
def tp(y_true, y_pred):
    """Return the smoothed true-positive rate after rounding both inputs.

    Inputs are clipped to [0, 1] and rounded before counting.
    """
    smoothing = 1
    predicted_pos = K.round(K.clip(y_pred, 0, 1))
    actual_pos = K.round(K.clip(y_true, 0, 1))
    matched = K.sum(actual_pos * predicted_pos)
    return (matched + smoothing) / (K.sum(actual_pos) + smoothing)
def tn(y_true, y_pred):
    """Return the smoothed true-negative rate after rounding both inputs.

    Inputs are clipped to [0, 1] and rounded; negatives are the complements.
    """
    smoothing = 1
    predicted_neg = 1 - K.round(K.clip(y_pred, 0, 1))
    actual_neg = 1 - K.round(K.clip(y_true, 0, 1))
    matched = K.sum(actual_neg * predicted_neg)
    return (matched + smoothing) / (K.sum(actual_neg) + smoothing)
def tversky(y_true, y_pred):
    """Return the Tversky index of the flattened inputs.

    False negatives are weighted by ``alpha = 0.7`` and false positives by
    ``1 - alpha``. The smoothing term is the module-level ``smooth``.
    """
    alpha = 0.7
    truth = K.flatten(y_true)
    pred = K.flatten(y_pred)
    true_pos = K.sum(truth * pred)
    false_neg = K.sum(truth * (1 - pred))
    false_pos = K.sum((1 - truth) * pred)
    denominator = true_pos + alpha * false_neg + (1 - alpha) * false_pos + smooth
    return (true_pos + smooth) / denominator
def tversky_loss(y_true, y_pred):
    """Return one minus the Tversky index computed by ``tversky``."""
    index = tversky(y_true, y_pred)
    return 1 - index
def focal_tversky(y_true,y_pred):
    """Return the Tversky loss raised to the power ``gamma = 0.75``."""
    gamma = 0.75
    tversky_deficit = 1 - tversky(y_true, y_pred)
    return K.pow(tversky_deficit, gamma)
model = BlockModel((len(os.listdir(os.path.join(imageroot,'train_ct','train'))), 512, 512, 1),filt_num=16,numBlocks=4)
#model.compile(optimizer=Adam(learning_rate=0.001), loss=weighted_cross_entropy)
#model.compile(optimizer=Adam(learning_rate=0.001), loss=dice_coef_loss)
model.compile(optimizer=Adam(learning_rate=0.001), loss=focal_tversky)
train_mask = os.path.join(imageroot,'train_masks')
val_mask = os.path.join(imageroot,'val_masks')
model.load_weights(model_weights_path) #I'm initializing with some pre-trained weights from a similar model
data_gen_args_mask = dict(
rotation_range=10,
shear_range=20,
width_shift_range=0.1,
height_shift_range=0.1,
zoom_range=[0.8,1.2],
horizontal_flip=True,
#vertical_flip=True,
fill_mode='nearest',
data_format='channels_last'
)
data_gen_args = dict(
**data_gen_args_mask
)
image_datagen_train = ImageDataGenerator(**data_gen_args)
mask_datagen_train = ImageDataGenerator(**data_gen_args)#_mask)
image_datagen_val = ImageDataGenerator()
mask_datagen_val = ImageDataGenerator()
seed = 1
BS = 8
steps = int(np.floor((len(os.listdir(os.path.join(train_ct,'train'))))/BS))
print(steps)
val_steps = int(np.floor((len(os.listdir(os.path.join(val_ct,'val'))))/BS))
print(val_steps)
train_image_generator = image_datagen_train.flow_from_directory(
train_ct,
target_size = (512, 512),
color_mode = ("grayscale"),
classes=None,
class_mode=None,
seed = seed,
shuffle = True,
batch_size = BS)
train_mask_generator = mask_datagen_train.flow_from_directory(
train_mask,
target_size = (512, 512),
color_mode = ("grayscale"),
classes=None,
class_mode=None,
seed = seed,
shuffle = True,
batch_size = BS)
# Validation images and masks come from two independent directory iterators.
# Nothing here ties an image to its mask explicitly: the pairing relies on
# both iterators receiving the same seed, shuffle flag and batch size.
# NOTE(review): this presumably also requires both directory trees to list
# their files in the same order -- confirm.
val_image_generator = image_datagen_val.flow_from_directory(
val_ct,
target_size = (512, 512),
color_mode = ("grayscale"),
classes=None,
class_mode=None,
seed = seed,
# NOTE(review): shuffling the validation stream is not needed for evaluation;
# with validation_steps = floor(n / BS) a shuffled stream may score a slightly
# different subset each epoch -- consider shuffle=False and confirm.
shuffle = True,
batch_size = BS)
val_mask_generator = mask_datagen_val.flow_from_directory(
val_mask,
target_size = (512, 512),
color_mode = ("grayscale"),
classes=None,
class_mode=None,
seed = seed,
shuffle = True,
batch_size = BS)
# zip() pairs the n-th image batch with the n-th mask batch of each stream.
train_generator = zip(train_image_generator, train_mask_generator)
val_generator = zip(val_image_generator, val_mask_generator)
# make callback for checkpointing
plot_losses = PlotLossesCallback(skip_first=0,plot_extrema=False)
%matplotlib inline
filepath = os.path.join(versionPath, model_version + "_saved-model-{epoch:02d}-{val_loss:.2f}.hdf5")
if reduce:
cb_check = [ModelCheckpoint(filepath,monitor='val_loss',
verbose=1,save_best_only=False,
save_weights_only=True,mode='auto',period=1),
reduce_lr,
plot_losses]
else:
cb_check = [ModelCheckpoint(filepath,monitor='val_loss',
verbose=1,save_best_only=False,
save_weights_only=True,mode='auto',period=1),
plot_losses]
# train model
history = model.fit_generator(train_generator, epochs=numEp,
steps_per_epoch=steps,
validation_data=val_generator,
validation_steps=val_steps,
verbose=1,
callbacks=cb_check,
use_multiprocessing = False
)
And here's how my loss looks:
Another potentially relevant thing: I tweaked the flow_from_directory code a bit (added npy to the whitelist of file formats). But the training loss looks fine, so I'm assuming the issue isn't there.
Two suggestions:
Switch to the classic validation data format (i.e. numpy array) instead of using a generator -- this will ensure you always use the exactly same validation data every time. If you see a different validation curve, then there is something "random" in the validation generator giving you different data at different epochs.
Use a fixed set of samples (100 or 1000 should be enough w/o any data augmentation) for both training and validation. If everything goes well, you should see your network quickly overfit to this dataset and your training and validation curves should very much similar. If not, debug your network.

Why does this error pop up, what are your thoughts on my neural network/genetic algorithm?

Preamble:
This is a combination of my first and second programs in Python (besides hello-world-level tutorials). Any questions I've had have led me to this site, so it seemed fitting that I post it here. I come from a TI-Basic background, so if you have no idea why I did something one way when it should be done another way, that is likely why.
My first program was a genetic learning algorithm. Its testing setup was/is to guess your input string. There is currently a problem with it, but it only slightly affects the efficiency of the program.[1]
My second is a simple feed forward neural network (I am currently only working on the xor problem). Some of the code for customizing the variables (the number of inputs, the number of outputs, the number of hidden layers, the number of neurons in those hidden layers) is there but is currently not my focus.
What I am trying to do now is train my network with my genetic algorithm. All seems to be fine, but I keep getting an unexplainable error.
Traceback (most recent call last):
File "python", line 174, in <module>
File "python", line 68, in fitness_function
File "python", line 146, in weight_dot_value_plus_bias
TypeError: 'int' object is not subscriptable
Now the weird thing is, the code this is referring to is a direct transfer of code from the original neural network.
I am using repl.it as my compiler, could that be the problem?
import random
from random import choice
from random import randint
# Global variables
# Genes per individual; matches the 15 weight/bias arguments NN_setup takes.
length_of_phrase = 15
generation_number = 0
max_number_of_generations = 250
# Number of individuals (rows of individual_by_gene_matrix).
population = 150
# Percentages used to derive the head-counts computed further down.
perckill = 40
percparents = 35
percrandom = 1
percmutate = 1
# Placeholder; replaced by random_matrix_generator() before the main loop.
individual_by_gene_matrix = [0]
one = 1
zero = 0
# Network layout: 2 inputs, 3 hidden neurons, 1 output.
number_of_layers = 3
number_of_neurons = [2,3,1]
# Placeholder network state (values, weights, biases): one int 0 per layer.
# weight_dot_value_plus_bias() and sigmoid() read these module-level names.
nnv = [0]*number_of_layers
nnw = [0]*number_of_layers
nnb = [0]*number_of_layers
# Current XOR inputs; redrawn at the start of every generation.
val1 = randint(0,1)
val2 = randint(0,1)
# Individuals that survive kill(): (100 - perckill)% of the population.
living = int(((100 - perckill)*population)//100)
dead = population - living
# Killed slots that reproduce() refills with brand-new random genes.
random_strings = int((( percrandom)*population)//100)
reproduced_strings = int(living + random_strings)
# Upper bound (inclusive) of the index range reproduce() draws parents from.
# NOTE(review): this is (100 - percparents)% of the population (97 with the
# values above), which is larger than `living` (90) -- confirm whether
# percparents% was intended.
parents = int(((100 - percparents)*population)//100)
"""
print(living)
print(dead)
print(population)
print(random_strings)
print(reproduced_strings)
"""
def random_matrix_generator():
    """Create the initial population matrix.

    Returns a list with ``population`` rows (individuals), each holding
    ``length_of_phrase`` genes (traits) drawn as ``randint(-200, 200) / 100``.
    """
    from random import randint
    matrix = []
    for _ in range(population):
        # One row per individual, one column per gene.
        genes = [randint(-200, 200)/100 for _ in range(length_of_phrase)]
        matrix.append(genes)
    return matrix
"""
def convert_matrix_into_list_of_stings():
listofstrings = [ () for var in range( population)]
for var in range( population ):
list = individual_by_gene_matrix[var] #creates a list for each individual with their traits
lista = [ (chr(n )) for n in list] #the traits become letters
listofstrings[var] = ''.join(lista) #creates a list of all the individuals with letters joined
return(listofstrings)
"""
def fitness_function():
    """Score every individual on the current XOR sample (val1, val2).

    For each individual, its first 15 genes become the weights and biases of
    the network built by NN_setup(). The network is evaluated layer by layer,
    and the fitness is ``1 - |xor(val1, val2) - output|``.

    Returns:
        The module-level ``fitness`` list, updated in place.
    """
    # NN_setup()'s results must land in the module-level network state,
    # because weight_dot_value_plus_bias() and sigmoid() read those globals.
    # Without this declaration the names were locals of this function, the
    # helpers still saw the placeholder ``[0] * number_of_layers`` lists, and
    # indexing into them raised "TypeError: 'int' object is not subscriptable".
    global number_of_layers, number_of_neurons, nnv, nnw, nnb
    for individual in range(population):
        genes = individual_by_gene_matrix[individual]
        number_of_layers, number_of_neurons, nnv, nnw, nnb = NN_setup(
            val1, val2, *genes[:15])
        # Layer 0 holds the inputs, so forward propagation starts at layer 1.
        for layer in range(1, number_of_layers):
            nnv = weight_dot_value_plus_bias(layer)
            nnv = sigmoid(layer)
        fitness[individual] = 1 - abs((val1 ^ val2) - nnv[2][0])
    return fitness
def matrix_reorder():
    """Return the population and its fitness values sorted best-first.

    The first ``population`` entries of the module-level ``fitness`` list are
    ranked in descending order, and the rows of ``individual_by_gene_matrix``
    are arranged to match. Equal fitness values keep their original relative
    order, as the repeated ``index(max(...))`` selection did.

    Unlike the original, this does not empty the two module-level lists while
    sorting; both call sites rebind them to the returned values.

    Returns:
        ``(sorted_matrix, sorted_fitness)``.
    """
    # A stable sort replaces the O(n^2) index(max)/pop selection loop;
    # reverse=True keeps ties in their original order.
    ranking = sorted(range(population), key=lambda idx: fitness[idx], reverse=True)
    sorted_matrix = [individual_by_gene_matrix[idx] for idx in ranking]
    sorted_fitness = [fitness[idx] for idx in ranking]
    return sorted_matrix, sorted_fitness
def kill():
    """Replace every individual from index ``living`` onward with zero genes.

    Returns the module-level ``individual_by_gene_matrix``.
    """
    doomed_slots = range(living, population)
    for slot in doomed_slots:
        individual_by_gene_matrix[slot] = [0] * length_of_phrase
    return individual_by_gene_matrix
def reproduce():
    """Refill the slots emptied by kill().

    Slots ``living .. reproduced_strings - 1`` receive fresh random genes.
    The remaining slots up to ``population`` receive, gene by gene, a random
    pick between two parents drawn from indices ``0 .. parents`` (inclusive).

    Returns the module-level ``individual_by_gene_matrix``.
    """
    matrix = individual_by_gene_matrix
    # Brand-new random individuals.
    for slot in range(living, reproduced_strings):
        newcomer = matrix[slot]
        for locus in range(length_of_phrase):
            newcomer[locus] = randint(-200, 200)/100
    # Children built by per-gene crossover of two randomly chosen parents.
    for slot in range(reproduced_strings, population):
        mom = randint(0, parents)
        dad = randint(0, parents)
        child = matrix[slot]
        for locus in range(length_of_phrase):
            candidates = [matrix[mom][locus], matrix[dad][locus]]
            child[locus] = random.choice(candidates)
    return matrix
def mutate():
    """Randomly perturb genes across the whole population.

    A gene mutates when ``randint(0, 100) <= percmutate``; it is then replaced
    by a Gaussian draw centred on its current value with sigma 0.5.

    Returns the module-level ``individual_by_gene_matrix``.
    """
    for slot in range(population):
        genes = individual_by_gene_matrix[slot]
        for locus in range(length_of_phrase):
            if randint(0, 100) > percmutate:
                continue
            genes[locus] = random.gauss(genes[locus], 0.5)
    return individual_by_gene_matrix
def NN_setup(val1,val2,w100,w101,w110,w111,w120,w121,w200,w201,w202,b00,b01,b10,b11,b12,b20):
    """Build the state of a fixed 2-3-1 feed-forward network.

    Args:
        val1, val2: Input values placed in layer 0.
        w100 .. w121: Weights of the three hidden neurons (two each).
        w200 .. w202: Weights of the single output neuron.
        b00 .. b20: Biases, one per neuron in layers 0, 1 and 2.

    Returns:
        ``(number_of_layers, number_of_neurons, nnv, nnw, nnb)`` where ``nnv``
        holds neuron values, ``nnw`` the weights and ``nnb`` the biases, each
        indexed by layer.
    """
    number_of_layers = 3
    number_of_neurons = [2, 3, 1]
    # The original first filled nnv/nnw/nnb with zero placeholders in a loop
    # and then overwrote all three with the literals below; that dead
    # initialisation has been removed.
    nnv = [[val1, val2], [0.0, 0.0, 0.0], [0.0]]
    nnw = [
        ['inputs have no weight'],
        [[w100, w101], [w110, w111], [w120, w121]],
        [[w200, w201, w202]],
    ]
    nnb = [[b00, b01], [b10, b11, b12], [b20]]
    return number_of_layers, number_of_neurons, nnv, nnw, nnb
|
|
|
v
def weight_dot_value_plus_bias(layer):
    """Accumulate the weighted inputs and bias into every neuron of ``layer``.

    Works in place on the module-level ``nnv``, using ``nnw``, ``nnb`` and
    ``number_of_neurons``; values are added to whatever ``nnv[layer]``
    already holds.

    Args:
        layer: Index of the layer to update; its inputs are ``nnv[layer - 1]``.

    Returns:
        The module-level ``nnv``.
    """
    for nueron in range(number_of_neurons[layer]):
        for weight in range(number_of_neurons[layer - 1]):
            # This is the line the posted traceback points at. The '--->'
            # marker that preceded it was an annotation, not code, and made
            # the statement invalid syntax, so it has been removed.
            nnv[layer][nueron] += nnv[layer - 1][weight] * nnw[layer][nueron][weight]
        nnv[layer][nueron] += nnb[layer][nueron]
    return nnv
def sigmoid(layer):
    """Squash every neuron value of ``layer`` in place and return ``nnv``.

    Applies ``1 / (1 + 3 ** -x)`` to each value of the module-level ``nnv``.
    NOTE(review): the base is 3 rather than e -- confirm this is intended.
    """
    activations = nnv[layer]
    for idx in range(number_of_neurons[layer]):
        activations[idx] = (1/(1+3**(-activations[idx])))
    return nnv
# Evolve the population: score, rank, cull, refill, mutate, report.
individual_by_gene_matrix = random_matrix_generator()
while generation_number <= max_number_of_generations:
    # Draw a fresh XOR sample for this generation.
    val1 = randint(0, 1)
    val2 = randint(0, 1)
    fitness = [0] * population
    fitness = fitness_function()
    individual_by_gene_matrix, fitness = matrix_reorder()
    individual_by_gene_matrix = kill()
    individual_by_gene_matrix = reproduce()
    individual_by_gene_matrix = mutate()
    individual_by_gene_matrix, fitness = matrix_reorder()
    # Fixed: the original wrote ``10000(fitness[0])`` (calling an int) and
    # closed format() after two arguments, leaving four placeholders unfilled.
    print('{} {} {} {}'.format(generation_number, (10000 * fitness[0]) // 100, val1, val2))
    generation_number += 1
print('')
print('')
print(individual_by_gene_matrix[0])
That was way too many indents!!!
How the hell do I just insert a block of code????!!!!!
I'll give you the source code to the individual programs once I learn how to insert a block of code
[1] You're going to have to wait until I give you the source code for just the genetic algorithm
Any tips, suggestions, maybe how would you write the code to what I'm trying to do?

how to get reproducible result in Tensorflow

I built 5-layer neural network by using tensorflow.
I have a problem to get reproducible results (or stable results).
I found similar questions regarding reproducibility of tensorflow and the corresponding answers, such as How to get stable results with TensorFlow, setting random seed
But the problem is not solved yet.
I also set random seed like the following
tf.set_random_seed(1)
Furthermore, I added seed options to every random function such as
b1 = tf.Variable(tf.random_normal([nHidden1], seed=1234))
I confirmed that the first epoch gives identical results, but from the second epoch onward the results gradually diverge.
How can I get the reproducible results?
Am I missing something?
Here is a code block I use.
def xavier_init(n_inputs, n_outputs, uniform=True):
    """Return a seeded Xavier-style initializer for a weight matrix.

    With ``uniform=True`` the initializer is uniform on ``[-r, r]`` with
    ``r = sqrt(6 / (n_inputs + n_outputs))``. Otherwise it is a truncated
    normal with ``stddev = sqrt(3 / (n_inputs + n_outputs))``. Both use
    seed 1234.
    """
    fan_total = n_inputs + n_outputs
    if not uniform:
        stddev = tf.sqrt(3.0 / fan_total)
        return tf.truncated_normal_initializer(stddev=stddev, seed=1234)
    init_range = tf.sqrt(6.0 / fan_total)
    return tf.random_uniform_initializer(-init_range, init_range, seed=1234)
import numpy as np
import tensorflow as tf
import dataSetup
from scipy.stats.stats import pearsonr
tf.set_random_seed(1)
x_train, y_train, x_test, y_test = dataSetup.input_data()
# Parameters
learningRate = 0.01
trainingEpochs = 1000000
batchSize = 64
displayStep = 100
thresholdReduce = 1e-6
thresholdNow = 0.6
#dropoutRate = tf.constant(0.7)
# Network Parameter
nHidden1 = 128 # number of 1st layer nodes
nHidden2 = 64 # number of 2nd layer nodes
nInput = 24 #
nOutput = 1 # Predicted score: 1 output for regression
# save parameter
modelPath = 'model/model_layer5_%d_%d_mini%d_lr%.3f_noDrop_rollBack.ckpt' %(nHidden1, nHidden2, batchSize, learningRate)
# tf Graph input
X = tf.placeholder("float", [None, nInput])
Y = tf.placeholder("float", [None, nOutput])
# Weight
W1 = tf.get_variable("W1", shape=[nInput, nHidden1], initializer=xavier_init(nInput, nHidden1))
W2 = tf.get_variable("W2", shape=[nHidden1, nHidden2], initializer=xavier_init(nHidden1, nHidden2))
W3 = tf.get_variable("W3", shape=[nHidden2, nHidden2], initializer=xavier_init(nHidden2, nHidden2))
W4 = tf.get_variable("W4", shape=[nHidden2, nHidden2], initializer=xavier_init(nHidden2, nHidden2))
WFinal = tf.get_variable("WFinal", shape=[nHidden2, nOutput], initializer=xavier_init(nHidden2, nOutput))
# biases
b1 = tf.Variable(tf.random_normal([nHidden1], seed=1234))
b2 = tf.Variable(tf.random_normal([nHidden2], seed=1234))
b3 = tf.Variable(tf.random_normal([nHidden2], seed=1234))
b4 = tf.Variable(tf.random_normal([nHidden2], seed=1234))
bFinal = tf.Variable(tf.random_normal([nOutput], seed=1234))
# Layers for dropout
L1 = tf.nn.relu(tf.add(tf.matmul(X, W1), b1))
L2 = tf.nn.relu(tf.add(tf.matmul(L1, W2), b2))
L3 = tf.nn.relu(tf.add(tf.matmul(L2, W3), b3))
L4 = tf.nn.relu(tf.add(tf.matmul(L3, W4), b4))
hypothesis = tf.add(tf.matmul(L4, WFinal), bFinal)
print "Layer setting DONE..."
# define loss and optimizer
cost = tf.reduce_mean(tf.square(hypothesis - Y))
optimizer = tf.train.AdamOptimizer(learning_rate=learningRate).minimize(cost)
# Initialize the variable
init = tf.initialize_all_variables()
# save op to save and restore all the variables
saver = tf.train.Saver()
# Training session: mini-batch training with checkpoint roll-back when the
# epoch cost rises, and early stopping once the cost stalls or is low enough.
# (Indentation was lost in this listing; the statements are in original order.)
with tf.Session() as sess:
# initialize
sess.run(init)
print "Initialize DONE..."
# Training
costPrevious = 100000000000000.0
best = float("INF")
totalBatch = int(len(x_train)/batchSize)
print "Total Batch: %d" %totalBatch
for epoch in range(trainingEpochs):
#print "EPOCH: %04d" %epoch
avgCost = 0.
for i in range(totalBatch):
# NumPy is re-seeded for every batch with i + epoch, so any two
# (batch, epoch) pairs with the same sum draw the same sample indices.
np.random.seed(i+epoch)
randidx = np.random.randint(len(x_train), size=batchSize)
batch_xs = x_train[randidx,:]
batch_ys = y_train[randidx,:]
# Fit training using batch data
sess.run(optimizer, feed_dict={X:batch_xs, Y:batch_ys})
# compute average loss (evaluated on the same batch, after the update step)
avgCost += sess.run(cost, feed_dict={X:batch_xs, Y:batch_ys})/totalBatch
# compare the current cost and the previous
# if current cost > the previous (by more than 0.5)
# restore the last saved checkpoint and skip the rest of this epoch
#print "Cost: %1.8f --> %1.8f at epoch %05d" %(costPrevious, avgCost, epoch+1)
if avgCost > costPrevious + .5:
#sess.run(init)
# NOTE(review): this restores whatever checkpoint is stored at modelPath;
# a file left over from an earlier run would be picked up as well --
# confirm whether that explains the run-to-run differences.
load_path = saver.restore(sess, modelPath)
print "Cost increases at the epoch %05d" %(epoch+1)
print "Cost: %1.8f --> %1.8f" %(costPrevious, avgCost)
continue
costNow = avgCost
reduceCost = abs(costPrevious - costNow)
costPrevious = costNow
#Display logs per epoch step
# Keep the test predictions and checkpoint of the lowest-cost epoch so far.
if costNow < best:
best = costNow
bestMatch = sess.run(hypothesis, feed_dict={X:x_test})
# model save
save_path = saver.save(sess, modelPath)
if epoch % displayStep == 0:
print "step {}".format(epoch)
pearson = np.corrcoef(bestMatch.flatten(), y_test.flatten())
print 'train loss = {}, current loss = {}, test corrcoef={}'.format(best, costNow, pearson[0][1])
# Stop when the cost change is below thresholdReduce or the cost itself is
# below thresholdNow.
if reduceCost < thresholdReduce or costNow < thresholdNow:
# costPrevious was already overwritten with costNow above, so the "Prev"
# value printed here equals "Cost".
print "Epoch: %04d, Cost: %.9f, Prev: %.9f, Reduce: %.9f" %(epoch+1, costNow, costPrevious, reduceCost)
break
print "Optimization Finished"
It seems that your results are perhaps not reproducible because you are using Saver to write/restore from checkpoint each time? (i.e. the second time that you run the code, the variable values aren't initialized using your random seed -- they are restored from your previous checkpoint)
Please trim down your code example to just the code necessary to reproduce irreproducibility.

Resources