plot decision paths for all of the test samples - graphviz

There is this code, obtained from:
How to display the path of a Decision Tree for test samples?
Basically, it plots the decision path of a single sample over the decision tree plot to show how a specific prediction was made:
import pydotplus
from sklearn.datasets import load_iris
from sklearn import tree

clf = tree.DecisionTreeClassifier(random_state=42)
iris = load_iris()
clf = clf.fit(iris.data, iris.target)

dot_data = tree.export_graphviz(clf, out_file=None,
                                feature_names=iris.feature_names,
                                class_names=iris.target_names,
                                filled=True, rounded=True,
                                special_characters=True)
graph = pydotplus.graph_from_dot_data(dot_data)

# empty all nodes, i.e. set color to white and number of samples to zero
for node in graph.get_node_list():
    if node.get_attributes().get('label') is None:
        continue
    if 'samples = ' in node.get_attributes()['label']:
        labels = node.get_attributes()['label'].split('<br/>')
        for i, label in enumerate(labels):
            if label.startswith('samples = '):
                labels[i] = 'samples = 0'
        node.set('label', '<br/>'.join(labels))
        node.set_fillcolor('white')

samples = iris.data[129:130]
decision_paths = clf.decision_path(samples)

for decision_path in decision_paths:
    for n, node_value in enumerate(decision_path.toarray()[0]):
        if node_value == 0:
            continue
        node = graph.get_node(str(n))[0]
        node.set_fillcolor('green')
        labels = node.get_attributes()['label'].split('<br/>')
        for i, label in enumerate(labels):
            if label.startswith('samples = '):
                labels[i] = 'samples = {}'.format(int(label.split('=')[1]) + 1)
        node.set('label', '<br/>'.join(labels))

filename = 'tree.png'
graph.write_png(filename)
What I want to do is to plot all of the samples' decision paths in different plots in my Jupyter notebook. What should I add to the code?

As the question doesn't provide concrete expected results, I assume that you want to plot the decision paths of all your classification results.
One solution is to add one more layer of for loop. However, it may affect your program's performance, so use it with caution.
import pydotplus
from IPython.display import Image, display_png  # needed for inline display in a notebook
from sklearn.datasets import load_iris
from sklearn import tree

clf = tree.DecisionTreeClassifier(random_state=42)
iris = load_iris()
clf = clf.fit(iris.data, iris.target)

dot_data = tree.export_graphviz(clf, out_file=None,
                                feature_names=iris.feature_names,
                                class_names=iris.target_names,
                                filled=True, rounded=True,
                                special_characters=True)
graph = pydotplus.graph_from_dot_data(dot_data)

# empty all nodes, i.e. set color to white and number of samples to zero
for node in graph.get_node_list():
    if node.get_attributes().get('label') is None:
        continue
    if 'samples = ' in node.get_attributes()['label']:
        labels = node.get_attributes()['label'].split('<br/>')
        for i, label in enumerate(labels):
            if label.startswith('samples = '):
                labels[i] = 'samples = 0'
        node.set('label', '<br/>'.join(labels))
        node.set_fillcolor('white')

# samples = iris.data[129:130]
samples = iris.data[120:130]  # <-- classifying 10 samples
decision_paths = clf.decision_path(samples)

for decision_path in decision_paths:
    for path in decision_path.toarray():  # <-- one more layer of for loop, to visit each path
        # for n, node_value in enumerate(decision_path.toarray()[0]):
        for n, node_value in enumerate(path):
            if node_value == 0:
                continue
            node = graph.get_node(str(n))[0]
            node.set_fillcolor('green')
            labels = node.get_attributes()['label'].split('<br/>')
            for i, label in enumerate(labels):
                if label.startswith('samples = '):
                    labels[i] = 'samples = {}'.format(int(label.split('=')[1]) + 1)
            node.set('label', '<br/>'.join(labels))

# display it inline
display_png(Image(graph.create_png()))

# or save it as png
filename = 'tree.png'
graph.write_png(filename)
The result: a single tree plot in which every node visited by the ten samples is filled in green, with its sample count incremented accordingly.
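If you want each sample's path on its own plot, as the question asks, one approach (a sketch reusing clf, iris, and dot_data from the code above) is to rebuild a fresh graph inside the loop and display one figure per sample:

from IPython.display import Image, display_png
import pydotplus

samples = iris.data[120:130]
node_indicator = clf.decision_path(samples)

for row in node_indicator.toarray():             # one row of node indicators per sample
    g = pydotplus.graph_from_dot_data(dot_data)  # fresh copy of the tree each time
    for n, visited in enumerate(row):
        if visited:
            g.get_node(str(n))[0].set_fillcolor('green')
    display_png(Image(g.create_png()))           # a separate figure per sample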

Related

DecisionTreeClassifier cost complexity pruning ccp_alpha

I have this code which models the imbalanced classes via a decision tree, but somehow the ccp_alpha picked at the end is not the right value: it should be around 0.005, yet the code picks 0.020.
I am not sure why ccp_alpha=0.02044841897041862 is chosen instead of 0.005, as suggested by the graph of
"Recall vs alpha for training and testing sets".
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import recall_score

class_weight_t = {0: 0.07, 1: 0.89}
clf = DecisionTreeClassifier(random_state=1, class_weight=class_weight_t)
path = clf.cost_complexity_pruning_path(X_train, y_train)
ccp_alphas, impurities = path.ccp_alphas, path.impurities
pd.DataFrame(path)

clfs = []
for ccp_alpha in ccp_alphas:
    clf = DecisionTreeClassifier(
        random_state=1, ccp_alpha=ccp_alpha, class_weight=class_weight_t
    )
    clf.fit(X_train, y_train)
    clfs.append(clf)
    # print(str(clf) + "," + str(ccp_alpha) + "," + str(clfs[-1].tree_.node_count))

print(
    "Number of nodes in the last tree is: {} with ccp_alpha: {}".format(
        clfs[-1].tree_.node_count, ccp_alphas[-1]
    )
)
Number of nodes in the last tree is: 1 with ccp_alpha: 0.29696815935983295
recall_train = []
for clf in clfs:
    pred_train = clf.predict(X_train)
    values_train = recall_score(y_train, pred_train)
    recall_train.append(values_train)

recall_test = []
for clf in clfs:
    pred_test = clf.predict(X_test)
    values_test = recall_score(y_test, pred_test)
    recall_test.append(values_test)

fig, ax = plt.subplots(figsize=(15, 5))
ax.set_xlabel("alpha")
ax.set_ylabel("Recall")
ax.set_title("Recall vs alpha for training and testing sets")
ax.plot(
    ccp_alphas, recall_train, marker="o", label="train", drawstyle="steps-post",
)
ax.plot(ccp_alphas, recall_test, marker="o", label="test", drawstyle="steps-post")
# ax.plot(
#     ccp_alphas, train_scores, marker="o", label="train", drawstyle="steps-post",
# )
# ax.plot(ccp_alphas, test_scores, marker="o", label="test", drawstyle="steps-post")
ax.legend()
plt.show()
https://i.stack.imgur.com/0imAq.png
index_best_model = np.argmax(recall_test)
best_model = clfs[index_best_model]
print(best_model)

DecisionTreeClassifier(ccp_alpha=0.02044841897041862, class_weight={0: 0.07, 1: 0.89}, random_state=1)
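One way to see which alpha actually wins is to print the test recall per alpha; note that np.argmax returns the index of the first maximum, so the selected alpha is simply the first one attaining the highest test recall. A minimal diagnostic sketch, reusing ccp_alphas and recall_test from the code above:

best = np.argmax(recall_test)  # index of the FIRST maximum
for i, (a, r) in enumerate(zip(ccp_alphas, recall_test)):
    marker = "  <-- selected" if i == best else ""
    print("alpha={:.5f}  test recall={:.4f}{}".format(a, r, marker))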

Why does this error pop up, what are your thoughts on my neural network/genetic algorithm?

Preamble:
This is a combination of my first and second programs in Python (besides hello-world level tutorials). Any questions I've had have led me to this site, so it seemed fitting that I post it here. I come from a TI-BASIC background, so if you have no idea why I did something this way when you should do it that way, that is likely why.
My first program was a genetic learning algorithm. Its testing setup was/is to guess your input string. There is currently a problem with it, but it only slightly affects the efficiency of the program.[1]
My second is a simple feed-forward neural network (I am currently only working on the XOR problem). Some of the code for customizing the variables (the number of inputs, the number of outputs, the number of hidden layers, the number of neurons in those hidden layers) is there but is currently not my focus.
What I am trying to do now is train my network with my genetic algorithm. All seems to be fine, but I keep getting an error I can't explain.
Traceback (most recent call last):
File "python", line 174, in <module>
File "python", line 68, in fitness_function
File "python", line 146, in weight_dot_value_plus_bias
TypeError: 'int' object is not subscriptable
Now the weird thing is, the code this refers to is a direct transfer of code from the original neural network.
I am using repl.it to run the code; could that be the problem?
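One hint about what this error type means (my reading of the code below, not a confirmed answer): the tuple assignment inside fitness_function only creates local names, so weight_dot_value_plus_bias still sees the module-level nnv = [0] * number_of_layers, and indexing into the int 0 raises exactly this TypeError. A minimal standalone sketch of the failure mode:

number_of_layers = 3
nnv = [0] * number_of_layers   # [0, 0, 0] -- plain ints, not nested lists

def weight_dot_value_plus_bias(layer):
    return nnv[layer - 1][0]   # nnv[0] is the int 0

weight_dot_value_plus_bias(1)  # TypeError: 'int' object is not subscriptable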
import random
from random import choice
from random import randint

# Global variables
length_of_phrase = 15
generation_number = 0
max_number_of_generations = 250
population = 150
perckill = 40
percparents = 35
percrandom = 1
percmutate = 1
individual_by_gene_matrix = [0]
one = 1
zero = 0
number_of_layers = 3
number_of_neurons = [2, 3, 1]
nnv = [0] * number_of_layers
nnw = [0] * number_of_layers
nnb = [0] * number_of_layers
val1 = randint(0, 1)
val2 = randint(0, 1)
living = int(((100 - perckill) * population) // 100)
dead = population - living
random_strings = int((percrandom * population) // 100)
reproduced_strings = int(living + random_strings)
parents = int(((100 - percparents) * population) // 100)
"""
print(living)
print(dead)
print(population)
print(random_strings)
print(reproduced_strings)
"""

def random_matrix_generator():
    # generates a matrix with width = number of genes in the target and height = population
    # horizontal is traits, vertical is individuals
    # each gene represents a letter; each individual represents a word
    individual_by_gene_matrix = [[randint(-200, 200) / 100 for x in range(length_of_phrase)] for x in range(population)]
    return individual_by_gene_matrix

"""
def convert_matrix_into_list_of_stings():
    listofstrings = [() for var in range(population)]
    for var in range(population):
        list = individual_by_gene_matrix[var]  # creates a list for each individual with their traits
        lista = [chr(n) for n in list]         # the traits become letters
        listofstrings[var] = ''.join(lista)    # creates a list of all the individuals with letters joined
    return listofstrings
"""

def fitness_function():
    for individual in range(population):
        # unpack the individual's 15 genes into the network's weights and biases
        number_of_layers, number_of_neurons, nnv, nnw, nnb = NN_setup(val1, val2, *individual_by_gene_matrix[individual])
        for var in range(1, number_of_layers):
            nnv = weight_dot_value_plus_bias(var)
            nnv = sigmoid(var)
        fitness[individual] = 1 - abs((val1 ^ val2) - nnv[2][0])
        # for n in range(population):
        #     print('{} : {} : {}'.format(n, listofstrings[n], fitness[n]))
    return fitness

def matrix_reorder():
    temp_individual_by_gene_matrix = [[0 for var in range(length_of_phrase)] for var in range(population)]
    temp_fitness = [0 for var in range(population)]
    for var in range(population):
        var_a = fitness.index(max(fitness))
        temp_fitness[var] = fitness.pop(var_a)
        temp_individual_by_gene_matrix[var] = individual_by_gene_matrix.pop(var_a)
    return temp_individual_by_gene_matrix, temp_fitness

def kill():
    for individal in range(living, population):
        individual_by_gene_matrix[individal] = [0] * length_of_phrase
    return individual_by_gene_matrix

def reproduce():
    for individual in range(living, reproduced_strings):
        for gene in range(length_of_phrase):
            individual_by_gene_matrix[individual][gene] = randint(-200, 200) / 100
    for individual in range(reproduced_strings, population):
        mom = randint(0, parents)
        dad = randint(0, parents)
        for gene in range(length_of_phrase):
            individual_by_gene_matrix[individual][gene] = random.choice([individual_by_gene_matrix[mom][gene], individual_by_gene_matrix[dad][gene]])
    return individual_by_gene_matrix

def mutate():
    for individual in range(population):
        for gene in range(length_of_phrase):
            if randint(0, 100) <= percmutate:
                individual_by_gene_matrix[individual][gene] = random.gauss(individual_by_gene_matrix[individual][gene], 0.5)
    return individual_by_gene_matrix

def NN_setup(val1, val2, w100, w101, w110, w111, w120, w121, w200, w201, w202, b00, b01, b10, b11, b12, b20):
    number_of_layers = 3
    number_of_neurons = [2, 3, 1]
    nnv = [0] * number_of_layers
    nnw = [0] * number_of_layers
    nnb = [0] * number_of_layers
    for layer in range(number_of_layers):
        nnv[layer] = [0] * number_of_neurons[layer]
        nnw[layer] = [0] * number_of_neurons[layer]
        nnb[layer] = [0] * number_of_neurons[layer]
        if layer != 0:
            for neuron in range(number_of_neurons[layer]):
                nnw[layer][neuron] = [0] * number_of_neurons[layer - 1]
    nnv = [[val1, val2], [0.0, 0.0, 0.0], [0.0]]
    nnw = [['inputs have no weight'], [[w100, w101], [w110, w111], [w120, w121]], [[w200, w201, w202]]]
    nnb = [[b00, b01], [b10, b11, b12], [b20]]
    return number_of_layers, number_of_neurons, nnv, nnw, nnb

# The traceback points at the line marked below:
def weight_dot_value_plus_bias(layer):
    for nueron in range(number_of_neurons[layer]):
        for weight in range(number_of_neurons[layer - 1]):
            nnv[layer][nueron] += nnv[layer - 1][weight] * nnw[layer][nueron][weight]  # ---> error here
        nnv[layer][nueron] += nnb[layer][nueron]
    return nnv

def sigmoid(layer):
    for neuron in range(number_of_neurons[layer]):
        nnv[layer][neuron] = 1 / (1 + 3 ** (-nnv[layer][neuron]))
    return nnv

individual_by_gene_matrix = random_matrix_generator()

while generation_number <= max_number_of_generations:
    val1 = randint(0, 1)
    val2 = randint(0, 1)
    fitness = [0 for var in range(population)]
    # populations_phenotypes_by_individual = convert_matrix_into_list_of_stings()
    fitness = fitness_function()
    individual_by_gene_matrix, fitness = matrix_reorder()
    individual_by_gene_matrix = kill()
    individual_by_gene_matrix = reproduce()
    individual_by_gene_matrix = mutate()
    individual_by_gene_matrix, fitness = matrix_reorder()
    # populations_phenotypes_by_individual = convert_matrix_into_list_of_stings()
    print('{} {} {} {}'.format(generation_number, (10000 * fitness[0]) // 100, val1, val2))
    generation_number += 1

print('')
print('')
print(individual_by_gene_matrix[0])
That was way too many indents!!!
How the hell do I just insert a block of code????!!!!
I'll give you the source code to the individual programs once I learn how to insert a block of code.
[1] You're going to have to wait till I give you the source code to just the genetic algorithm.
Any tips, suggestions, or maybe how you would write the code for what I'm trying to do?

how to get reproducible results in TensorFlow

I built a 5-layer neural network using TensorFlow.
I have a problem getting reproducible (or stable) results.
I found similar questions regarding the reproducibility of TensorFlow and the corresponding answers, such as How to get stable results with TensorFlow, setting random seed, but the problem is not solved yet.
I also set the random seed like the following:
tf.set_random_seed(1)
Furthermore, I added a seed option to every random function, such as:
b1 = tf.Variable(tf.random_normal([nHidden1], seed=1234))
I confirmed that the first epoch shows identical results, but from the second epoch on, the results diverge little by little.
How can I get reproducible results? Am I missing something?
Here is the code block I use.
def xavier_init(n_inputs, n_outputs, uniform=True):
    if uniform:
        init_range = tf.sqrt(6.0 / (n_inputs + n_outputs))
        return tf.random_uniform_initializer(-init_range, init_range, seed=1234)
    else:
        stddev = tf.sqrt(3.0 / (n_inputs + n_outputs))
        return tf.truncated_normal_initializer(stddev=stddev, seed=1234)
import numpy as np
import tensorflow as tf
import dataSetup
from scipy.stats.stats import pearsonr

tf.set_random_seed(1)
x_train, y_train, x_test, y_test = dataSetup.input_data()

# Parameters
learningRate = 0.01
trainingEpochs = 1000000
batchSize = 64
displayStep = 100
thresholdReduce = 1e-6
thresholdNow = 0.6
# dropoutRate = tf.constant(0.7)

# Network parameters
nHidden1 = 128  # number of 1st layer nodes
nHidden2 = 64   # number of 2nd layer nodes
nInput = 24
nOutput = 1     # predicted score: 1 output for regression

# save parameter
modelPath = 'model/model_layer5_%d_%d_mini%d_lr%.3f_noDrop_rollBack.ckpt' % (nHidden1, nHidden2, batchSize, learningRate)

# tf Graph input
X = tf.placeholder("float", [None, nInput])
Y = tf.placeholder("float", [None, nOutput])

# Weights
W1 = tf.get_variable("W1", shape=[nInput, nHidden1], initializer=xavier_init(nInput, nHidden1))
W2 = tf.get_variable("W2", shape=[nHidden1, nHidden2], initializer=xavier_init(nHidden1, nHidden2))
W3 = tf.get_variable("W3", shape=[nHidden2, nHidden2], initializer=xavier_init(nHidden2, nHidden2))
W4 = tf.get_variable("W4", shape=[nHidden2, nHidden2], initializer=xavier_init(nHidden2, nHidden2))
WFinal = tf.get_variable("WFinal", shape=[nHidden2, nOutput], initializer=xavier_init(nHidden2, nOutput))

# Biases
b1 = tf.Variable(tf.random_normal([nHidden1], seed=1234))
b2 = tf.Variable(tf.random_normal([nHidden2], seed=1234))
b3 = tf.Variable(tf.random_normal([nHidden2], seed=1234))
b4 = tf.Variable(tf.random_normal([nHidden2], seed=1234))
bFinal = tf.Variable(tf.random_normal([nOutput], seed=1234))

# Layers
L1 = tf.nn.relu(tf.add(tf.matmul(X, W1), b1))
L2 = tf.nn.relu(tf.add(tf.matmul(L1, W2), b2))
L3 = tf.nn.relu(tf.add(tf.matmul(L2, W3), b3))
L4 = tf.nn.relu(tf.add(tf.matmul(L3, W4), b4))
hypothesis = tf.add(tf.matmul(L4, WFinal), bFinal)
print "Layer setting DONE..."

# define loss and optimizer
cost = tf.reduce_mean(tf.square(hypothesis - Y))
optimizer = tf.train.AdamOptimizer(learning_rate=learningRate).minimize(cost)

# Initialize the variables
init = tf.initialize_all_variables()

# save op to save and restore all the variables
saver = tf.train.Saver()

with tf.Session() as sess:
    # initialize
    sess.run(init)
    print "Initialize DONE..."

    # Training
    costPrevious = 100000000000000.0
    best = float("INF")
    totalBatch = int(len(x_train) / batchSize)
    print "Total Batch: %d" % totalBatch

    for epoch in range(trainingEpochs):
        # print "EPOCH: %04d" % epoch
        avgCost = 0.
        for i in range(totalBatch):
            np.random.seed(i + epoch)
            randidx = np.random.randint(len(x_train), size=batchSize)
            batch_xs = x_train[randidx, :]
            batch_ys = y_train[randidx, :]

            # Fit training using batch data
            sess.run(optimizer, feed_dict={X: batch_xs, Y: batch_ys})

            # compute average loss
            avgCost += sess.run(cost, feed_dict={X: batch_xs, Y: batch_ys}) / totalBatch

        # compare the current cost and the previous:
        # if current cost > the previous, just continue (restoring the saved model)
        # print "Cost: %1.8f --> %1.8f at epoch %05d" % (costPrevious, avgCost, epoch+1)
        if avgCost > costPrevious + .5:
            # sess.run(init)
            load_path = saver.restore(sess, modelPath)
            print "Cost increases at the epoch %05d" % (epoch+1)
            print "Cost: %1.8f --> %1.8f" % (costPrevious, avgCost)
            continue

        costNow = avgCost
        reduceCost = abs(costPrevious - costNow)
        costPrevious = costNow

        # Display logs per epoch step
        if costNow < best:
            best = costNow
            bestMatch = sess.run(hypothesis, feed_dict={X: x_test})
            # model save
            save_path = saver.save(sess, modelPath)

        if epoch % displayStep == 0:
            print "step {}".format(epoch)
            pearson = np.corrcoef(bestMatch.flatten(), y_test.flatten())
            print 'train loss = {}, current loss = {}, test corrcoef={}'.format(best, costNow, pearson[0][1])

        if reduceCost < thresholdReduce or costNow < thresholdNow:
            print "Epoch: %04d, Cost: %.9f, Prev: %.9f, Reduce: %.9f" % (epoch+1, costNow, costPrevious, reduceCost)
            break

    print "Optimization Finished"
It seems that your results are perhaps not reproducible because you are using Saver to write/restore from a checkpoint each time: i.e., the second time you run the code, the variable values aren't initialized using your random seed; they are restored from your previous checkpoint.
Please trim down your code example to just the code necessary to reproduce the irreproducibility.
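A quick way to test that hypothesis (a sketch against the graph defined in the question, run with no checkpoint file present): print a checksum of one weight matrix right after initialization; if two separate runs print different values even with the seeds set, initialization or the restore path is the culprit:

with tf.Session() as sess:
    sess.run(init)
    # same seeds should give the same initial weights across runs
    print(np.abs(sess.run(W1)).sum())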

Python 2.7 - How to compare two images?

In Python 2.7, I want to compare 2 images for sameness. How do I do this? Please show me step by step. Thanks!
There are many ways to do it, using open-source libraries such as OpenCV, scikit-learn, or TensorFlow.
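If you only need to know whether two images are exactly identical, a minimal sketch (the file names here are hypothetical):

import cv2
import numpy as np

img1 = cv2.imread('a.jpg')
img2 = cv2.imread('b.jpg')

# identical iff the shapes match and no pixel differs
identical = img1.shape == img2.shape and not np.any(cv2.absdiff(img1, img2))
print(identical)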
For a fuzzier comparison, you can do something like Template Matching in OpenCV:
import cv2
import numpy as np
from matplotlib import pyplot as plt

img = cv2.imread('img.jpg', 0)
img2 = img.copy()
template = cv2.imread('img2.jpg', 0)
w, h = template.shape[::-1]

methods = ['cv2.TM_CCOEFF', 'cv2.TM_CCOEFF_NORMED']

for meth in methods:
    img = img2.copy()
    method = eval(meth)

    res = cv2.matchTemplate(img, template, method)
    min_val, max_val, min_loc, max_loc = cv2.minMaxLoc(res)

    # for SQDIFF methods the best match is the minimum, otherwise the maximum
    if method in [cv2.TM_SQDIFF, cv2.TM_SQDIFF_NORMED]:
        top_left = min_loc
    else:
        top_left = max_loc
    bottom_right = (top_left[0] + w, top_left[1] + h)

    cv2.rectangle(img, top_left, bottom_right, 255, 2)

    plt.subplot(121), plt.imshow(res)
    plt.title('Matching Result'), plt.xticks([]), plt.yticks([])
    plt.subplot(122), plt.imshow(img, cmap='gray')
    plt.title('Detected Point'), plt.xticks([]), plt.yticks([])
    plt.suptitle(meth)
    plt.show()
or Histogram comparison
import cv2
import numpy as np

base = cv2.imread('test4.jpg')
test1 = cv2.imread('test3.jpg')
test2 = cv2.imread('test5.jpg')

rows, cols = base.shape[:2]

basehsv = cv2.cvtColor(base, cv2.COLOR_BGR2HSV)
test1hsv = cv2.cvtColor(test1, cv2.COLOR_BGR2HSV)
test2hsv = cv2.cvtColor(test2, cv2.COLOR_BGR2HSV)

halfhsv = basehsv[rows/2:rows-1, cols/2:cols-1].copy()  # take lower half of the base image for testing

hbins = 180
sbins = 255
hrange = [0, 180]
srange = [0, 256]
ranges = hrange + srange  # ranges = [0,180,0,256]
ranges = None

histbase = cv2.calcHist([basehsv], [0, 1], None, [180, 256], ranges)
cv2.normalize(histbase, histbase, 0, 255, cv2.NORM_MINMAX)

histhalf = cv2.calcHist([halfhsv], [0, 1], None, [180, 256], ranges)
cv2.normalize(histhalf, histhalf, 0, 255, cv2.NORM_MINMAX)

histtest1 = cv2.calcHist([test1hsv], [0, 1], None, [180, 256], ranges)
cv2.normalize(histtest1, histtest1, 0, 255, cv2.NORM_MINMAX)

histtest2 = cv2.calcHist([test2hsv], [0, 1], None, [180, 256], ranges)
cv2.normalize(histtest2, histtest2, 0, 255, cv2.NORM_MINMAX)

for i in xrange(5):
    base_base = cv2.compareHist(histbase, histbase, i)
    base_half = cv2.compareHist(histbase, histhalf, i)
    base_test1 = cv2.compareHist(histbase, histtest1, i)
    base_test2 = cv2.compareHist(histbase, histtest2, i)
    print "Method: {0} -- base-base: {1}, base-test1: {2}, base_test2: {3}".format(i, base_base, base_test1, base_test2)

I want to correct this code for images, what changes do I need to make?

Currently I am recognizing a face: I have to find whether the face to be tested is in the training database or not, so I have to decide yes or no.
Yes means the image is found; no means print the message NO IMAGE IN DATABASE. I have a program; currently this program finds the correct image correctly, but even when there is no matching image in the database, it still shows some other image that does not match. In that case it should actually print NO IMAGE IN DATABASE.
So, how do I do this?
Here is the test and training image data at this link:
http://www.fileconvoy.com/dfl.php?id=g6e59fe8105a6e6389994740914b7b2fc99eb3e445
My program consists of four different .m files, given below; we only have to run the first one, and the remaining 3 are functions.
clear all
clc
close all

TrainDatabasePath = uigetdir('D:\Program Files\MATLAB\R2006a\work', 'Select training database path');
TestDatabasePath = uigetdir('D:\Program Files\MATLAB\R2006a\work', 'Select test database path');

prompt = {'Enter test image name (a number between 1 to 10):'};
dlg_title = 'Input of PCA-Based Face Recognition System';
num_lines = 1;
def = {'1'};

TestImage = inputdlg(prompt, dlg_title, num_lines, def);
TestImage = strcat(TestDatabasePath, '\', char(TestImage), '.jpg');
im = imread(TestImage);

T = CreateDatabase(TrainDatabasePath);
[m, A, Eigenfaces] = EigenfaceCore(T);
OutputName = Recognition(TestImage, m, A, Eigenfaces);

SelectedImage = strcat(TrainDatabasePath, '\', OutputName);
SelectedImage = imread(SelectedImage);

imshow(im)
title('Test Image');
figure, imshow(SelectedImage);
title('Equivalent Image');

str = strcat('Matched image is : ', OutputName);
disp(str)
function T = CreateDatabase(TrainDatabasePath)
TrainFiles = dir(TrainDatabasePath);
Train_Number = 0;
for i = 1:size(TrainFiles,1)
    if not(strcmp(TrainFiles(i).name,'.') | strcmp(TrainFiles(i).name,'..') | strcmp(TrainFiles(i).name,'Thumbs.db'))
        Train_Number = Train_Number + 1; % Number of all images in the training database
    end
end

T = [];
for i = 1 : Train_Number
    str = int2str(i);
    str = strcat('\',str,'.jpg');
    str = strcat(TrainDatabasePath,str);
    img = imread(str);
    img = rgb2gray(img);
    [irow, icol] = size(img);
    temp = reshape(img', irow*icol, 1); % Reshaping 2D images into 1D image vectors
    T = [T temp];                       % 'T' grows after each turn
end
function [m, A, Eigenfaces] = EigenfaceCore(T)
m = mean(T,2); % Computing the average face image m = (1/P)*sum(Tj's) (j = 1 : P)
Train_Number = size(T,2);

A = [];
for i = 1 : Train_Number
    temp = double(T(:,i)) - m; % Computing the difference image for each image: Ai = Ti - m
    A = [A temp];              % Merging all centered images
end

L = A'*A;        % L is the surrogate of covariance matrix C=A*A'.
[V, D] = eig(L); % Diagonal elements of D are the eigenvalues for both L=A'*A and C=A*A'.

L_eig_vec = [];
for i = 1 : size(V,2)
    if( D(i,i) > 1 )
        L_eig_vec = [L_eig_vec V(:,i)];
    end
end

Eigenfaces = A * L_eig_vec; % A: centered image vectors
function OutputName = Recognition(TestImage, m, A, Eigenfaces)
ProjectedImages = [];
Train_Number = size(Eigenfaces,2);
for i = 1 : Train_Number
    temp = Eigenfaces'*A(:,i); % Projection of centered images into facespace
    ProjectedImages = [ProjectedImages temp];
end

InputImage = imread(TestImage);
temp = InputImage(:,:,1);
[irow, icol] = size(temp);
InImage = reshape(temp', irow*icol, 1);
Difference = double(InImage) - m;            % Centered test image
ProjectedTestImage = Eigenfaces'*Difference; % Test image feature vector

Euc_dist = [];
for i = 1 : Train_Number
    q = ProjectedImages(:,i);
    temp = ( norm( ProjectedTestImage - q ) )^2;
    Euc_dist = [Euc_dist temp];
end

[Euc_dist_min, Recognized_index] = min(Euc_dist);
OutputName = strcat(int2str(Recognized_index),'.jpg');
So, how do I generate an error message when no image matches?
At the moment, your application appears to find the most similar image (you appear to be using Euclidean distance as your measure of similarity) and return it. There doesn't seem to be any concept of whether the image "matches" or not.
Define a threshold on similarity, and then determine whether your most similar image meets that threshold. If it does, return it; otherwise, display an error message.
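A minimal sketch of that idea (in Python for illustration, mirroring the logic of the Recognition function; the threshold value is a made-up placeholder you would calibrate on distances of known matches and non-matches):

THRESHOLD = 1.0e7  # hypothetical; tune on your own data

def recognize(euc_dist):
    # euc_dist: list of squared distances to each training image
    best = min(range(len(euc_dist)), key=lambda i: euc_dist[i])
    if euc_dist[best] > THRESHOLD:
        return None  # caller prints 'NO IMAGE IN DATABASE'
    return '{}.jpg'.format(best + 1)  # training images are named 1.jpg, 2.jpg, ...

result = recognize([3.2e6, 9.8e7, 5.1e7])
print(result if result else 'NO IMAGE IN DATABASE')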
