Matlab error in Backpropagation algorithm - algorithm

Here is a matalab program for backpropagation algorithm-
% XOR input for x1 and x2
input = [0 0; 0 1; 1 0; 1 1];
% Desired output of XOR
output = [0;1;1;0];
% Initialize the bias
bias = [-1 -1 -1];
% Learning coefficient
coeff = 0.7;
% Number of learning iterations
iterations = 10000;
% Calculate weights randomly using seed.
rand('state',sum(100.*clock));
weights = -1 +2.*rand(3,3);
for i = 1:iterations
out = zeros(4,1);
numIn = length (input(:,1));
for j = 1:numIn
% Hidden layer
H1 = bias(1,1).*weights(1,1) + input(j,1).*weights(1,2)+ input(j,2).*weights(1,3);
% Send data through sigmoid function 1/1+e^-x
% Note that sigma is a different m file
% that I created to run this operation
x2(1) = sigma(H1);
H2 = bias(1,2).*weights(2,1)+ input(j,1).*weights(2,2)+ input(j,2).*weights(2,3);
x2(2) = sigma(H2);
% Output layer
x3_1 = bias(1,3).*weights(3,1)+ x2(1).*weights(3,2)+ x2(2).*weights(3,3);
out(j) = sigma(x3_1);
% Adjust delta values of weights
% For output layer:
% delta(wi) = xi*delta,
% delta = (1-actual output)*(desired output - actual output)
delta3_1 = out(j).*(1-out(j)).*(output(j)-out(j));
% Propagate the delta backwards into hidden layers
delta2_1 = x2(1).*(1-x2(1)).*weights(3,2).*delta3_1;
delta2_2 = x2(2).*(1-x2(2)).*weights(3,3).*delta3_1;
% Add weight changes to original weights
% And use the new weights to repeat process.
% delta weight = coeff*x*delta
for k = 1:3
if k == 1 % Bias cases
weights(1,k) = weights(1,k) + coeff.*bias(1,1).*delta2_1;
weights(2,k) = weights(2,k) + coeff.*bias(1,2).*delta2_2;
weights(3,k) = weights(3,k) + coeff.*bias(1,3).*delta3_1;
else % When k=2 or 3 input cases to neurons
weights(1,k) = weights(1,k) + coeff.*input(j,1).*delta2_1;
weights(2,k) = weights(2,k) + coeff.*input(j,2).*delta2_2;
weights(3,k) = weights(3,k) + coeff.*x2(k-1).*delta3_1;
end
end
end
end
But its showing error like -
??? Index exceeds matrix dimensions.
Error in ==> sigma at 95
a=varargin{1}; b=varargin{2}; c=varargin{3}; d=varargin{4};
Error in ==> back at 25
x2(1) = sigma(H1);
Please help me out. I am not able to understand the problem. Why there is an error saying index exceeds matrix dimension? Help is needed.

Related

K-means for image compression only gives black-and-white result

I'm doing this exercise by Andrew NG about using k-means to reduce the number of colors in an image. But the problem is my code only gives a black-and-white image :( . I have checked every step in the algorithm but it still won't give the correct result. Please help me, thank you very much
Here is the link of the exercise, and here is the dataset.
The correct result is given in the link of the exercise. And here is my black-and-white image:
Here is my code:
function [] = KMeans()
Image = double(imread('bird_small.tiff'));
[rows,cols, RGB] = size(Image);
Points = reshape(Image,rows * cols, RGB);
K = 16;
Centroids = zeros(K,RGB);
s = RandStream('mt19937ar','Seed',0);
% Initialization :
% Pick out K random colours and make sure they are all different
% from each other! This prevents the situation where two of the means
% are assigned to the exact same colour, therefore we don't have to
% worry about division by zero in the E-step
% However, if K = 16 for example, and there are only 15 colours in the
% image, then this while loop will never exit!!! This needs to be
% addressed in the future :(
% TODO : Vectorize this part!
done = false;
while done == false
RowIndex = randperm(s,rows);
ColIndex = randperm(s,cols);
RowIndex = RowIndex(1:K);
ColIndex = ColIndex(1:K);
for i = 1 : K
for j = 1 : RGB
Centroids(i,j) = Image(RowIndex(i),ColIndex(i),j);
end
end
Centroids = sort(Centroids,2);
Centroids = unique(Centroids,'rows');
if size(Centroids,1) == K
done = true;
end
end;
% imshow(imread('bird_small.tiff'))
%
% for i = 1 : K
% hold on;
% plot(RowIndex(i),ColIndex(i),'r+','MarkerSize',50)
% end
eps = 0.01; % Epsilon
IterNum = 0;
while 1
% E-step: Estimate membership given parameters
% Membership: The centroid that each colour is assigned to
% Parameters: Location of centroids
Dist = pdist2(Points,Centroids,'euclidean');
[~, WhichCentroid] = min(Dist,[],2);
% M-step: Estimate parameters given membership
% Membership: The centroid that each colour is assigned to
% Parameters: Location of centroids
% TODO: Vectorize this part!
OldCentroids = Centroids;
for i = 1 : K
PointsInCentroid = Points((find(WhichCentroid == i))',:);
NumOfPoints = size(PointsInCentroid,1);
% Note that NumOfPoints is never equal to 0, as a result of
% the initialization. Or .... ???????
if NumOfPoints ~= 0
Centroids(i,:) = sum(PointsInCentroid , 1) / NumOfPoints ;
end
end
% Check for convergence: Here we use the L2 distance
IterNum = IterNum + 1;
Margins = sqrt(sum((Centroids - OldCentroids).^2, 2));
if sum(Margins > eps) == 0
break;
end
end
IterNum;
Centroids ;
% Load the larger image
[LargerImage,ColorMap] = imread('bird_large.tiff');
LargerImage = double(LargerImage);
[largeRows,largeCols,~] = size(LargerImage); % RGB is always 3
% Dist = zeros(size(Centroids,1),RGB);
% TODO: Vectorize this part!
% Replace each of the pixel with the nearest centroid
for i = 1 : largeRows
for j = 1 : largeCols
Dist = pdist2(Centroids,reshape(LargerImage(i,j,:),1,RGB),'euclidean');
[~,WhichCentroid] = min(Dist);
LargerImage(i,j,:) = Centroids(WhichCentroid);
end
end
% Display new image
imshow(uint8(round(LargerImage)),ColorMap)
imwrite(uint8(round(LargerImage)), 'D:\Hoctap\bird_kmeans.tiff');
You're indexing into Centroids with a single linear index.
Centroids(WhichCentroid)
This is going to return a single value (specifically the red value for that centroid). When you assign this to LargerImage(i,j,:), it will assign all RGB channels the same value resulting in a grayscale image.
You likely want to grab all columns of the selected centroid to provide an array of red, green, and blue values that you want to assign to LargerImage(i,j,:). You can do by using a colon : to specify all columns of Centroids which belong to the row indicated by WhichCentroid.
LargerImage(i,j,:) = Centroids(WhichCentroid,:);

K-means for color quantization - Code not vectorized

I'm doing this exercise by Andrew NG about using k-means to reduce the number of colors in an image. It worked correctly but I'm afraid it's a little slow because of all the for loops in the code, so I'd like to vectorize them. But there are those loops that I just can't seem to vectorize effectively. Please help me, thank you very much!
Also if possible please give some feedback on my coding style :)
Here is the link of the exercise, and here is the dataset.
The correct result is given in the link of the exercise.
And here is my code:
function [] = KMeans()
Image = double(imread('bird_small.tiff'));
[rows,cols, RGB] = size(Image);
Points = reshape(Image,rows * cols, RGB);
K = 16;
Centroids = zeros(K,RGB);
s = RandStream('mt19937ar','Seed',0);
% Initialization :
% Pick out K random colours and make sure they are all different
% from each other! This prevents the situation where two of the means
% are assigned to the exact same colour, therefore we don't have to
% worry about division by zero in the E-step
% However, if K = 16 for example, and there are only 15 colours in the
% image, then this while loop will never exit!!! This needs to be
% addressed in the future :(
% TODO : Vectorize this part!
done = false;
while done == false
RowIndex = randperm(s,rows);
ColIndex = randperm(s,cols);
RowIndex = RowIndex(1:K);
ColIndex = ColIndex(1:K);
for i = 1 : K
for j = 1 : RGB
Centroids(i,j) = Image(RowIndex(i),ColIndex(i),j);
end
end
Centroids = sort(Centroids,2);
Centroids = unique(Centroids,'rows');
if size(Centroids,1) == K
done = true;
end
end;
% imshow(imread('bird_small.tiff'))
%
% for i = 1 : K
% hold on;
% plot(RowIndex(i),ColIndex(i),'r+','MarkerSize',50)
% end
eps = 0.01; % Epsilon
IterNum = 0;
while 1
% E-step: Estimate membership given parameters
% Membership: The centroid that each colour is assigned to
% Parameters: Location of centroids
Dist = pdist2(Points,Centroids,'euclidean');
[~, WhichCentroid] = min(Dist,[],2);
% M-step: Estimate parameters given membership
% Membership: The centroid that each colour is assigned to
% Parameters: Location of centroids
% TODO: Vectorize this part!
OldCentroids = Centroids;
for i = 1 : K
PointsInCentroid = Points((find(WhichCentroid == i))',:);
NumOfPoints = size(PointsInCentroid,1);
% Note that NumOfPoints is never equal to 0, as a result of
% the initialization. Or .... ???????
if NumOfPoints ~= 0
Centroids(i,:) = sum(PointsInCentroid , 1) / NumOfPoints ;
end
end
% Check for convergence: Here we use the L2 distance
IterNum = IterNum + 1;
Margins = sqrt(sum((Centroids - OldCentroids).^2, 2));
if sum(Margins > eps) == 0
break;
end
end
IterNum;
Centroids ;
% Load the larger image
[LargerImage,ColorMap] = imread('bird_large.tiff');
LargerImage = double(LargerImage);
[largeRows,largeCols,NewRGB] = size(LargerImage); % RGB is always 3
% TODO: Vectorize this part!
largeRows
largeCols
NewRGB
% Replace each of the pixel with the nearest centroid
NewPoints = reshape(LargerImage,largeRows * largeCols, NewRGB);
Dist = pdist2(NewPoints,Centroids,'euclidean');
[~,WhichCentroid] = min(Dist,[],2);
NewPoints = Centroids(WhichCentroid,:);
LargerImage = reshape(NewPoints,largeRows,largeCols,NewRGB);
% for i = 1 : largeRows
% for j = 1 : largeCols
% Dist = pdist2(Centroids,reshape(LargerImage(i,j,:),1,RGB),'euclidean');
% [~,WhichCentroid] = min(Dist);
% LargerImage(i,j,:) = Centroids(WhichCentroid,:);
% end
% end
% Display new image
imshow(uint8(round(LargerImage)),ColorMap)
UPDATE: Replaced
for i = 1 : K
for j = 1 : RGB
Centroids(i,j) = Image(RowIndex(i),ColIndex(i),j);
end
end
with
for i = 1 : K
Centroids(i,:) = Image(RowIndex(i),ColIndex(i),:);
end
I think this may be vectorized further by using linear indexing, but for now I should just focus on the while loop since it takes most of the time.
Also when I tried #Dev-iL's suggestion and replaced
for i = 1 : K
PointsInCentroid = Points((find(WhichCentroid == i))',:);
NumOfPoints = size(PointsInCentroid,1);
% Note that NumOfPoints is never equal to 0, as a result of
% the initialization. Or .... ???????
if NumOfPoints ~= 0
Centroids(i,:) = sum(PointsInCentroid , 1) / NumOfPoints ;
end
end
with
E = sparse(1:size(WhichCentroid), WhichCentroid' , 1, Num, K, Num);
Centroids = (E * spdiags(1./sum(E,1)',0,K,K))' * Points ;
the results were always worse: With K = 16, the first takes 2,414s , the second takes 2,455s ; K = 32, the first takes 4,529s , the second takes 5,022s. Seems like vectorization does not help, but maybe there's something wrong with my code :( .
Replaced
for i = 1 : K
for j = 1 : RGB
Centroids(i,j) = Image(RowIndex(i),ColIndex(i),j);
end
end
with
for i = 1 : K
Centroids(i,:) = Image(RowIndex(i),ColIndex(i),:);
end
I think this may be vectorized further by using linear indexing, but for now I should just focus on the while loop since it takes most of the time.
Also when I tried #Dev-iL's suggestion and replaced
for i = 1 : K
PointsInCentroid = Points((find(WhichCentroid == i))',:);
NumOfPoints = size(PointsInCentroid,1);
% Note that NumOfPoints is never equal to 0, as a result of
% the initialization. Or .... ???????
if NumOfPoints ~= 0
Centroids(i,:) = sum(PointsInCentroid , 1) / NumOfPoints ;
end
end
with
E = sparse(1:size(WhichCentroid), WhichCentroid' , 1, Num, K, Num);
Centroids = (E * spdiags(1./sum(E,1)',0,K,K))' * Points ;
the results were always worse: With K = 16, the first takes 2,414s , the second takes 2,455s ; K = 32, the first took 4,529s , the second took 5,022s. Seems like vectorization did not help in this case.
However, when I replaced
Dist = pdist2(Points,Centroids,'euclidean');
[~, WhichCentroid] = min(Dist,[],2);
(in the while loop) with
Dist = bsxfun(#minus,dot(Centroids',Centroids',1)' / 2 , Centroids * Points' );
[~, WhichCentroid] = min(Dist,[],1);
WhichCentroid = WhichCentroid';
the code ran much faster, especially when K is large (K=32)
Thank you everyone!

Vectorized formula for output layer in a neural network

I have a neural network and want to use the trained neural network to solve for a set of test data. What I am struggling with is writing the formula for the hidden layer and for the output layer. I aim to have a vectorized formula but I will also be happy to implement a loop variation.
Now I believe I have the correct formula for the hidden layer and only need one for the output layer, but would appreciate it anyone confirm that it is the vectorized formula.
% Variables
% Xtest test training data
% thetah - trained weights for inputs to hidden layer
% thetao - trained weights for hidden layer to outputs
% ytest - output
htest = (1 ./ (1 + exp(-(thetah * Xtest'))))' ; % FORMULA FOR HIDDEN LAYER
ytest = ones(mtest, num_outputs) ; % FORMULA FOR OUTPUT LAYER
Below you can find both vectorized and loop implementations of the forward propagation. It is possible, that your input data have to be adapted to the code below, because of different notations and the way you store data in your matrices.
You need to add a bias unit to both input and hidden layer.
In order to simplify the work on the implementation and debugging I took some data from the open source machine learning repository and trained the network for the wine classification task.
Xtest - input data [178x13]
y - output class [178x1]
thetah - parameters of the hidden layer [15x14]
thetao -
parameters of the output layer [3x16]
The network separates the input data with rate 97.7%
Here is the code:
function [] = nn_fp()
load('Xtest.mat'); %input data 178x13
load('y.mat'); %output data 178x1
load('thetah.mat'); %Parameters of the hidden layer 15x14
load('thetao.mat'); %Parameters of the output layer 3x16
predict_simple(Xtest, y, thetah, thetao);
predict_vectorized(Xtest, y, thetah, thetao);
end
function predict_simple(Xtest, y, thetah, thetao)
mtest = size(Xtest, 1); %number of input examples
n = size(Xtest, 2); %number of features
hl_size = size(thetah, 1); %size of the hidden layer (without the bias unit)
num_outputs = size(thetao, 1); %size of the output layer
%add a bias unit to the input layer
a1 = [ones(mtest, 1) Xtest]; %[mtest x (n+1)]
%compute activations of the hidden layer
z2 = zeros(mtest, hl_size); %[mtest x hl_size]
a2 = zeros(mtest, hl_size); %[mtest x hl_size]
for i=1:mtest
for j=1:hl_size
for k=1:n+1
z2(i, j) = z2(i, j) + a1(i, k)*thetah(j, k);
end
a2(i, j) = sigmoid_simple(z2(i, j));
end
end
%add a bias unit to the hidden layer
a2 = [ones(mtest, 1) a2]; %[mtest x (hl_size+1)]
%compute activations of the output layer
z3 = zeros(mtest, num_outputs); %[mtest x num_outputs]
h = zeros(mtest, num_outputs); %[mtest x num_outputs]
for i=1:mtest
for j=1:num_outputs
for k=1:hl_size+1
z3(i, j) = z3(i, j) + a2(i, k)*thetao(j, k);
end
h(i, j) = sigmoid_simple(z3(i, j)); %the hypothesis
end
end
%calculate predictions for each input example based on the maximum term
%of the hypothesis h
p = zeros(size(y));
for i=1:mtest
max_ind = 1;
max_value = h(i, 1);
for j=2:num_outputs
if (h(i, j) > max_value)
max_ind = j;
max_value = h(i, j);
end
end
p(i) = max_ind;
end
%calculate the success rate of the prediction
correct_count = 0;
for i=1:mtest
if (p(i) == y(i))
correct_count = correct_count + 1;
end
end
rate = correct_count/mtest*100;
display(['simple version rate:', num2str(rate)]);
end
function predict_vectorized(Xtest, y, thetah, thetao)
mtest = size(Xtest, 1); %number of input examples
%add a bias unit to the input layer
a1 = [ones(mtest, 1) Xtest];
%compute activations of the hidden layer
z2 = a1*thetah';
a2 = sigmoid_universal(z2);
%add a bias unit to the hidden layer
a2 = [ones(mtest, 1) a2];
%compute activations of the output layer
z3 = a2*thetao';
h = sigmoid_universal(z3); %the hypothesis
%calculate predictions for each input example based on the maximum term
%of the hypothesis h
[~,p] = max(h, [], 2);
%calculate the success rate of the prediction
rate = mean(double((p == y))) * 100;
display(['vectorized version rate:', num2str(rate)]);
end
function [ s ] = sigmoid_simple( z )
s = 1/(1+exp(-z));
end
function [ s ] = sigmoid_universal( z )
s = 1./(1+exp(-z));
end
Assuming that your Xtest has dimensions N by M where N is the number of examples and M is the number of features, thetah is a M by H1 matrix where H1 is the number of hidden layers in the first layer and thetao is a H1 by O matrix where O is the number of output classes you do the following:
a1 = Xtest * thetah;
z1 = 1 / (1 + exp(-a1)); %Assuming you are using sigmoid units
a2 = z1 * thetao;
z2 = softmax(a2);
Read more about softmax here.

MatLab code speed and optimization. How to improve?

Could someone please run this for me and tell me how long it takes for you? It took my laptop 60s. I can't tell if it's my laptop that's crappy or my code. Probably both.
I just started learning MatLab, so I'm not yet familiar with which functions are better than others for specific tasks. If you have any suggestions on how I could improve this code, it would be greatly appreciated.
function gbp
clear; clc;
zi = 0; % initial position
zf = 100; % final position
Ei = 1; % initial electric field
c = 3*10^8; % speed of light
epsilon = 8.86*10^-12; % permittivity of free space
lambda = 1064*10^-9; % wavelength
k = 2*pi/lambda; % wave number
wi = 1.78*10^-3; % initial waist width (minimum spot size)
zr = (pi*wi^2)/lambda; % Rayleigh range
Ri = zi + zr^2/zi; % initial radius of curvature
qi = 1/(1/Ri-1i*lambda/(pi*wi^2)); % initial complex beam parameter
Psii = atan(real(qi)/imag(qi)); % Gouy phase
mat = [1 zf; 0 1]; % transformation matrix
A = mat(1,1); B = mat(1,2); C = mat(2,1); D = mat(2,2);
qf = (A*qi + B)/(C*qi + D); % final complex beam parameter
wf = sqrt(-lambda/pi*(1/imag(1/qf))); % final spot size
Rf = 1/real(1/qf); % final radius of curvature
Psif = atan(real(qf)/imag(qf)); % final Gouy phase
% Hermite - Gaussian modes function
u = #(z, x, n, w, R, Psi) (2/pi)^(1/4)*sqrt(exp(1i*(2*n+1)*Psi)/(2^n*factorial(n)*w))*...
hermiteH(n,sqrt(2)*x/w).*exp(-x.^2*(1/w^2+1i*k/(2*R))-1i*k*z);
% Complex amplitude coefficients function
a = #(n) exp(1i*k*zi)*integral(#(x) Ei.*conj(u(zi, x, n, wi, Ri, Psii)),-2*wi,2*wi);
%----------------------------------------------------------------------------
xlisti = -0.1:1/10000:0.1; % initial x-axis range
xlistf = -0.1:1/10000:0.1; % final x-axis range
nlist = 0:2:20; % modes range
function Eiplot
Efieldi = zeros(size(xlisti));
for nr = nlist
Efieldi = Efieldi + a(nr).*u(zi, xlisti, nr, wi, Ri, Psii)*exp(-1i*k*zi);
end
Ii = 1/2*c*epsilon*arrayfun(#(x)x.*conj(x),Efieldi);
end
function Efplot
Efieldf = zeros(size(xlistf));
for nr = nlist
Efieldf = Efieldf + a(nr).*u(zf, xlistf, nr, wf, Rf, Psif)*exp(-1i*k*zf);
end
If = 1/2*c*epsilon*arrayfun(#(x)x.*conj(x),Efieldf);
end
Eiplot
Efplot
plot(xlisti,real(Ii),xlistf,real(If))
xlabel('x(m)') % x-axis label
ylabel('I(W/m^2)') % y-axis label
end
The cost is coming from the calls to hermiteH -- for every call, this creates a new function using symbolic variables, then evaluates the function at your input. The key to speeding this up is to pre-compute the hermite polynomial functions then evaluate those rather than create them from scratch each time (speedup from ~26 seconds to around 0.75 secs on my computer).
With the changes:
function gbp
x = sym('x');
zi = 0; % initial position
zf = 100; % final position
Ei = 1; % initial electric field
c = 3*10^8; % speed of light
epsilon = 8.86*10^-12; % permittivity of free space
lambda = 1064*10^-9; % wavelength
k = 2*pi/lambda; % wave number
wi = 1.78*10^-3; % initial waist width (minimum spot size)
zr = (pi*wi^2)/lambda; % Rayleigh range
Ri = zi + zr^2/zi; % initial radius of curvature
qi = 1/(1/Ri-1i*lambda/(pi*wi^2)); % initial complex beam parameter
Psii = atan(real(qi)/imag(qi)); % Gouy phase
mat = [1 zf; 0 1]; % transformation matrix
A = mat(1,1); B = mat(1,2); C = mat(2,1); D = mat(2,2);
qf = (A*qi + B)/(C*qi + D); % final complex beam parameter
wf = sqrt(-lambda/pi*(1/imag(1/qf))); % final spot size
Rf = 1/real(1/qf); % final radius of curvature
Psif = atan(real(qf)/imag(qf)); % final Gouy phase
% Hermite - Gaussian modes function
nlist = 0:2:20; % modes range
% precompute hermite polynomials for nlist
hermites = {};
for n = nlist
if n == 0
hermites{n + 1} = #(x)1.0;
else
hermites{n + 1} = matlabFunction(hermiteH(n, x));
end
end
u = #(z, x, n, w, R, Psi) (2/pi)^(1/4)*sqrt(exp(1i*(2*n+1)*Psi)/(2^n*factorial(n)*w))*...
hermites{n + 1}(sqrt(2)*x/w).*exp(-x.^2*(1/w^2+1i*k/(2*R))-1i*k*z);
% Complex amplitude coefficients function
a = #(n) exp(1i*k*zi)*integral(#(x) Ei.*conj(u(zi, x, n, wi, Ri, Psii)),-2*wi,2*wi);
%----------------------------------------------------------------------------
xlisti = -0.1:1/10000:0.1; % initial x-axis range
xlistf = -0.1:1/10000:0.1; % final x-axis range
function Eiplot
Efieldi = zeros(size(xlisti));
for nr = nlist
Efieldi = Efieldi + a(nr).*u(zi, xlisti, nr, wi, Ri, Psii)*exp(-1i*k*zi);
end
Ii = 1/2*c*epsilon*arrayfun(#(x)x.*conj(x),Efieldi);
end
function Efplot
Efieldf = zeros(size(xlistf));
for nr = nlist
Efieldf = Efieldf + a(nr).*u(zf, xlistf, nr, wf, Rf, Psif)*exp(-1i*k*zf);
end
If = 1/2*c*epsilon*arrayfun(#(x)x.*conj(x),Efieldf);
end
Eiplot
Efplot
plot(xlisti,real(Ii),xlistf,real(If))
xlabel('x(m)') % x-axis label
ylabel('I(W/m^2)') % y-axis label
end

MATLAB program takes more than 1 hour to execute

The below program is a program for finding k-clique communities from a input graph.
The graph dataset can be found here.
The first line of the dataset contains 'number of nodes and edges' respectively. The following lines have 'node1 node2' representing an edge between node1 and node2 .
For example:
2500 6589 // number_of_nodes, number_of_edges
0 5 // edge between node[0] and node[5]
.
.
.
The k-clique( aCliqueSIZE, anAdjacencyMATRIX ) function is contained here.
The following commands are executed in command window of MATLAB:
x = textread( 'amazon.graph.small' ); %% source input file text
s = max(x(1,1), x(1,2)); %% take largest dimemsion
adjMatrix = sparse(x(2:end,1)+1, x(2:end,2)+1, 1, s, s); %% now matrix is square
adjMatrix = adjMatrix | adjMatrix.'; %% apply "or" with transpose to make symmetric
adjMatrix = full(adjMatrix); %% convert to full if needed
k=4;
[X,Y,Z]=k_clique(k,adjMatrix); %%
% The output can be viewed by the following commands
celldisp(X);
celldisp(Y);
Z
The above program takes more than 1 hour to execute whereas I think this shouldn't be the case. While running the program on windows, I checked the task manager and found that only 500 MB is allocated for the program. Is this the reason for the slowness of the program? If yes, then how can I allocate more heap memory (close to 4GB) to this program in MATLAB?
The problem does not seem to be Memory-bound
Having a sparse, square, symmetric matrix of 6k5 * 6k5 edges does not mean a big memory.
The provided code has many for loops and is heavily recursive in the tail function transfer_nodes()
Add a "Stone-Age-Profiler" into the code
To show the respective times spent on a CPU-bound sections of the processing, wrap the main sections of the code into a construct of:
tic(); for .... end;toc()
which will print you the CPU-bound times spent on relevent sections of the k_clique.m code, showing the readings "on-the-fly"
Your original code k_clique.m
function [components,cliques,CC] = k_clique(k,M)
% k-clique algorithm for detecting overlapping communities in a network
% as defined in the paper "Uncovering the overlapping
% community structure of complex networks in nature and society"
%
% [X,Y,Z] = k_clique(k,A)
%
% Inputs:
% k - clique size
% A - adjacency matrix
%
% Outputs:
% X - detected communities
% Y - all cliques (i.e. complete subgraphs that are not parts of larger
% complete subgraphs)
% Z - k-clique matrix
nb_nodes = size(M,1); % number of nodes
% Find the largest possible clique size via the degree sequence:
% Let {d1,d2,...,dk} be the degree sequence of a graph. The largest
% possible clique size of the graph is the maximum value k such that
% dk >= k-1
degree_sequence = sort(sum(M,2) - 1,'descend');
%max_s = degree_sequence(1);
max_s = 0;
for i = 1:length(degree_sequence)
if degree_sequence(i) >= i - 1
max_s = i;
else
break;
end
end
cliques = cell(0);
% Find all s-size kliques in the graph
for s = max_s:-1:3
M_aux = M;
% Looping over nodes
for n = 1:nb_nodes
A = n; % Set of nodes all linked to each other
B = setdiff(find(M_aux(n,:)==1),n); % Set of nodes that are linked to each node in A, but not necessarily to the nodes in B
C = transfer_nodes(A,B,s,M_aux); % Enlarging A by transferring nodes from B
if ~isempty(C)
for i = size(C,1)
cliques = [cliques;{C(i,:)}];
end
end
M_aux(n,:) = 0; % Remove the processed node
M_aux(:,n) = 0;
end
end
% Generating the clique-clique overlap matrix
CC = zeros(length(cliques));
for c1 = 1:length(cliques)
for c2 = c1:length(cliques)
if c1==c2
CC(c1,c2) = numel(cliques{c1});
else
CC(c1,c2) = numel(intersect(cliques{c1},cliques{c2}));
CC(c2,c1) = CC(c1,c2);
end
end
end
% Extracting the k-clique matrix from the clique-clique overlap matrix
% Off-diagonal elements <= k-1 --> 0
% Diagonal elements <= k --> 0
CC(eye(size(CC))==1) = CC(eye(size(CC))==1) - k;
CC(eye(size(CC))~=1) = CC(eye(size(CC))~=1) - k + 1;
CC(CC >= 0) = 1;
CC(CC < 0) = 0;
% Extracting components (or k-clique communities) from the k-clique matrix
components = [];
for i = 1:length(cliques)
linked_cliques = find(CC(i,:)==1);
new_component = [];
for j = 1:length(linked_cliques)
new_component = union(new_component,cliques{linked_cliques(j)});
end
found = false;
if ~isempty(new_component)
for j = 1:length(components)
if all(ismember(new_component,components{j}))
found = true;
end
end
if ~found
components = [components; {new_component}];
end
end
end
function R = transfer_nodes(S1,S2,clique_size,C)
% Recursive function to transfer nodes from set B to set A (as
% defined above)
% Check if the union of S1 and S2 or S1 is inside an already found larger
% clique
found_s12 = false;
found_s1 = false;
for c = 1:length(cliques)
for cc = 1:size(cliques{c},1)
if all(ismember(S1,cliques{c}(cc,:)))
found_s1 = true;
end
if all(ismember(union(S1,S2),cliques{c}(cc,:)))
found_s12 = true;
break;
end
end
end
if found_s12 || (length(S1) ~= clique_size && isempty(S2))
% If the union of the sets A and B can be included in an
% already found (larger) clique, the recursion is stepped back
% to check other possibilities
R = [];
elseif length(S1) == clique_size;
% The size of A reaches s, a new clique is found
if found_s1
R = [];
else
R = S1;
end
else
% Check the remaining possible combinations of the neighbors
% indices
if isempty(find(S2>=max(S1),1))
R = [];
else
R = [];
for w = find(S2>=max(S1),1):length(S2)
S2_aux = S2;
S1_aux = S1;
S1_aux = [S1_aux S2_aux(w)];
S2_aux = setdiff(S2_aux(C(S2(w),S2_aux)==1),S2_aux(w));
R = [R;transfer_nodes(S1_aux,S2_aux,clique_size,C)];
end
end
end
end
end

Resources