speed up loop in matlab - performance

I'm very new to MATLAB (this is my first script).
I wonder how I may speed up this loop. I don't know any toolboxes or 'tricks' since I'm a newbie; I wrote it by instinct and it works, but it is really slow.
All the variables are either read with fread or integers entered manually, so this is basically simple math, but I have no clue why it is so slow (maybe the nested loops?) or how to improve it, as I am more familiar with Python and, for example, multiprocessing.
Thanks a lot
X = 0;
Points = [0,0,0];
for i=1:nbLines
    for j=1:nbPositions-1
        if lDate(i)>posDate(j) && lDate(i)<=posDate(j+1)
            weight = (lDate(i) - posDate(j)) / (posDate(j+1)- posDate(j));
            X = posX(j)*(1-weight) + posX(j+1) * weight;
        end
    end
    if X ~= 0
        for j=1:nbScans
            Y = - distance(i,j) / tan(angle(i,j));
            Points = [Points;X, Y, distance(i,j)];
        end
    end
end

A second version I tried, using find and a cell array:
X = 0;
Points = cell([],1);
Points{1} = [0,0,0];
count = 1;
for i=1:nbLines
    id = find(lDate(i)>posDate & lDate(i)<=posDate);
    if length(id) > 1
        weight = (lDate(i) - posDate(id(1))) / (posDate(id(end))- posDate(id(1)));
        X = posX(id(1))*(1-weight) + posX(id(end)) * weight;
    end
    if X ~= 0
        j = 1:nbScans;
        count = count+1;
        Y = - distance(i,j)./tan(angle(i,j));
        Points{count} = [repelem(X,size(Y,2),size(Y,1)), Y, distance(i,j)'];
    end
end

There is one issue with the given code. The line below:
Points = [Points; X, Y, distance(i,j)];
grows the array on every iteration, which will definitely slow down your code. You need to preallocate the array that stores the numbers. If you preallocate it, you will see a good difference in speed.
X = 0;
Points = zeros(nbLines*nbScans + 1, 3);   % preallocate (upper bound on the number of rows)
Points(1,:) = [0,0,0];
count = 1;
for i=1:nbLines
    for j=1:nbPositions-1
        if lDate(i)>posDate(j) && lDate(i)<=posDate(j+1)
            weight = (lDate(i) - posDate(j)) / (posDate(j+1)- posDate(j));
            X = posX(j)*(1-weight) + posX(j+1) * weight;
        end
    end
    if X ~= 0
        for j=1:nbScans
            count = count+1;
            Y = - distance(i,j) / tan(angle(i,j));
            Points(count,:) = [X, Y, distance(i,j)];
        end
    end
end
Points = Points(1:count,:);   % drop the unused preallocated rows
Note that your code only saves the last value of X; is this what you want?

Try parallelization: replace for with parfor so the loop iterations are spread over all available workers (this requires the Parallel Computing Toolbox).
parfor i=1:nbLines
    % rest of the loop body here
end
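A minimal sketch of what that could look like, assuming the same variables as in the question (lDate, posDate, posX, distance, angle). Each iteration collects its own rows in a cell so the loop body has no dependence between iterations (note that X is therefore reset for every line, unlike the original where a previous line's X could be reused), and everything is concatenated once at the end:
PointsPerLine = cell(nbLines,1);
parfor i = 1:nbLines
    X = 0;
    for j = 1:nbPositions-1
        if lDate(i) > posDate(j) && lDate(i) <= posDate(j+1)
            weight = (lDate(i) - posDate(j)) / (posDate(j+1) - posDate(j));
            X = posX(j)*(1-weight) + posX(j+1)*weight;
        end
    end
    if X ~= 0
        Y = -distance(i,1:nbScans) ./ tan(angle(i,1:nbScans));            % 1-by-nbScans row
        PointsPerLine{i} = [repmat(X,nbScans,1), Y', distance(i,1:nbScans)'];
    end
end
Points = vertcat(PointsPerLine{:});   % stack all per-line blocks into one N-by-3 matrix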

Related

Can anyone explain how this hybrid PSO-GA differs from a normal GA?

Does this code have mutation, selection, and crossover, just like the original genetic algorithm?
Since this is a hybrid algorithm (i.e. PSO with GA), does it use all the steps of the original GA or does it skip some of them? Please do tell me.
I am new to this and still trying to understand. Thank you.
%%% Hybrid GA and PSO code
function [gbest, gBestScore, all_scores] = QAP_PSO_GA(CreatePopFcn, FitnessFcn, UpdatePosition, ...
nCity, nPlant, nPopSize, nIters)
% Set algorithm parameters
constant = 0.95;
c1 = 1.5; %1.4944; %2;
c2 = 1.5; %1.4944; %2;
w = 0.792 * constant;
% Allocate memory and initialize
gBestScore = inf;
all_scores = inf * ones(nPopSize, nIters);
x = CreatePopFcn(nPopSize, nCity);
v = zeros(nPopSize, nCity);
pbest = x;
% update lbest
cost_p = inf * ones(1, nPopSize); %feval(FUN, pbest');
for i=1:nPopSize
cost_p(i) = FitnessFcn(pbest(i, 1:nPlant));
end
lbest = update_lbest(cost_p, pbest, nPopSize);
for iter = 1 : nIters
if mod(iter,1000) == 0
parents = randperm(nPopSize);
for i = 1:nPopSize
x(i,:) = (pbest(i,:) + pbest(parents(i),:))/2;
% v(i,:) = pbest(parents(i),:) - x(i,:);
% v(i,:) = (v(i,:) + v(parents(i),:))/2;
end
else
% Update velocity
v = w*v + c1*rand(nPopSize,nCity).*(pbest-x) + c2*rand(nPopSize,nCity).*(lbest-x);
% Update position
x = x + v;
x = UpdatePosition(x);
end
% Update pbest
cost_x = inf * ones(1, nPopSize);
for i=1:nPopSize
cost_x(i) = FitnessFcn(x(i, 1:nPlant));
end
s = cost_x<cost_p;
cost_p = (1-s).*cost_p + s.*cost_x;
s = repmat(s',1,nCity);
pbest = (1-s).*pbest + s.*x;
% update lbest
lbest = update_lbest(cost_p, pbest, nPopSize);
% update global best
all_scores(:, iter) = cost_x;
[cost,index] = min(cost_p);
if (cost < gBestScore)
gbest = pbest(index, :);
gBestScore = cost;
end
% draw current fitness
figure(1);
plot(iter,min(cost_x),'cp','MarkerEdgeColor','k','MarkerFaceColor','g','MarkerSize',8)
hold on
str=strcat('Best fitness: ', num2str(min(cost_x)));
disp(str);
end
end
% Function to update lbest
function lbest = update_lbest(cost_p, x, nPopSize)
sm(1, 1)= cost_p(1, nPopSize);
sm(1, 2:3)= cost_p(1, 1:2);
[cost, index] = min(sm);
if index==1
lbest(1, :) = x(nPopSize, :);
else
lbest(1, :) = x(index-1, :);
end
for i = 2:nPopSize-1
sm(1, 1:3)= cost_p(1, i-1:i+1);
[cost, index] = min(sm);
lbest(i, :) = x(i+index-2, :);
end
sm(1, 1:2)= cost_p(1, nPopSize-1:nPopSize);
sm(1, 3)= cost_p(1, 1);
[cost, index] = min(sm);
if index==3
lbest(nPopSize, :) = x(1, :);
else
lbest(nPopSize, :) = x(nPopSize-2+index, :);
end
end
If you are new to optimization, I recommend that you first study each algorithm separately; then you can study how GA and PSO may be combined. You will need basic mathematical skills to understand the operators of the two algorithms and to test their efficiency (which is what really matters).
This code chunk is responsible for parent selection and crossover:
parents = randperm(nPopSize);
for i = 1:nPopSize
    x(i,:) = (pbest(i,:) + pbest(parents(i),:))/2;
    % v(i,:) = pbest(parents(i),:) - x(i,:);
    % v(i,:) = (v(i,:) + v(parents(i),:))/2;
end
It is not really obvious how the randperm-based selection works (I have no experience with Matlab).
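For reference, randperm(nPopSize) returns a random permutation of the integers 1..nPopSize, so parents(i) is a uniformly random mate for particle i, and the child that replaces x(i,:) is simply the arithmetic mean of the two parents' personal bests. A toy illustration (the numbers are invented for the example):
parents = randperm(4)                               % e.g. parents = [3 1 4 2]
pbest   = [1 1; 2 2; 3 3; 4 4];                     % tiny 4-particle, 2-dimensional example
child1  = (pbest(1,:) + pbest(parents(1),:)) / 2    % mean crossover of particle 1 and its mate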
And this is the code that is responsible for updating the velocity and position of each particle:
% Update velocity
v = w*v + c1*rand(nPopSize,nCity).*(pbest-x) + c2*rand(nPopSize,nCity).*(lbest-x);
% Update position
x = x + v;
x = UpdatePosition(x);
This velocity-update strategy uses what is called the inertia weight w, which basically means we preserve part of each particle's velocity history rather than recomputing it from scratch.
It is worth mentioning that the velocity update happens far more often than the crossover, which only runs once every 1000 iterations (when mod(iter,1000) == 0).

Diamond-Square algorithm: How do I determine what tiles I need to run a diamond function and what tiles I need to run a square function on?

I'm working on a diamond-square heightmap generator and I've been stuck on a certain part for a while now.
I'm having trouble determining which tiles I need to run a square() function on and which tiles I need to run a diamond() function on.
I took a look at this guide: http://www.playfuljs.com/realistic-terrain-in-130-lines/ and used the for loop they use as an example, but it didn't seem to work at all.
The preferred language for the answer is Lua (or just kindly point me in the right direction). I just need someone to tell me what I need to do to get a for loop that works for both diamond and square functions.
-- height constraints
local min_height = 10
local max_height = 100
-- the grid
local K = 4
local M = 2^K -- the field is cyclic integer grid 0 <= x,y < M (x=M is the same point as x=0)
local heights = {} -- min_height <= heights[x][y] <= max_height
for x = 0, M-1 do
    heights[x] = {}
end
-- set corners height (all 4 corners are the same point because of cyclic field)
heights[0][0] = (min_height + max_height) / 2
local delta_height = (max_height - min_height) * 0.264
local side = M
local sqrt2 = 2^0.5
repeat
    local dbl_side = side
    side = side/2
    -- squares
    for x = side, M, dbl_side do
        for y = side, M, dbl_side do
            local sum =
                heights[(x-side)%M][(y-side)%M]
                + heights[(x-side)%M][(y+side)%M]
                + heights[(x+side)%M][(y-side)%M]
                + heights[(x+side)%M][(y+side)%M]
            heights[x][y] = sum/4 + (2*math.random()-1) * delta_height
        end
    end
    delta_height = delta_height / sqrt2
    -- diamonds
    for x = 0, M-1, side do
        for y = (x+side) % dbl_side, M-1, dbl_side do
            local sum =
                heights[(x-side)%M][y]
                + heights[x][(y-side)%M]
                + heights[x][(y+side)%M]
                + heights[(x+side)%M][y]
            heights[x][y] = sum/4 + (2*math.random()-1) * delta_height
        end
    end
    delta_height = delta_height / sqrt2
until side == 1
-- draw field
for x = 0, M-1 do
    local s = ''
    for y = 0, M-1 do
        s = s..' '..tostring(math.floor(heights[x][y]))
    end
    print(s)
end

Kaczmarz animation

I am asking for help. I want to animate the Kaczmarz method in Matlab. The method finds the solution of a system of equations by serially projecting the solution vector onto the hyperplanes given by the equations of the system.
I want to make an animation of this vector moving (like a point travelling along the successive projections).
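For reference, a single Kaczmarz step projects the current iterate x onto the hyperplane A(j,:)*x = f(j):
x_new = x + (f(j) - A(j,:)*x) / norm(A(j,:))^2 * A(j,:)'
which is exactly the update performed inside the loop below.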
%% System of equations
% 2x + 3y = 4;
% x - y = 2;
% 6x + y = 15;
%%
A = [2 3;1 -1; 6 1];
f = [4; 2; 15];
resh = pinv(A)*f
x = -10:0.1:10;
e1 = (4 - 2*x)/3;  % y from 2x + 3y = 4
e2 = (x - 2);
e3 = 15 - 6*x;
plot(x,e1)
grid on
%
axis([0 4 -2 2])
hold on
plot(x,e2)
hold on
plot(x,e3)
hold on
precision = 0.001; % precision
iteration = 100;   % number of iterations
lambda = 0.75;     % lambda
[m,n] = size(A);
x = zeros(n,1);
% compute the norm of each row
for i = 1:m
    nrm(i) = norm(A(i,:));
end
for i = 1:1:iteration
    j = mod(i-1,m) + 1;
    if (nrm(j) <= 0), continue, end;
    predx = x;
    x = x + ((f(j) - A(j,:)*x)*A(j,:)')/(nrm(j))^2;
    p = plot(x);
    set(p)
    %pause 0.04;
    hold on;
    if(norm(predx - x) <= precision), break, end
end
I wrote the code for this method, but I can't figure out how to make the animation or how to use the set function.
Your code has a lot of redundant and random pieces. Do not call hold on more than once; the extra calls do nothing. Also, set(p) by itself does nothing: you use set when you want to set some of p's properties to specific values.
Also, you are plotting the result, but not the "change". The change is a line between the previous point and the current one, and that is the only reason you'd want to keep a variable such as predx: to plot it. SO USE IT!
Anyway, the following code animates your algorithm. I added a repeated line that is plotted in green and then deleted, so you can see what the last step does. I also changed the plots at the beginning to plot in red so it is clearer what each thing is.
Change your loop to:
for i = 1:1:iteration
    j = mod(i-1,m) + 1;
    if (nrm(j) <= 0), continue, end;
    predx = x;
    x = x + ((f(j) - A(j,:)*x)*A(j,:)')/(nrm(j))^2;
    plot([predx(1) x(1)],[predx(2) x(2)],'b');     % plot line
    c = plot([predx(1) x(1)],[predx(2) x(2)],'g'); % plot it in green
    pause(0.1)
    children = get(gca, 'children');               % delete the green line
    delete(children(1));
    drawnow
    % hold on;
    if(norm(predx - x) <= precision), break, end
end
This will show the successive projection steps as blue line segments, with the most recent step briefly highlighted in green.

K-means for color quantization - Code not vectorized

I'm doing this exercise by Andrew Ng about using k-means to reduce the number of colors in an image. It works correctly, but I'm afraid it's a little slow because of all the for loops in the code, so I'd like to vectorize them. However, there are some loops that I just can't seem to vectorize effectively. Please help me, thank you very much!
Also if possible please give some feedback on my coding style :)
Here is the link of the exercise, and here is the dataset.
The correct result is given in the link of the exercise.
And here is my code:
function [] = KMeans()
Image = double(imread('bird_small.tiff'));
[rows,cols, RGB] = size(Image);
Points = reshape(Image,rows * cols, RGB);
K = 16;
Centroids = zeros(K,RGB);
s = RandStream('mt19937ar','Seed',0);
% Initialization :
% Pick out K random colours and make sure they are all different
% from each other! This prevents the situation where two of the means
% are assigned to the exact same colour, therefore we don't have to
% worry about division by zero in the E-step
% However, if K = 16 for example, and there are only 15 colours in the
% image, then this while loop will never exit!!! This needs to be
% addressed in the future :(
% TODO : Vectorize this part!
done = false;
while done == false
RowIndex = randperm(s,rows);
ColIndex = randperm(s,cols);
RowIndex = RowIndex(1:K);
ColIndex = ColIndex(1:K);
for i = 1 : K
for j = 1 : RGB
Centroids(i,j) = Image(RowIndex(i),ColIndex(i),j);
end
end
Centroids = sort(Centroids,2);
Centroids = unique(Centroids,'rows');
if size(Centroids,1) == K
done = true;
end
end;
% imshow(imread('bird_small.tiff'))
%
% for i = 1 : K
% hold on;
% plot(RowIndex(i),ColIndex(i),'r+','MarkerSize',50)
% end
eps = 0.01; % Epsilon
IterNum = 0;
while 1
% E-step: Estimate membership given parameters
% Membership: The centroid that each colour is assigned to
% Parameters: Location of centroids
Dist = pdist2(Points,Centroids,'euclidean');
[~, WhichCentroid] = min(Dist,[],2);
% M-step: Estimate parameters given membership
% Membership: The centroid that each colour is assigned to
% Parameters: Location of centroids
% TODO: Vectorize this part!
OldCentroids = Centroids;
for i = 1 : K
PointsInCentroid = Points((find(WhichCentroid == i))',:);
NumOfPoints = size(PointsInCentroid,1);
% Note that NumOfPoints is never equal to 0, as a result of
% the initialization. Or .... ???????
if NumOfPoints ~= 0
Centroids(i,:) = sum(PointsInCentroid , 1) / NumOfPoints ;
end
end
% Check for convergence: Here we use the L2 distance
IterNum = IterNum + 1;
Margins = sqrt(sum((Centroids - OldCentroids).^2, 2));
if sum(Margins > eps) == 0
break;
end
end
IterNum;
Centroids ;
% Load the larger image
[LargerImage,ColorMap] = imread('bird_large.tiff');
LargerImage = double(LargerImage);
[largeRows,largeCols,NewRGB] = size(LargerImage); % RGB is always 3
% TODO: Vectorize this part!
largeRows
largeCols
NewRGB
% Replace each of the pixel with the nearest centroid
NewPoints = reshape(LargerImage,largeRows * largeCols, NewRGB);
Dist = pdist2(NewPoints,Centroids,'euclidean');
[~,WhichCentroid] = min(Dist,[],2);
NewPoints = Centroids(WhichCentroid,:);
LargerImage = reshape(NewPoints,largeRows,largeCols,NewRGB);
% for i = 1 : largeRows
% for j = 1 : largeCols
% Dist = pdist2(Centroids,reshape(LargerImage(i,j,:),1,RGB),'euclidean');
% [~,WhichCentroid] = min(Dist);
% LargerImage(i,j,:) = Centroids(WhichCentroid,:);
% end
% end
% Display new image
imshow(uint8(round(LargerImage)),ColorMap)
UPDATE: Replaced
for i = 1 : K
for j = 1 : RGB
Centroids(i,j) = Image(RowIndex(i),ColIndex(i),j);
end
end
with
for i = 1 : K
Centroids(i,:) = Image(RowIndex(i),ColIndex(i),:);
end
I think this may be vectorized further by using linear indexing, but for now I should just focus on the while loop since it takes most of the time.
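For reference, that initialization could be written with linear indexing roughly as follows (a sketch based on the Points = reshape(Image, rows*cols, RGB) matrix built earlier; sub2ind maps each (row, col) pair to its row in Points):
idx = sub2ind([rows, cols], RowIndex(1:K)', ColIndex(1:K)');   % K-by-1 linear indices
Centroids = Points(idx, :);                                    % K-by-RGB, same pixels as the loop picks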
Also when I tried @Dev-iL's suggestion and replaced
for i = 1 : K
PointsInCentroid = Points((find(WhichCentroid == i))',:);
NumOfPoints = size(PointsInCentroid,1);
% Note that NumOfPoints is never equal to 0, as a result of
% the initialization. Or .... ???????
if NumOfPoints ~= 0
Centroids(i,:) = sum(PointsInCentroid , 1) / NumOfPoints ;
end
end
with
E = sparse(1:size(WhichCentroid), WhichCentroid' , 1, Num, K, Num);
Centroids = (E * spdiags(1./sum(E,1)',0,K,K))' * Points ;
the results were always worse: with K = 16 the first takes 2.414 s and the second takes 2.455 s; with K = 32 the first takes 4.529 s and the second takes 5.022 s. It seems vectorization did not help in this case, but maybe there's something wrong with my code :(
However, when I replaced
Dist = pdist2(Points,Centroids,'euclidean');
[~, WhichCentroid] = min(Dist,[],2);
(in the while loop) with
Dist = bsxfun(@minus,dot(Centroids',Centroids',1)' / 2 , Centroids * Points' );
[~, WhichCentroid] = min(Dist,[],1);
WhichCentroid = WhichCentroid';
the code ran much faster, especially when K is large (K = 32).
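For reference, the replacement gives the same assignments as pdist2 because ||c - p||^2 = ||c||^2 - 2*c'*p + ||p||^2, and ||p||^2 is the same for every centroid, so minimizing ||c||^2/2 - c'*p over the centroids picks the same winner while avoiding the square roots. An equivalent sketch using implicit expansion (R2016b and later) instead of bsxfun:
CentSq = sum(Centroids.^2, 2) / 2;        % K-by-1: ||c_k||^2 / 2
Dist   = CentSq - Centroids * Points';    % K-by-N; the argmin over rows matches the true squared distance
[~, WhichCentroid] = min(Dist, [], 1);
WhichCentroid = WhichCentroid';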
Thank you everyone!

Why is my Julia shared array code running so slow?

I'm trying to implement Smith-Waterman alignment in parallel using Julia (see Figure 1 of http://www.cs.virginia.edu/~rl6sf/paper_dump/2011:12:33:22.pdf), but the parallel version is running much slower than the serial one. I'm using shared arrays to do this and figure I am doing something silly that is making the code run slowly. Could someone take a look and see whether my code is as optimized as possible? The parallel version should run faster than the serial one.
The basic idea is to compute the anti-diagonal elements of a matrix in parallel, from the upper-left to the lower-right corner, and to update them. I'm trying to use 32 cores on a shared-memory machine. I have a SharedArray matrix and I compute the elements of each anti-diagonal in parallel as shown below. The while loops in the spSW function submit tasks to the workers, synchronized per anti-diagonal, using the helper function shared_get_score!(). The main goal of this function is to fill in each element of the shared arrays "matrix" and "path".
function spSW(seq1,seq2,p)
indel = -1
match = 2
seq1 = "^$seq1"
seq2 = "^$seq2"
col = length(seq1)
row = length(seq2)
wl = workers()
matrix,path = shared_initialize_path(seq1,seq2)
for j = 2:col
jcol = j
irow = 2
@sync begin
count = 0
while jcol > 1 && irow < row + 1
#println(j," ",irow," ",jcol)
if seq1[jcol] == seq2[irow]
equal = true
else
equal = false
end
w = wl[(count % p) + 1]
@async remotecall_wait(w,shared_get_score!,matrix,path,equal,indel,match,irow,jcol)
jcol -= 1
irow += 1
count += 1
end
end
end
for i = 3:row
jcol = col
irow = i
@sync begin
count = 0
while irow < row+1 && jcol > 1
#println(j," ",irow," ",jcol)
if seq1[jcol] == seq2[irow]
equal = true
else
equal = false
end
w = wl[(count % p) + 1]
@async remotecall_wait(w,shared_get_score!,matrix,path,equal,indel,match,irow,jcol)
jcol -= 1
irow += 1
count += 1
end
end
end
return matrix,path
end
The other helper functions are:
function shared_initialize_path(seq1,seq2)
col = length(seq1)
row = length(seq2)
matrix = convert(SharedArray,fill(0,(row,col)))
path = convert(SharedArray,fill(0,(row,col)))
return matrix,path
end
@everywhere function shared_get_score!(matrix,path,equal,indel,match,i,j)
pathvalscode = ["-","|","M"]
pathvals = [1,2,3]
scores = []
push!(scores,matrix[i,j-1]+indel)
push!(scores,matrix[i-1,j]+indel)
if equal
push!(scores,matrix[i-1,j-1]+match)
else
push!(scores,matrix[i-1,j-1]+indel)
end
val,ind = findmax(scores)
if val < 0
matrix[i,j] = 0
else
matrix[i,j] = val
end
path[i,j] = pathvals[ind]
end
Does anyone see an obvious way to make this run faster? Right now it's about 10 times slower than the serial version.
