How to speed up MATLAB integration?

I have the following code:
function [] = Solver( t )
%pre-declaration
foo=[1,1,1];
fooCell = num2cell(foo);
[q, val(q), star]=fooCell{:};
%functions used in prosomoiwsh
syms q val(q) star;
qd1=symfun(90*pi/180+30*pi/180*cos(q),q);
qd2=symfun(90*pi/180+30*pi/180*sin(q),q);
p1=symfun(79*pi/180*exp(-1.25*q)+pi/180,q);
p2=symfun(79*pi/180*exp(-1.25*q)+pi/180,q);
e1=symfun(val-qd1,q);
e2=symfun(val-qd2,q);
T1=symfun(log(-(1+star)/star),star);
T2=symfun(log(star/(1-star)),star);
%anonymous function handles
lambda=[0.75;10.494441313222076];
calcEVR_handles = { ...
@(t,x)[double(subs(diff(subs(T1,star,e1/p1),q)+subs(lambda(1)*T1,star,e1/p1),{diff(val,q);val;q},{x(2);x(1);t})), ...
double(subs(diff(subs(T1,star,e1/p1),q)+subs(lambda(1)*T1,star,e1/p1),{diff(val,q);val;q},{0;x(1);t})), ...
double(subs(double(subs(subs(diff(T1,star),star,e1/p1),{val;q},{x(1);t}))/p1,q,t))]; ...
@(t,x)[double(subs(diff(subs(T2,star,e2/p2),q)+subs(lambda(2)*T2,star,e2/p2),{diff(val,q);val;q},{x(4);x(3);t})), ...
double(subs(diff(subs(T2,star,e2/p2),q)+subs(lambda(2)*T2,star,e2/p2),{diff(val,q);val;q},{0;x(3);t})), ...
double(subs(double(subs(subs(diff(T2,star),star,e2/p2),{val;q},{x(3);t}))/p2,q,t))]};
options = odeset('AbsTol',1e-1,'RelTol',1e-1);
[T,x_r] = ode23(@prosomoiwsh,[0 t],[80*pi/180;0;130*pi/180;0;2.4943180186983711;11.216948999754299],options);
save newresult T x_r
function dx_th = prosomoiwsh(t,x_th)
%declarations
k=0.80773938740480955;
nf=6.2860930902603602;
hGa=0.16727117784664769;
hGb=0.010886618389781832;
dD=0.14062935253218495;
s=0.64963817519705203;
IwF={[4.5453398382686956 5.2541234145178066 -6.5853972592002235 7.695225990702979];[-4.4358339284697337 -8.1138542053372298 -8.2698210582548395 3.9739729629084071]};
IwG={[5.7098975358444752 4.2470526600975802 -0.83412489434697168 0.53829395964565041] [1.8689492167233894 -0.0015017513794517434 8.8666804106266461 -1.0775021663921467];[6.9513235639494155 -0.8133752392893685 7.4032432556804162 3.1496138243338709] [5.8037182454981568 2.0933267947187457 4.852362963697928 -0.10745559204132382]};
IbF={-1.2165533594615545;7.9215291787744917};
IbG={2.8425752327892844 2.5931576770598168;9.4789237295474873 7.9378928037841252};
p=2;
m=2;
signG=1;
n_vals=[2;2];
nFixedStates=4;
gamma_nn=[0.31559428834175318;9.2037894041383641];
th_star_guess=[2.4943180186983711;11.216948999754299];
%solution
x = x_th(1:nFixedStates);
th = x_th(nFixedStates+1:nFixedStates+p);
f = zeros(m,1);
G = zeros(m,m);
ZF = zeros(p,m);
ZG = zeros(p,m,m);
for i=1:m
[f(i), ZF(:,i)] = calculate_neural_output(x, IwF{i}, IbF{i}, th);
for j=1:m
[G(i,j), ZG(:,i,j)] = calculate_neural_output(x, IwG{i,j}, IbG{i,j}, th);
end
end
detG = det(G);
if m == 1
adjG = 1;
else
adjG = detG*G^-1;
end
E = zeros(m,1);
V = zeros(m,1);
R = zeros(m,m);
for i=1:m
EVR=calcEVR_handles{i}(t,x);
E(i)=EVR(1);
V(i)=EVR(2);
R(i,i)=EVR(3);
end
Rinv = R^-1;
prod_R_E = R*E;
ub = f + Rinv * (V + k*E) + nf*prod_R_E;
ua = - detG / (detG^2+dD) * (adjG * ub) ;
u = ua - signG * (hGa*(ua'*ua) + hGb*(ub'*ub)) * prod_R_E;
dx_th = zeros(nFixedStates+p, 1); %preallocation
%System in form (1) of the IEEE paper
[vec_sys_f, vec_sys_G] = sys_f_G(x);
dx_nm = vec_sys_f + vec_sys_G*u;
%Calculation of dx
index_start = 1;
index_end = -1;
for i=1:m
index_end = index_end + n_vals(i);
for j=index_start:index_end
dx_th(j) = x(j+1);
end
dx_th(index_end+1) = dx_nm(i);
index_start = index_end + 2;
end
%Calculation of dth
AFvalueT = zeros(p,m);
for i=1:m
AFvalueT(:,i) = 0;
for j=1:m
AFvalueT(:,i) = AFvalueT(:,i)+ZG(:,i,j)*ua(j);
end
end
dx_th(nFixedStates+1:nFixedStates+p) = diag(gamma_nn)*( (ZF+AFvalueT)*prod_R_E -s*(th-th_star_guess) );
display(t)
end
function [y, Z] = calculate_neural_output(input, Iw, Ib, state)
Z = [tanh(Iw*input+Ib);1];
y = state' * Z;
end
function [ f,g ] = sys_f_G( x )
Iz1=0.96;
Iz2=0.81;
m1=3.2;
m2=2.0;
l1=0.5;
l2=0.4;
g=9.81;
q1=x(1);
q2=x(3);
q1dot=x(2);
q2dot=x(4);
M=[Iz1+Iz2+m1*l1^2/4+m2*(l1^2+l2^2/4+l1*l2*cos(q2)),Iz2+m2*(l2^2/4+l1*l2*cos(q2)/2);Iz2+m2*(l2^2/4+l1*l2*cos(q2)/2),Iz2+m2*l2^2/4];
c=0.5*m2*l1*l2*sin(q2);
C=[-c*q2dot,-c*(q1dot+q2dot);c*q1dot,0];
G=[0.5*m1*g*l1*cos(q1)+m2*g*(l1*cos(q1)+0.5*l2*cos(q1+q2));0.5*m2*g*l2*cos(q1+q2)];
f=-M\(C*[q1dot;q2dot]+G);
g=inv(M);
end
end
Its purpose is to simulate the control of a 2-DOF robotic arm using a certain control law. The results I get after running the simulation are correct (I have a graph of the output I should expect), but it takes ages to finish!
Is there anything I could do to speed up the process?

In order to improve the computational speed of any integration in Matlab, a few options are available to you:
Reduce the required accuracy (which you already have done).
Use an integrator adapted to the problem. As mentioned by @sanchises, ode23 can sometimes be slower than another of Matlab's ode solvers (for instance if your equation is stiff). You could try to determine the most suitable solver from the documentation... or simply try them all, as in the sketch after this list!
The best solution, but by far the most time-consuming, would be to use a compiled language such as C or Fortran. If the integration is only a part of your Matlab program, you could use Mex files and translate only the integration to a compiled language. You could also create dynamic libraries in your compiled language and load them in Matlab using loadlibrary. I use loadlibrary and an integration routine written in Fortran for the integration of orbits and trajectories, and I get over a 100x speedup with Fortran versus Matlab! Of course, technically, the integration is then no longer in Matlab... but the library or Mex-file trick lets you convert only the integration part of your program to a different language. A number of open-source integrators are available, such as ODEPACK or RKSUITE in Fortran; then you only need to write a wrapper and your dynamics function in the chosen language.
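To make the "try them all" suggestion concrete, here is a minimal sketch (on a toy stiff Van der Pol system, not your model, with the same loose tolerances as in your question) that times each candidate solver on the same problem:
rhs = @(t,y) [y(2); 100*(1 - y(1)^2)*y(2) - y(1)];  % toy stiff system (Van der Pol, mu = 100)
opts = odeset('AbsTol',1e-1,'RelTol',1e-1);
solvers = {@ode23, @ode45, @ode113, @ode15s, @ode23s};
for k = 1:numel(solvers)
    tic;
    solvers{k}(rhs, [0 100], [2; 0], opts);   % same problem, same tolerances
    fprintf('%s: %.3f s\n', func2str(solvers{k}), toc);
end
On a stiff problem, the implicit solvers ode15s or ode23s will typically finish orders of magnitude faster than ode23 or ode45.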
So to put it in a nutshell: if you are going to use this integration a lot, I would advise using a compiled language. If not, you should make do with Matlab, and be patient!
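One more Matlab-side option, independent of the integrator (an assumption about where the time goes, since calling subs on symbolic functions inside the ODE right-hand side is usually very expensive): convert the symbolic expressions to plain numeric function handles once, before the integration, using matlabFunction. A minimal sketch with one of the expressions from the question:
syms q
qd1_sym = 90*pi/180 + 30*pi/180*cos(q);        % same expression as qd1 in the question
qd1_fun = matlabFunction(qd1_sym, 'Vars', q);  % plain numeric handle, built once
qd1_fun(0.5)   % evaluates without the symbolic engine, cheap enough for an ODE loop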

Related

R estimating one independent variable more than once

I am trying to estimate a multinomial logit model for predicting systemic banking crises with panel data. Below is my code. I have run this code before and it worked fine. However, I changed the names of the independent variables and used the new data to run the model again, and ever since then R has been estimating multiple coefficients for the x1 variable. When I drop x1, the model estimation turns out fine again. I have attached screenshots of the results: Faulty_result1, Faulty_result_2 and Result_with_x1_dropped. I can't seem to figure out what the issue is. Any help will be much appreciated.
#Remove all items from memory (if any)
rm(list=ls(all=TRUE))
#Set working directory to load files
setwd("D:/PhD/Codes")
#Load necessary libraries
library(readr)
library(nnet)
library(plm)
#Load data
my_data <- read_csv("D:/PhD/Data/xx_Final Data_4.csv",
                    col_types = cols(`Time Period` = col_date(format = "%d/%m/%Y"),
                                     y = col_factor(levels = c("0", "1", "2")),
                                     x2 = col_double(), x5 = col_double(),
                                     x9 = col_double(), x11 = col_double(),
                                     x13 = col_double(), x24 = col_double()),
                    na = "NA")
#Change levels from numeric to character
levels(my_data$y) <- c("Tranquil", "Pre-crisis", "Crisis")
str(my_data$y)
#Create Panel Data
p_data=pdata.frame(my_data)
#Export dataset
write_csv(p_data,"D:/PhD/Data/Clean_Final Data_4.csv")
#Drop unnecessary columns
p <- subset(p_data, select = c(3:27))
#Set reference level
p$y <- relevel(p$y, ref="Tranquil")
#Create Model
model <- multinom(y~ ., data = p)
summary(model)
stargazer::stargazer(model, type = "text")
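One thing worth checking (a guess, since the data are not available here): if the renamed x1 column was re-imported as character or factor, multinom() expands it into one dummy term per level, which shows up as several x1 estimates in the summary. A quick diagnostic:
# hypothetical check: a factor/character x1 would explain the repeated x1 terms
str(p$x1)
# if it prints chr or Factor instead of num, coerce it back:
p$x1 <- as.numeric(as.character(p$x1))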

OpenMDAO hierarchical solvers recording

In OpenMDAO, is there any recommendation on how to record and read solver cases if the model is composed of multiple groups/cycles, and multiple nonlinear solvers?
I have a model built of two cycles (cycle1 and cycle2), one of them containing two subcycles (cycle1_1 and cycle1_2). For now I am attaching a recorder to each of my nonlinear solvers:
solver1 = model.cycle1.nonlinear_solver
solver1_1 = model.cycle1.cycle1_1.nonlinear_solver
solver1_2 = model.cycle1.cycle1_2.nonlinear_solver
solver2 = model.cycle2.nonlinear_solver
solver1.add_recorder(recorder)
solver1_1.add_recorder(recorder)
solver1_2.add_recorder(recorder)
solver2.add_recorder(recorder)
When trying to read the results with:
cr = om.CaseReader(results)
I am getting the following error:
RuntimeError: Can't parse solver iteration coordinate:
rank0:root._solve_nonlinear|0|NLRunOnce|0|cycle1._solve_nonlinear|0|NonlinearBlockGS|1|cycle1.cycle1_1._solve_nonlinear|1|NonlinearBlockGS|1
I am looking to get information about the convergence history and some plots of the coupling variables.
EDIT: My code has a structure similar to that in https://openmdao.org/newdocs/versions/latest/basic_user_guide/multidisciplinary_optimization/sellar.html, with the groups defined in setup:
import openmdao.api as om

class MDA(om.Group):
    # class ObjCmp(om.ExplicitComponent):
    #     some objective component
    # class ConCmp(om.ExplicitComponent):
    #     some constraint component

    def setup(self):
        cycle1 = self.add_subsystem('cycle1', om.Group(), promotes=['*'])
        cycle1_1 = cycle1.add_subsystem('cycle1_1', om.Group(), promotes=['*'])
        cycle1_1_comp = cycle1_1.add_subsystem('comp', om.ExecComp('x1 = 3 + x2'), promotes=["*"])
        cycle1_2 = cycle1.add_subsystem('cycle1_2', om.Group(), promotes=['*'])
        cycle1_2_comp = cycle1_2.add_subsystem('comp', om.ExecComp('x2 = 3 + x1 + y'), promotes=["*"])
        cycle2 = self.add_subsystem('cycle2', om.Group(), promotes=['*'])
        cycle2.add_subsystem('comp', om.ExecComp('y = x1 + 2'), promotes=['*'])

p = om.Problem(model=MDA())
model = p.model
p.setup()
p.run_model()
Unfortunately, as of OpenMDAO V3.16 this looks like a bug. It's been logged as a high-priority issue on the OpenMDAO development backlog: Issue #2453
I can replicate it with the following script:
import openmdao.api as om
p = om.Problem()
model = p.model
cycle1 = p.model.add_subsystem('cycle1', om.Group(), promotes=['*'])
cycle1_1 = cycle1.add_subsystem('cycle1_1', om.Group(), promotes=['*'])
cycle1_1_comp = cycle1_1.add_subsystem('comp', om.ExecComp('x1 = 3 + x2'), promotes=["*"])
cycle1_2 = cycle1.add_subsystem('cycle1_2', om.Group(), promotes=['*'])
cycle1_2_comp = cycle1_2.add_subsystem('comp', om.ExecComp('x2 = 3 + x1 + y'), promotes=["*"])
cycle2 = p.model.add_subsystem('cycle2', om.Group(), promotes=['*'])
cycle2.add_subsystem('comp', om.ExecComp('y = x1 + 2'), promotes=['*'])
solver1 = model.cycle1.nonlinear_solver
solver1_1 = model.cycle1.cycle1_1.nonlinear_solver
solver1_2 = model.cycle1.cycle1_2.nonlinear_solver
solver2 = model.cycle2.nonlinear_solver
print(solver1, solver1_1, solver1_2, solver2)
recorder = om.SqliteRecorder('cases.db')
solver1.add_recorder(recorder)
# recorders on nested solvers trigger the bug
# solver1_1.add_recorder(recorder)
# solver1_2.add_recorder(recorder)
# Kind-of workaround, put the recorder on the child component/group instead
cycle1_1_comp.add_recorder(recorder)
cycle1_2_comp.add_recorder(recorder)
solver2.add_recorder(recorder)
p.setup()
p.run_model()
reader = om.CaseReader('cases.db')
print(reader.list_sources())
It seems to be the nested recorders that trigger the bug. As a workaround of sorts, you can put the recorder on the lower-level group/component instead. That makes it a bit harder to know which cases came from which solver iteration, but the naming scheme of the iteration coordinates should at least help a little there. Hopefully that gets you moving in the meantime, while the bug is fixed.
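For completeness, a minimal sketch of reading back what the workaround records (the names follow the script above; adjust them to your model). The iteration coordinate embedded in each case name still tells you which solver iteration produced the case:
import openmdao.api as om

reader = om.CaseReader('cases.db')
print(reader.list_sources())   # e.g. ['root.cycle1.cycle1_1.comp', ...]
for source in reader.list_sources():
    for case in reader.get_cases(source, recurse=False):
        # case.name holds the iteration coordinate for this case
        print(source, case.name)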

Confused about the use of validation set here

In the main.py of the px2graph project, the training and validation part looks like this:
splits = [s for s in ['train', 'valid'] if opt.iters[s] > 0]
start_round = opt.last_round - opt.num_rounds

# Main training loop
for round_idx in range(start_round, opt.last_round):
    for split in splits:
        print("Round %d: %s" % (round_idx, split))
        loader.start_epoch(sess, split, train_flag, opt.iters[split] * opt.batchsize)
        flag_val = split == 'train'

        for step in tqdm(range(opt.iters[split]), ascii=True):
            global_step = step + round_idx * opt.iters[split]
            to_run = [sample_idx, summaries[split], loss, accuracy]
            if split == 'train': to_run += [optim]

            # Do image summaries at the end of each round
            do_image_summary = step == opt.iters[split] - 1
            if do_image_summary: to_run[1] = image_summaries[split]

            # Start with lower learning rate to prevent early divergence
            t = 1/(1+np.exp(-(global_step-5000)/1000))
            lr_start = opt.learning_rate / 15
            lr_end = opt.learning_rate
            tmp_lr = (1-t) * lr_start + t * lr_end

            # Run computation graph
            result = sess.run(to_run, feed_dict={train_flag:flag_val, lr:tmp_lr})

            out_loss = result[2]
            out_accuracy = result[3]
            if sum(out_loss) > 1e5:
                print("Loss diverging...exiting before code freezes due to NaN values.")
                print("If this continues you may need to try a lower learning rate, a")
                print("different optimizer, or a larger batch size.")
                return

            time_str = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
            print("{}: step {}, loss {:g}, acc {:g}".format(time_str, global_step, out_loss, out_accuracy))

            # Log data
            if split == 'valid' or (split == 'train' and step % 20 == 0) or do_image_summary:
                writer.add_summary(result[1], global_step)
                writer.flush()

        # Save training snapshot
        saver.save(sess, 'exp/' + opt.exp_id + '/snapshot')
        with open('exp/' + opt.exp_id + '/last_round', 'w') as f:
            f.write('%d\n' % round_idx)
It seems that the author only gets the result for each batch of the validation set. I am wondering: if I want to observe whether the model is improving or reaching its best performance, should I use the result on the whole validation set?
If the validation set is small enough, we can calculate the loss and accuracy on the whole validation set during training to observe performance. If the validation set is too large, it is better to compute batch-wise validation results over multiple steps and average them.
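A minimal sketch of the second option, reusing the names from the loop above (loader, sess, loss, accuracy, train_flag and opt are assumptions about the surrounding script): run the whole validation split once per round and report the averaged metrics.
import numpy as np

# run every validation batch once, then average the per-batch results
loader.start_epoch(sess, 'valid', train_flag, opt.iters['valid'] * opt.batchsize)
val_losses, val_accs = [], []
for step in range(opt.iters['valid']):
    batch_loss, batch_acc = sess.run([loss, accuracy],
                                     feed_dict={train_flag: False})
    val_losses.append(np.mean(batch_loss))
    val_accs.append(batch_acc)
print('valid: mean loss %.4g, mean acc %.4g'
      % (np.mean(val_losses), np.mean(val_accs)))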

Dealing with underflow while calculating GMM parameters using EM

I am currently running training in MATLAB on a matrix of log-spectrum samples and I am constantly dealing with underflow problems. I understood that I need to work with logs in order to deal with the underflow.
I am still struggling with underflow though: when I calculate the mean (mue), because it is negative I can't work in logs, so I need the real values, which underflow.
The equations I am working with are the standard EM update equations for a GMM (they were posted as an image in the original question).
In the MATLAB code I calculate log_tau in order to avoid underflow, but when calculating mue I need exp(log_tau), which goes to zero.
I am attaching the relevant MATLAB code.
Note: in the code, the variable called alpha is tau.
for i = 1 : 50
log_c = Logsum(log_alpha,1) - log(N);
c = exp(log_c);
mue = DataMat*alpha./(repmat(exp(Logsum(log_alpha,1)),FrameSize,1));
log_abs_mue = log(abs(mue));
log_SigmaSqr = log((DataMat.^2)*alpha) - repmat(Logsum(log_alpha,1),FrameSize,1) - 2*log_abs_mue;
SigmaSqr = exp(log_SigmaSqr);
for j=1:N
rep_DataMat(:,:,j) = repmat(DataMat(:,j),1,M);
% log of the Gaussian density: note the minus sign on the quadratic term
log_gamma(j,:) = log_c - 0.5*(FrameSize*log(2*pi)+sum(log_SigmaSqr)) - sum((rep_DataMat(:,:,j) - mue).^2./(2*SigmaSqr));
end
log_alpha = log_gamma - repmat(Logsum(log_gamma,2),1,M);
alpha = exp(log_alpha);
end
c = exp(log_c);
SigmaSqr = exp(log_SigmaSqr);
Does anyone see how I can avoid this, or what needs to be fixed in the code?
What I did was add this line to the MATLAB code:
mue(isnan(mue))=0; %fix 0/0 problem
and this one:
SigmaSqr(SigmaSqr==0)=1; %fix the case mue_k = x_k
Not sure if this is the best solution, but it seems to work...
Does anyone have a better idea?
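For reference, the standard tool for this kind of problem is the log-sum-exp trick (presumably what the Logsum helper in the code above implements); a minimal sketch:
% shift by the max before exponentiating, so exp() cannot underflow
% for every term at once
function s = logsumexp(logv, dim)
m = max(logv, [], dim);
s = m + log(sum(exp(logv - m), dim));
end
Quantities that must stay signed, like mue, can be kept as a log-magnitude plus a separate sign vector, so the exponential is only taken once, at full scale.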

Multiple linear optimizations

I'm interested in solving a few hundred linear programs in MATLAB. At the moment this is done in a for-loop with linprog.
The vectors used have identical dimensions and are rows of one matrix.
for combination_id = 1:1000
[tempOperatingPointsVectors,tempTargetValue, exitflag] = ...
linprog( lo_c(combination_id,:), ...
[], [], ...
lo_G(:,:,combination_id), lo_d(:,combination_id), ...
lo_u(:,combination_id), lo_v(:,combination_id), ...
x0_in, options);
end
Is there a way of using linprog with the whole matrices instead of picking out each row?
I also tried a parfor loop, but since the operations in each iteration are very small, there is no speed improvement.
Why not set up one big linear program and solve all of it at once?
Since I don't have your data, I cannot test the following code, but the basic idea should work.
nVar = size(lo_c,2);
xVar = 1:nVar;
uBound = lo_u(:, 1);
vBound = lo_v(:, 1);
dMat = lo_d(:, 1);
gMat = lo_G(:,:, 1);
objMat = lo_c(1,:);
x0_inMat = x0_in;
for combination_id = 2:1000
xVar = [xVar, xVar(end)+1:xVar(end)+nVar];
uBound = [uBound; lo_u(:, combination_id)];
vBound = [vBound; lo_v(:, combination_id)];
dMat = [dMat; lo_d(:, combination_id)];
% the subproblems are independent, so the equality constraints must be
% block diagonal rather than stacked
gMat = blkdiag(gMat, lo_G(:,:, combination_id));
% the objective of the combined problem is one long vector
objMat = [objMat, lo_c(combination_id,:)];
x0_inMat = [x0_inMat; x0_in];
end
[tempOperatingPointsVectors,tempTargetValue, exitflag] = ...
linprog( objMat, ...
[], [], ...
gMat, dMat, ...
uBound, vBound, ...
x0_inMat, options);
Should do the trick.
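One caveat, untested like the code above: with hundreds of blocks, blkdiag on dense matrices stores all the off-block zeros. Building the constraint matrix from sparse blocks keeps memory and solve time down:
% accumulate the block-diagonal equality matrix in sparse form,
% so the off-block zeros are never stored
gMat = sparse(lo_G(:,:,1));
for combination_id = 2:1000
gMat = blkdiag(gMat, sparse(lo_G(:,:,combination_id)));
end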
