How to add a constraint in CVaR optimization code in Matlab?

I want to find the optimal weights in a multi-asset portfolio by minimizing the CVaR.
This is the code that gives the minimum-risk portfolio for a target return:
p = PortfolioCVaR('ProbabilityLevel', .99, 'AssetNames', names);
p = p.setScenarios(R); % R= asset returns
p = p.setDefaultConstraints();
wts = p.estimateFrontier(20);
portRisk = p.estimatePortRisk(wts);
portRet = p.estimatePortReturn(wts);
clf
visualizeFrontier(p, portRisk, portRet);
%% Compute portfolio with given level of return
tic;
wt = p.estimateFrontierByReturn(.05/100);
toc;
pRisk = p.estimatePortRisk(wt);
pRet = p.estimatePortReturn(wt);
The sum of the weights equals 1. My question is how to add a constraint such that no asset can have a weight greater than 60%.
Thank you for any help you can provide.

Use the object's setBounds method:
>> p = setBounds(p,LowerBoundsVector,UpperBoundsVector);
See
>> doc setBounds
for more info.
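For the 60% cap specifically, a minimal sketch (assuming the same p and names as in the question, long-only weights, and the same cap for every asset):
nAssets = numel(names);            % number of assets in the portfolio
lb = zeros(nAssets, 1);            % lower bound: no short positions
ub = 0.6 * ones(nAssets, 1);       % upper bound: at most 60% in any single asset
p = setBounds(p, lb, ub);          % apply the bounds to the portfolio object
wt = p.estimateFrontierByReturn(.05/100);   % re-solve with the new constraint
Set the bounds before calling estimateFrontier or estimateFrontierByReturn so the cap is part of the optimization.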

Related

Tune a learner with the searchspace parameter setting

I am trying to tune a ranger learner with the search_space parameter setting. The purpose is to find, by grid search, the optimal K (the number of input indicators; I used a filter pipe with importance.filter.nfeat) and D (the depth of each tree, i.e., classif.ranger.max.depth). D's value should not be greater than the number of input indicators K. The values searched for D are set proportionally to K: D ∈ {10%, 25%, 50%, 100%} ∗ K, and values of D ≤ 0 are rejected.
However, I am unfamiliar with writing function code within the search space, so I cannot achieve this (D ends up greater than K).
My question is:
How do I set a parameter that depends on another one in the search space? (I think this is different from the depends mechanism mentioned in the mlr3 book.)
Here is my code:
ranger = lrn("classif.ranger", importance = "impurity", predict_type = "prob", id = "ranger")
graph = po("filter", flt("importance"), filter.nfeat = 3) %>>% ranger %>>% po("threshold")
plot(graph)
graph_learner = GraphLearner$new(graph)
searchspace = ps(
  importance.filter.nfeat = p_int(1, length(task$feature_names)),
  classif.ranger.max.depth = p_int(1, length(task$feature_names)),
  .extra_trafo = function(x, param_set) {
    x = graph_learner$param_set$importance.filter.nfeat * c(.1, .25, .50, 1)
  }
)
inst1 = TuningInstanceMultiCrit$new(
  task,
  learner = graph_learner,
  resampling = rsmp("cv"),
  measures = msrs(c("classif.ce", "classif.bacc", "classif.mcc")),
  terminator = trm("evals", n_evals = 50),
  search_space = searchspace
)
tuner = tnr("grid_search")
# reduce logging output
lgr::get_logger("bbotk")$set_threshold("warn")
# The tuning procedure may take some time:
set.seed(1234)
tuner$optimize(inst1)
#Returns list with optimal configurations and estimated performance.
inst1$result
# We can plot the performance against the number of features.
# If we do so, we see the possible trade-off between sparsity and predictive performance:
arx = as.data.table(inst1$archive)
ggplot(arx, aes(x = importance.filter.nfeat, y = classif.ce)) + geom_line()
How do I know which indicators are used in the tuned model? We only see the trade-off between sparsity and predictive performance; are they based on the importance rank?
I have also tried feature selection. With FS, I could get the optimal feature set. So what is the relationship between tuning nfeat and feature selection? Which one is preferred in real practice?
# https://mlr3gallery.mlr-org.com/posts/2020-09-14-mlr3fselect-basic/
resampling = rsmp("cv")
measure = msr("classif.mcc")
terminator = trm("none")
ranger_lrn = lrn("classif.ranger", importance = "impurity", predict_type = "prob")
#
instance = FSelectInstanceSingleCrit$new(
  task = task,
  learner = ranger_lrn,
  resampling = resampling,
  measure = measure,
  terminator = terminator,
  store_models = TRUE)
#
fselector = fs("rfe", recursive = FALSE)
set.seed(1234)
fselector$optimize(instance)
#
as.data.table(instance$archive)
instance$result
instance$result_feature_set
instance$result_y
# set new feature_set
# task$select(instance$result_feature_set)
Does this answer question 1?
How to set specific values in `paradox`?
It seems you could simply set up your own data table as shown there, remove the rows where D > K, and then use the design_points tuner.

How to implement Roulette Wheel Selection and Rank Selection in Matlab code for the Traveling Salesman Problem?

I have an assignment coding a genetic algorithm for the traveling salesman problem. I've written some code giving correct results using Tournament Selection.
The problem is that I have to do Wheel and Rank selection, and the results I get are incorrect.
Here is my code using Tournament Selection:
clc;
clear all;
close all;
nofCities = 30;
initialPopulationSize = nofCities*nofCities;
generations = nofCities*ceil(nofCities/10);
cities = floor(rand([nofCities 2])*100+1);
figure;
hold on;
scatter(cities(:,1), cities(:,2), 5, 'b','fill');
line(cities(:,1), cities(:,2));
line(cities([1 end],1), cities([1 end],2));
axis([0 110 0 110]);
population = zeros(initialPopulationSize ,nofCities);
for i=1:initialPopulationSize
    population(i,:) = randperm(nofCities);
end
distanceMatrix = zeros(nofCities);
for i=1:nofCities
    for j=1:nofCities
        if (i==j)
            distanceMatrix(i,j)=0;
        else
            distanceMatrix(i,j) = sqrt((cities(i,1)-cities(j,1))^2+(cities(i,2)-cities(j,2))^2);
        end
    end
end
for u=1:generations
    tourDistance = zeros(initialPopulationSize ,1);
    for i=1:initialPopulationSize
        for j=1:length(cities)-1
            tourDistance(i) = tourDistance(i) + distanceMatrix(population(i,j),population(i,j+1));
        end
    end
    for i=1:initialPopulationSize
        tourDistance(i) = tourDistance(i) + distanceMatrix(population(i,end),population(i,1));
    end
    min(tourDistance)
    newPopulation = zeros(initialPopulationSize,nofCities);
    for k=1:initialPopulationSize
        child = zeros(1,nofCities);
        %tournament start
        for i=1:5
            tournamentParent1(i) = ceil(rand()*initialPopulationSize);
        end
        p1 = find(tourDistance == min(tourDistance([tournamentParent1])));
        parent1 = population(p1(1), :);
        for i=1:5
            tournamentParent2(i) = ceil(rand()*initialPopulationSize);
        end
        p2 = find(tourDistance == min(tourDistance([tournamentParent2])));
        parent2 = population(p2(1), :);
        %tournament end
        %crossover
        startPos = ceil(rand()*(nofCities/2));
        endPos = ceil(rand()*(nofCities/2)+10);
        for i=1:nofCities
            if (i>startPos && i<endPos)
                child(i) = parent1(i);
            end
        end
        for i=1:nofCities
            if (isempty(find(child==parent2(i))))
                for j=1:nofCities
                    if (child(j) == 0)
                        child(j) = parent2(i);
                        break;
                    end
                end
            end
        end
        newPopulation(k,:) = child;
    end
    %mutation
    mutationRate = 0.015;
    for i=1:initialPopulationSize
        if (rand() < mutationRate)
            pos1 = ceil(rand()*nofCities);
            pos2 = ceil(rand()*nofCities);
            mutation1 = newPopulation(i,pos1);
            mutation2 = newPopulation(i,pos2);
            newPopulation(i,pos1) = mutation2;
            newPopulation(i,pos2) = mutation1;
        end
    end
    population = newPopulation;
    u
end
figure;
hold on;
scatter(cities(:,1), cities(:,2), 5, 'b','fill');
line(cities(population(i,:),1), cities(population(i,:),2));
line(cities([population(i,1) population(i,end)],1), cities([population(i,1) population(i,end)],2));
axis([0 110 0 110]);
%close all;
What I want is to replace the tournament code with wheel and rank code.
Here is what I wrote for the Wheel Selection:
fitness = tourDistance./sum(tourDistance);
wheel = cumsum(fitness);
parent1 = population(find(wheel >= rand(),1),:);
parent2 = population(find(wheel >= rand(),1),:);
Here is a vectorized implementation of a roulette wheel selection in Matlab:
[~,W] = min(ones(popSize,1)*rand(1,2*popSize) > ((cumsum(fitness)*ones(1,2*popSize)/sum(fitness))),[],1);
This assumes that the fitness input into the selection scheme is a column vector of size (popSize x 1), i.e., one fitness value per population member.
popSize is the number of members in your population, and W holds the winners, i.e., the population members that are selected to become parents for crossover.
The output of the selection is therefore W, a row vector of length 2*popSize containing the indices of the population members that will be used in the crossover stage.
This row vector can then be input into a vectorized crossover scheme that could look something like this:
%% Single-Point Preservation Crossover
Pop2 = Pop(W(1:2:end),:); % Pop2 Winners 1
P2A = Pop(W(2:2:end),:); % Pop2 Winners 2
Lidx = sub2ind(size(Pop),[1:popSize]',round(rand(popSize,1)*(genome-1)+1));
vLidx = P2A(Lidx)*ones(1,genome);
[r,c]=find(Pop2==vLidx);
[~,Ord]=sort(r);
r = r(Ord); c = c(Ord);
Lidx2 = sub2ind(size(Pop),r,c);
Pop2(Lidx2) = Pop2(Lidx);
Pop2(Lidx) = P2A(Lidx);
This crossover assumes an input of the W variable from the selection scheme. It also uses Pop, which is the population stored in a popSize-by-genome matrix (genome is the number of cities in one tour, i.e., the genome length). Each genome is stored as an array of integers, with each integer being a city; the tour is defined by the order of the cities from the array's first index to its last.
While we are at it, we may as well include a nice vectorized mutation scheme for a permutation genetic algorithm (which this is).
%% Mutation (Permutation)
idx = rand(popSize,1)<mutRate;
Loc1 = sub2ind(size(Pop2),1:popSize,round(rand(1,popSize)*(genome-1)+1));
Loc2 = sub2ind(size(Pop2),1:popSize,round(rand(1,popSize)*(genome-1)+1));
Loc2(idx == 0) = Loc1(idx == 0);
[Pop2(Loc1), Pop2(Loc2)] = deal(Pop2(Loc2), Pop2(Loc1));
This mutation randomly flips the order of 2 cities in our tour (genome).
Finally make sure to update your population after all of that work we did!
%% Update Population!
Pop = Pop2; % updates the population to include crossovers and mutation.
So I know this reply is probably way too late for your assignment, but hopefully it will help someone else with a similar problem.
I really, really recommend anyone interested in vectorized genetic algorithms in Matlab to read this paper: UCL: Efficiently Vectorized Code for Population Based Optimization Algorithms.
It is what I based all of the example code on, and it will teach you why you are writing the code that way. It's a great resource and what got me started with GAs.
For wheel selection to work, you should start by designing a fitness measure where fitter individuals have a bigger value, in contrast to the tour distance, where better individuals have a smaller value. Then your approach with the cumsum should work (see the sketch below).
Where is the issue with ranking selection?
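To make that concrete, here is a minimal sketch of both selections, reusing the question's tourDistance, population and initialPopulationSize variables; the 1./tourDistance inversion is just one possible cost-to-fitness transform, not the only choice:
% Roulette wheel: convert distance (lower is better) into fitness (higher is better)
fitness = 1 ./ tourDistance;              % assumed transform; max(tourDistance) - tourDistance also works
wheel = cumsum(fitness) / sum(fitness);   % cumulative selection probabilities
parent1 = population(find(wheel >= rand(), 1), :);
parent2 = population(find(wheel >= rand(), 1), :);

% Rank selection: weight individuals by rank, best tour gets the largest weight
[~, order] = sort(tourDistance, 'descend');      % worst tour first, best tour last
rankWeight = zeros(initialPopulationSize, 1);
rankWeight(order) = 1:initialPopulationSize;     % best individual gets weight = popSize
wheelRank = cumsum(rankWeight) / sum(rankWeight);
parent1 = population(find(wheelRank >= rand(), 1), :);
parent2 = population(find(wheelRank >= rand(), 1), :);
Either wheel or wheelRank can then replace the tournament block inside the for k loop, drawing two parents per child as before.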

matlab code nested loop performance improvement

I would be very interested to receive suggestions on how to improve performance of the following nested for loop:
I = (U > q); % matrix of indicator variables, I(i,j) is 1 if U(i,j) > q
for i = 2:K
    for j = 1:(i-1)
        mTau(i,j) = sum(I(:,i) .* I(:,j));
        mTau(j,i) = mTau(i,j);
    end
end
The code counts, for each pair of variables, how often both variables exceed the threshold q, thereby filling a matrix. I appreciate your help!
You can use matrix multiplication:
I = double(U>q);
mTau = I.'*I;
This will have non-zero values on the diagonal, which you can set to zero with
mTau = mTau - diag(diag(mTau));
One approach with bsxfun:
out = squeeze(sum(bsxfun(@and,I,permute(I,[1 3 2])),1));
out(1:size(out,1)+1:end)=0;
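For reference, a quick self-contained check (a sketch; U, q and K here are stand-in values, not the original data) that the matrix-multiplication version reproduces the double loop exactly:
U = rand(1000, 8); q = 0.5;        % stand-in data
K = size(U, 2);
I = (U > q);

% original nested loop
mTau = zeros(K);
for i = 2:K
    for j = 1:(i-1)
        mTau(i,j) = sum(I(:,i) .* I(:,j));
        mTau(j,i) = mTau(i,j);
    end
end

% matrix multiplication, diagonal zeroed to match
Id = double(I);
mTau2 = Id.'*Id;
mTau2 = mTau2 - diag(diag(mTau2));

isequal(mTau, mTau2)               % should display 1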

Am I using a wrong numerical method?

This is the code:
f = dsolve('D3y+12*Dy+y = 0 ,y(2) = 1 ,Dy(2) = 1, D2y(2) = -1');
feval(symengine, 'numeric::solve',strcat(char(f),'=1'),'t=-4..16','AllRealRoots')
If I remove 'AllRealRoots' option it works fast and finds a solution, but when I enable the option Matlab does not finish for an hour. Am I using a wrong numerical method?
First, straight from the documentation for numeric::solve:
If eqs is a non-polynomial/non-rational equation or a set or list containing such an equation, then the equations and the appropriate optional arguments are passed to the numerical solver numeric::fsolve.
So, as your equation f is non-polynomial, you should probably call numeric::fsolve directly. However, even with the 'MultiSolutions' option it fails to return more than one root over your range (a bug, perhaps? I'm using R2013b). A workaround is to call numeric::realroots to get bounds on each of the distinct real roots in your range and then solve for them separately:
f = dsolve('D3y+12*Dy+y = 0 ,y(2) = 1 ,Dy(2) = 1, D2y(2) = -1');
r = feval(symengine, 'numeric::realroots', f==1, 't = -4 .. 16');
num_roots = numel(r);
T = zeros(num_roots,1); % Wrap in sym or vpa for higher precision output
syms t;
for i = 1:num_roots
    bnds = r(i);
    ri = feval(symengine, '_range', bnds(1), bnds(2));
    s = feval(symengine, 'numeric::fsolve', f==1, t==ri);
    T(i) = feval(symengine, 'rhs', s(1));
end
The resultant solution vector, T, is double-precision (allocate it as sym or vpa you want higher precision):
T =
-0.663159371123072
0.034848320470578
0.999047064621451
2.000000000000000
2.695929753727520
3.933983894260340
4.405822476913172
5.868112290810963
6.108685019679461
You may be able to remove the for loop if you can figure out how to cleanly pass the output of 'numeric::realroots' to 'numeric::fsolve' in one go (it's doable, but may require converting stuff to strings unless you're clever).
Another (possibly even faster) approach is to switch to using the numeric (floating-point) function fzero for the second half after you bound all of the roots:
f = dsolve('D3y+12*Dy+y = 0 ,y(2) = 1 ,Dy(2) = 1, D2y(2) = -1');
r = feval(symengine, 'numeric::realroots', f==1, 't = -4 .. 16');
num_roots = numel(r);
T = zeros(num_roots,1);
g = matlabFunction(f-1); % Create anonymous function from f
for i = 1:num_roots
    bnds = double(r(i));
    T(i) = fzero(g,bnds);
end
I checked and, for your problem here and using the default tolerances, the resultant T is within a few times machine epsilon (eps) of the numeric::fsolve solution.
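As an extra sanity check, a small sketch that evaluates the residuals of f - 1 at the roots returned above (g and T as computed in the previous block):
residuals = arrayfun(g, T);    % f(t) - 1 evaluated at each computed root
disp(max(abs(residuals)))      % should be very small, i.e. essentially zero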

Sorting rows of two matrices using same ordering [duplicate]

Suppose I have a matrix A and I sort the rows of this matrix. How do I replicate the same ordering on a matrix B (same size of course)?
E.g.
A = rand(3,4);
[val ind] = sort(A,2);
B = rand(3,4);
% Reorder the elements of B according to the reordering of A
This is the best I've come up with
m = size(A,1);
B = B(bsxfun(@plus,(ind-1)*m,(1:m)'));
Out of curiosity, any alternatives?
Update: Jonas' excellent solution profiled on 2008a (XP):
n = m (square arrays):
0.048524 1.4632 1.4791 1.195 1.0662 1.108 1.0082 0.96335 0.93155 0.90532 0.88976
n = 2m (twice as many columns as rows):
0.63202 1.3029 1.1112 1.0501 0.94703 0.92847 0.90411 0.8849 0.8667 0.92098 0.85569
It just goes to show that loops aren't anathema to MATLAB programmers anymore, thanks to the JIT accelerator (perhaps).
A somewhat clearer way to do this is to use a loop
A = rand(3,4);
B = rand(3,4);
[sortedA,ind] = sort(A,2);
for r = 1:size(A,1)
    B(r,:) = B(r,ind(r,:));
end
Interestingly, the loop version is faster for small (<12 rows) and large (>~700 rows) square arrays (r2010a, OS X). The more columns there are relative to rows, the better the loop performs.
Here's the code I quickly hacked up for testing:
siz = 10:100:1010;
tt = zeros(100,2,length(siz));
for s = siz
    for k = 1:100
        A = rand(s,1*s);
        B = rand(s,1*s);
        [sortedA,ind] = sort(A,2);
        tic;
        for r = 1:size(A,1)
            B(r,:) = B(r,ind(r,:));
        end
        tt(k,1,s==siz) = toc;
        tic;
        m = size(A,1);
        B = B(bsxfun(@plus,(ind-1)*m,(1:m).'));
        tt(k,2,s==siz) = toc;
    end
end
m = squeeze(mean(tt,1));
m(1,:)./m(2,:)
For square arrays
ans =
0.7149 2.1508 1.2203 1.4684 1.2339 1.1855 1.0212 1.0201 0.8770 0.8584 0.8405
For twice as many columns as there are rows (same number of rows)
ans =
0.8431 1.2874 1.3550 1.1311 0.9979 0.9921 0.8263 0.7697 0.6856 0.7004 0.7314
sort() returns the indices along the dimension you sorted on. You can explicitly construct indices for the other dimension that keep the rows stable, and then use linear indexing to rearrange the whole array.
A = rand(3,4);
B = A; % Start with same values so we can programmatically check result
[A2 ix2] = sort(A,2);
% ix2 is the index along dimension 2, and we want dimension 1 to remain unchanged
ix1 = repmat([1:size(A,1)]', [1 size(A,2)]);
% Convert to linear index equivalent of the reordering of the sort() call
ix = sub2ind(size(A), ix1, ix2)
% And apply it
B2 = B(ix)
ok = isequal(A2, B2) % confirm reordering
Can't you just do this?
[val ind]=sort(A);
B=B(ind);
It worked for me, unless I'm understanding your problem wrong.
