I set up a three-layer neural network (it has two hidden layers), but when I try to implement a gradient check I get max_weigh = 1 (a maximum relative error of 1), which means I have some error in my backprop. Here is my backprop function; I really need some help.
Is there something wrong with my code?
Thanks!
def loss(self, X, y, reg=0.0):
    # forward prop
    W1, b1 = self.params['W1'], self.params['b1']
    W2, b2 = self.params['W2'], self.params['b2']
    W3, b3 = self.params['W3'], self.params['b3']
    N, D = X.shape
    H1out = np.maximum(0, X.dot(W1) + b1)       # H1out (N,H1), ReLU
    H2out = np.maximum(0, H1out.dot(W2) + b2)   # H2out (N,H2), ReLU
    scores = H2out.dot(W3) + b3                 # scores (N,C)
    # numerically stable softmax
    scores_shift = scores - np.max(scores, axis=1).reshape(-1, 1)
    softmaxout = np.exp(scores_shift) / np.sum(np.exp(scores_shift), axis=1).reshape(-1, 1)
    loss_main = -np.sum(np.log(softmaxout[range(N), list(y)]))
    # L2 regularization: reg times the summed squares of all three weight matrices
    loss = loss_main / N + reg * (np.sum(W1 * W1) + np.sum(W2 * W2) + np.sum(W3 * W3))
    # backward prop
    dscores = softmaxout.copy()                 # dscores (N,C)
    dscores[range(N), list(y)] -= 1
    dscores /= N
    dW3 = H2out.T.dot(dscores) + 2 * reg * W3   # regularization gradient included
    db3 = np.sum(dscores, axis=0)
    dh2 = dscores.dot(W3.T)                     # dh2 (N,H2)
    dh_Relu2 = (H2out > 0) * dh2                # gate by the ReLU mask
    dW2 = H1out.T.dot(dh_Relu2) + 2 * reg * W2
    db2 = np.sum(dh_Relu2, axis=0)
    dh1 = dh_Relu2.dot(W2.T)                    # dh1 (N,H1)
    dh_Relu1 = (H1out > 0) * dh1
    dW1 = X.T.dot(dh_Relu1) + 2 * reg * W1
    db1 = np.sum(dh_Relu1, axis=0)
    grad = {'W1': dW1, 'b1': db1,
            'W2': dW2, 'b2': db2,
            'W3': dW3, 'b3': db3}
    return loss, grad
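For reference, here is a minimal centered-difference gradient check sketch (the step size h and the net.loss / net.params interface are assumptions based on the function above):

import numpy as np

def gradient_check(net, X, y, reg=0.0, h=1e-5):
    # compare analytic gradients against centered finite differences
    _, grads = net.loss(X, y, reg)
    for name, W in net.params.items():
        num_grad = np.zeros_like(W)
        it = np.nditer(W, flags=['multi_index'])
        while not it.finished:
            ix = it.multi_index
            old = W[ix]
            W[ix] = old + h
            loss_plus, _ = net.loss(X, y, reg)
            W[ix] = old - h
            loss_minus, _ = net.loss(X, y, reg)
            W[ix] = old  # restore the original weight
            num_grad[ix] = (loss_plus - loss_minus) / (2 * h)
            it.iternext()
        denom = np.maximum(np.abs(grads[name]) + np.abs(num_grad), 1e-8)
        rel_err = np.max(np.abs(grads[name] - num_grad) / denom)
        # a correct backprop typically gives relative errors around 1e-7 or smaller
        print(name, 'max relative error:', rel_err)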
Hi, I ran into an error about lambda not being found, but I already defined lambda. I really don't know where it went wrong. Does anybody know how to solve this?
Here is my code:
library(R2jags)

binomial.model.JAGS = function(){
    y ~ dbin(p, n)
    p <- lambda*mu + rho*(1 - mu)
    lambda ~ dunif(0.2, 1.4)  # JAGS distributions take positional bounds, not min=/max=
    mu ~ dunif(0, 1)
    rho ~ dunif(0.1, 1.7)
}
n = 100000
y = 30000
data.JAGS = list(y = y, n = n)
inits.JAGS = list(list(lambda = 0.8, mu = 0.5, rho = 0.9))
para.JAGS = c('p', 'lambda', 'mu', 'rho')
fit.JAGS = jags(data = data.JAGS, inits = inits.JAGS,
                parameters.to.save = para.JAGS,
                n.chains = 1,
                n.iter = 9000,
                n.burnin = 1000,
                model.file = binomial.model.JAGS)  # pass the function itself, not a call
I am putting together a numerical problem to show as an example, and I am trying to find an optimal control with GEKKO for the following problem:
minimize the integral of a*x(t) from 0 to T, where T is the first time x(t) hits 0, i.e. it is a random (free) final time. The constraints are that x(t) follows some dynamics f(x(t), u(t)), x(t) >= 0, and u(t) is between 0 and 1.
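Restated in standard optimal-control notation (this is just the statement above written as one formula):

\min_{u(\cdot)} \int_0^T a\, x(t)\, dt
\quad \text{s.t.} \quad \dot{x}(t) = f(x(t), u(t)), \quad x(t) \ge 0, \quad 0 \le u(t) \le 1,
\quad T = \inf\{\, t \ge 0 : x(t) = 0 \,\}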
I followed the tutorials on the GEKKO website and YouTube for a fixed final time, but I could not find any information on a random final time. The following is my current code; how would I be able to move from a fixed final time to a random final time? Any help would be appreciated! Thanks!
import numpy as np
import matplotlib.pyplot as plt
from scipy.integrate import odeint
from gekko import GEKKO
# Initial conditions
xhh0 = 3; xhi0 = 0;
xvh0 = 30; xvi0 = 0;
hin0 = 0; vin0 = 0;
tt0 = 0
# Parameters
a1 = 0.1; a2 = 0.1;
b1 = 0.01; b2 = 0.5;
delta1 = 0.1; delta2 = 0.5;
rho1 = 0.3; rho2 = 0.01
mu = 1
# Gekko
m = GEKKO()
# Control variable
u = m.MV(0.5, lb = 0, ub = 1)
# Final time <------------------------ currently a fixed final time
T = 10
# Initialize
xhh, xhi, xvh, xvi, Ah, Av = m.Array(m.Var, 6)
xhh.value = xhh0; xhi.value = xhi0;
xvh.value = xvh0; xvi.value = xvi0;
Ah.value = hin0; Av.value = vin0;
# System dynamics
m.Equations([xhh.dt() == -a1*xhh - mu*u - b1*xhi*xhh,\
xhi.dt() == a1*xhh + b1*xhi*xhh - delta1*xhi - rho1*xhi,\
xvh.dt() == -a2*xvh - mu*(1-u) - b2*xvi*xvh,\
xvi.dt() == a2*xvh + b2*xvi*xvh - delta2*xvi - rho2*xvi,\
Ah.dt() == a1*xhh,\
Av.dt() == a2*xvh])
# Time space
t = np.linspace(0, T, 101)
m.time = t
# initialize with simulation
m.options.IMODE = 7
m.options.NODES = 3
m.solve(disp = False)
# optimization
m.options.IMODE = 6
xhh.LOWER = 0; xhi.LOWER = 0; xvh.LOWER = 0; xvi.LOWER = 0
u.STATUS = 1
m.options.SOLVER = 3
xhh.value = xhh.value.value
xhi.value = xhi.value.value
xvh.value = xvh.value.value
xvi.value = xvi.value.value
Ah.value = Ah.value.value
Av.value = Av.value.value
# Objective function
m.Minimize(Ah + Av)
m.solve()
The final time is adjustable with T = m.FV() and T.STATUS = 1 when each differential equation is divided by T. This rescales the problem to an arbitrary final time on the normalized horizon m.time = np.linspace(0, 1).
import numpy as np
import matplotlib.pyplot as plt
from scipy.integrate import odeint
from gekko import GEKKO
# Initial conditions
xhh0 = 3; xhi0 = 0;
xvh0 = 30; xvi0 = 0;
hin0 = 0; vin0 = 0;
tt0 = 0
# Parameters
a1 = 0.1; a2 = 0.1;
b1 = 0.01; b2 = 0.5;
delta1 = 0.1; delta2 = 0.5;
rho1 = 0.3; rho2 = 0.01; mu = 1
# Gekko
m = GEKKO()
# Control variable
u = m.MV(0.5, lb = 0, ub = 1)
# Final time
T = m.FV(10,lb=1e-2,ub=100); T.STATUS = 1
# Initialize
xhh, xhi, xvh, xvi, Ah, Av = m.Array(m.Var, 6)
xhh.value = xhh0; xhi.value = xhi0;
xvh.value = xvh0; xvi.value = xvi0;
Ah.value = hin0; Av.value = vin0;
xhh.LOWER = 0; xhi.LOWER = 0; xvh.LOWER = 0; xvi.LOWER = 0
u.STATUS = 1
# System dynamics
m.Equations([xhh.dt()/T == -a1*xhh - mu*u - b1*xhi*xhh,\
xhi.dt()/T == a1*xhh + b1*xhi*xhh - delta1*xhi - rho1*xhi,\
xvh.dt()/T == -a2*xvh - mu*(1-u) - b2*xvi*xvh,\
xvi.dt()/T == a2*xvh + b2*xvi*xvh - delta2*xvi - rho2*xvi,\
Ah.dt()/T == a1*xhh,\
Av.dt()/T == a2*xvh])
# Time space
t = np.linspace(0, 1, 101)
m.time = t
# optimization
m.options.IMODE = 6
m.options.SOLVER = 3
# Objective function
m.Minimize(Ah + Av)
m.solve()
print('Final time: ', T.value[0])
There may be a missing constraint or some other missing information, because the optimal final time always goes to the lower bound. The Jennings problem is a related example with a variable final time.
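For reference, below is a minimal free-final-time sketch in the style of the Jennings benchmark from the Gekko examples. The terminal conditions x2(tf) = x3(tf) = 0 are enforced here as soft penalties at the last node, and the penalty weight 1e4 and the bounds on tf are illustrative choices:

import numpy as np
from gekko import GEKKO

m = GEKKO(remote=False)
nt = 101
m.time = np.linspace(0, 1, nt)   # normalized time; real time is t*tf

# marker that is 1 only at the final node
p = np.zeros(nt); p[-1] = 1.0
final = m.Param(value=p)

tf = m.FV(value=10.0, lb=0.1, ub=100.0)  # free final time
tf.STATUS = 1

u = m.MV(value=0, lb=-2, ub=2)
u.STATUS = 1

x1 = m.Var(value=np.pi/2)
x2 = m.Var(value=4.0)
x3 = m.Var(value=0.0)

# dynamics scaled by tf so the horizon stays [0, 1]
m.Equation(x1.dt() == u * tf)
m.Equation(x2.dt() == m.cos(x1) * tf)
m.Equation(x3.dt() == m.sin(x1) * tf)

# soft terminal constraints x2(tf) = x3(tf) = 0
m.Minimize(final * 1e4 * (x2**2 + x3**2))
m.Minimize(tf)  # objective: minimize the final time

m.options.IMODE = 6
m.solve(disp=False)
print('Optimal tf:', tf.value[0])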
I want to be able to use a text file of requirements to be prioritized.
I want to make swarm_size, min_values and max_values inputs that are read from a text file.
SSA Function
import math

def salp_swarm_algorithm(swarm_size=5, min_values=[-5, -5], max_values=[5, 5], iterations=50):
    # initial_position, food_position, update_food and update_position are defined elsewhere
    count = 0
    position = initial_position(swarm_size=swarm_size, min_values=min_values, max_values=max_values)
    food = food_position(dimension=len(min_values))
    while count <= iterations:
        print("Iteration = ", count, " Requirement = ", food.iloc[food['Fitness'].idxmin(), -1])
        c1 = 2*math.exp(-(4*(count/iterations))**2)
        food = update_food(position, food)
        position = update_position(position, food, c1=c1, min_values=min_values, max_values=max_values)
        count = count + 1
    print(food.iloc[food['Fitness'].idxmin(), :].copy(deep=True))
    return food.iloc[food['Fitness'].idxmin(), :].copy(deep=True)
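A minimal sketch of reading those three inputs from a text file (the file name ssa_config.txt and the key = value format are assumptions; adapt the parsing to whatever your requirements file actually looks like):

def load_ssa_config(path='ssa_config.txt'):
    # parse lines like "swarm_size = 5" or "min_values = -5, -5"
    config = {}
    with open(path) as f:
        for line in f:
            line = line.strip()
            if not line or line.startswith('#'):
                continue  # skip blank lines and comments
            key, _, value = line.partition('=')
            key = key.strip()
            if key == 'swarm_size':
                config[key] = int(value)
            else:
                config[key] = [float(v) for v in value.split(',')]
    return config

# usage:
# cfg = load_ssa_config()
# result = salp_swarm_algorithm(swarm_size=cfg['swarm_size'],
#                               min_values=cfg['min_values'],
#                               max_values=cfg['max_values'])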
I'm trying to develop the adaptive unsharp algorithm described by Polesel et al. in the article "Image Enhancement via Adaptive Unsharp Masking" (link to the article). The core of the algorithm is the minimization of a cost function defined as:
J(m,n) = E[e(m,n)^2] = E[(gd(m,n)-gy(m,n))^2]
where E[] is the statistical expectation and gy(m,n) is:
gy(m,n) = gx(m,n) + lambda1(m,n)*gzx(m,n) + lambda2(m,n)*gzy(m,n);
I want to find lambda1 and lambda2 for each pixel in order to minimize the cost function at that pixel.
Here is the code I have written so far:
function [ o_sharpened_image ] = AdaptativeUnsharpMask( i_image, t1, t2 )
%ADAPTATIVEUNSHARPMASK Adaptive unsharp masking following Polesel et al.
    if isa(i_image, 'dip_image')
        i_image = dip_array(i_image);
    end
    if ~isfloat(i_image)
        i_image = im2double(i_image);
    end
    adh = 4;
    adl = 3;
    g = [-1 -1 -1; -1 8 -1; -1 -1 -1]; % linear highpass operator
    dim = size(i_image);
    lambda_x = 0.5*ones(dim);
    lambda_y = 0.5*ones(dim);
    z_x = conv2(i_image, [-1 2 -1], 'same');   % horizontal correction signal
    z_y = conv2(i_image, [-1; 2; -1], 'same'); % vertical correction signal
    g_x = conv2(i_image, g, 'same');
    g_zx = conv2(z_x, g, 'same');
    g_zy = conv2(z_y, g, 'same');
    a = ones(dim);
    variance_map = colfilt(i_image, [3 3], 'sliding', @var);
    a(variance_map >= t1 & variance_map < t2) = adh;
    a(variance_map >= t2) = adl;
    g_d = a.*g_x; % desired output dynamics
    lambda0 = [lambda_x lambda_y];
    % gy must be built from g_x (the actual output dynamics), so pass g_x too
    lambda_min = lsqnonlin(@(lambda) UnsharpCostFunction(lambda, g_d, g_x, g_zx, g_zy), lambda0);
    o_sharpened_image = i_image + lambda_min(:,1:size(i_image,2)).*z_x + lambda_min(:,size(i_image,2)+1:end).*z_y;
end
Here is the code of the cost function:
function [ J ] = UnsharpCostFunction( i_lambda, i_gd, i_gx, i_gzx, i_gzy )
%UNSHARPCOSTFUNCTION Residuals e = gd - gy for lsqnonlin
    gy = i_gx + i_lambda(:,1:size(i_gd,2)).*i_gzx + i_lambda(:,size(i_gd,2)+1:end).*i_gzy;
    % lsqnonlin expects the vector of residuals and minimizes sum(J.^2) itself
    J = i_gd(:) - gy(:);
end
On each iteration I print the value of J to the command window, and it is always the same. What am I doing wrong?
Thank you.
I have a problem that I can't seem to solve. I want a query to determine whether a given value lies within a predefined range, but my loop is very slow for big datasets. Is there a more efficient way?
clear all
close all
Regression(1,1) = 1.001415645694801;
Regression(1,2) = 0.043822386790753;
FF_Value(:,1) = [24.24 30.77 31.37 29.05 29.20 29.53 29.67 27.78];
FF_Value(:,2) = [24.16 30.54 31.15 29.53 29.39 29.34 29.53 28.17];
FF_Distance = FF_Value(:,2)-(Regression(1,2)+Regression(1,1)*FF_Value(:,1));
FF_Distance_Positiv = sort(FF_Distance(FF_Distance > 0));
FF_Distance_Positiv(FF_Distance_Positiv == 0) = [];
FF_Distance_Negativ = sort(FF_Distance(FF_Distance < 0),'descend');
FF_Distance_Negativ(FF_Distance_Negativ == 0) = [];
A = repmat(FF_Distance_Positiv,length(FF_Distance_Negativ),1);
B = repmat(FF_Distance_Negativ',length(FF_Distance_Positiv),1);
C = reshape(B,[length(FF_Distance_Positiv)*length(FF_Distance_Negativ),1]);
Recognition(:,1) = A;
Recognition(:,2) = C;
FF_Recognition = zeros(length(FF_Value),1);
for i = 1:length(Recognition)
    for j = 1:length(FF_Value)
        if (Regression(1,2)+Recognition(i,1))+Regression(1,1)*FF_Value(j,1) >= FF_Value(j,2) && ...
           (Regression(1,2)+Recognition(i,2))+Regression(1,1)*FF_Value(j,1) <= FF_Value(j,2)
            FF_Recognition(j,1) = 1;
        end
    end
end
Welcome to the world of bsxfun, replacing your world of repmat -
%------------ Original code -----------------------------------------
FF_Distance = FF_Value(:,2)-(Regression(1,2)+Regression(1,1)*FF_Value(:,1));
FF_Distance_Positiv = sort(FF_Distance(FF_Distance > 0));
FF_Distance_Positiv(FF_Distance_Positiv == 0) = [];
%// Note for Performance: If number of elements satisfying `FF_Distance_Positiv == 0`
%// is a lot, consider doing this instead -
%// `FF_Distance_Positiv = FF_Distance_Positiv(FF_Distance_Positiv~=0)`.
%// Follow this strategy for `FF_Distance_Negativ` too.
FF_Distance_Negativ = sort(FF_Distance(FF_Distance < 0),'descend');
FF_Distance_Negativ(FF_Distance_Negativ == 0) = [];
%------- Added vectorization replacing `repmats` and nested loops ------------
mult = Regression(1,1)*FF_Value(:,1);
y1 = bsxfun(@plus, Regression(1,2), FF_Distance_Positiv);
y2 = bsxfun(@plus, y1.', mult);
mc1 = bsxfun(@ge, y2, FF_Value(:,2));
z1 = bsxfun(@plus, Regression(1,2), FF_Distance_Negativ);
z2 = bsxfun(@plus, z1.', mult);
mc2 = bsxfun(@le, z2, FF_Value(:,2));
FF_Recognition = all([any(mc1,2) any(mc2,2)],2);