I have a question on curve fitting / optimizing. I have three coupled ODEs that describe a biochemical reaction with a disappearing substrate and two products being formed. I've found examples that have helped me create code to solve the ODEs (below). Now I want to optimize the unknown rate constants (k, k3 and k4) to fit to the experimental data, P, which is a signal from product y[1]. What would be the easiest way of doing this?
Thanks.
import numpy as np
from scipy.integrate import odeint
import matplotlib.pyplot as plt
# Experimental data
P = [29.976,193.96,362.64,454.78,498.42,517.14,515.76,496.38,472.14,432.81,386.95,
352.93,318.93,279.47,260.19,230.92,202.67,180.3,159.09,137.31,120.47,104.51,99.371,
89.606,75.431,67.137,58.561,55.721]
# Three coupled ODEs
def conc(y, t):
    """Return the right-hand side of the three coupled rate equations.

    The rate constants ``k``, ``k2``, ``k3``, ``k4`` and ``k5`` are looked up
    as module-level names each time the function is called, so they must be
    assigned before ``odeint`` invokes it.

    Parameters
    ----------
    y : sequence of three numbers
        Current state ``[y[0], y[1], y[2]]`` (substrate, product 1, product 2).
    t : float
        Current time. Unused in the equations, but part of the callback
        signature that ``odeint`` expects.

    Returns
    -------
    numpy.ndarray
        ``[dy[0]/dt, dy[1]/dt, dy[2]/dt]``.
    """
    a1 = k * y[0]    # substrate converted into product 1
    a2 = k2 * y[0]   # substrate lost without feeding product 1
    a3 = k3 * y[1]   # product 1 lost without feeding product 2
    a4 = k4 * y[1]   # product 1 converted into product 2
    a5 = k5 * y[2]   # product 2 lost
    f1 = -a1 - a2
    f2 = a1 - a3 - a4
    f3 = a4 - a5
    return np.array([f1, f2, f3])
# Initial conditions for y[0], y[1] and y[2]
# NOTE: odeint takes y0 as the state at the first element of t (here 0.5).
y0 = np.array([50000, 0.0, 0.0])
# Times at which the solution is to be computed.
# 28 points, one per entry of the experimental data P.
t = np.linspace(0.5, 54.5, 28)
# Experimentally determined parameters.
k2 = 0.071
k5 = 0.029
# Parameters which would have to be fitted
# (the values below are only starting guesses)
k = 0.002
k3 = 0.1
k4 = 0.018
# Solve the equation
# conc reads k, k2, k3, k4 and k5 from module scope, so they must be
# assigned before this call.
y = odeint(conc, y0, t)
# Plot data and the solution.
plt.plot(t, P, "bo")
#plt.plot(t, y[:,0]) # Substrate
plt.plot(t, y[:,1]) # Product 1
plt.plot(t, y[:,2]) # Product 2
plt.xlabel('t')
plt.ylabel('y')
plt.show()
Edit: I made some changes to the code in order to show how to fit to the experimental data of all ODEs.
Like this:
import numpy as np
from scipy.integrate import odeint
import matplotlib.pyplot as plt
from scipy.odr import Model, Data, ODR
# Experimental data
P = [29.976,193.96,362.64,454.78,498.42,517.14,515.76,496.38,472.14,432.81,386.95,
352.93,318.93,279.47,260.19,230.92,202.67,180.3,159.09,137.31,120.47,104.51,99.371,
89.606,75.431,67.137,58.561,55.721]
# Times at which the solution is to be computed.
t = np.linspace(0.5, 54.5, 28)
def coupledODE(beta, x):
    """Model function for ``scipy.odr``: integrate the ODEs, return ``y[1]``.

    Parameters
    ----------
    beta : sequence of three floats
        The rate constants being fitted, in the order ``(k, k3, k4)``.
    x : array_like
        Times at which the solution is evaluated. ``odeint`` applies the
        initial condition at ``x[0]``.

    Returns
    -------
    numpy.ndarray
        Values of the second state ``y[:, 1]`` at each time in ``x``.
    """
    k, k3, k4 = beta

    # Experimentally determined parameters.
    k2 = 0.071
    k5 = 0.029

    # Three coupled ODEs
    def conc(y, t):
        a1 = k * y[0]
        a2 = k2 * y[0]
        a3 = k3 * y[1]
        a4 = k4 * y[1]
        a5 = k5 * y[2]
        f1 = -a1 - a2
        f2 = a1 - a3 - a4
        f3 = a4 - a5
        return np.array([f1, f2, f3])

    # Initial conditions for y[0], y[1] and y[2]
    y0 = np.array([50000, 0.0, 0.0])

    # Solve the equation
    y = odeint(conc, y0, x)

    # In case you have experimental findings of all three ODEs, return
    # every state instead:
    #     return y.ravel()
    # In case you are only fitting to experimental findings of ODE #1:
    return y[:, 1]
# Fit to P, the experimental findings of ODE #1 only.
data = Data(t, P)
# With P being a (3,N) array of experimental findings of all ODEs, use
# this instead (together with `return y.ravel()` in coupledODE):
# data = Data(np.repeat(t, 3), P.ravel())
model = Model(coupledODE)
guess = [0.1,0.1,0.1]  # starting values for (k, k3, k4)
odr = ODR(data, model, guess)
odr.set_job(fit_type=2)  # 2 = ordinary least squares instead of full ODR
out = odr.run()
# print() with a single argument behaves the same on Python 2 and 3.
print(out.beta)      # fitted (k, k3, k4)
print(out.sd_beta)   # their standard errors

# Overlay the fitted model on the measurements.
f = plt.figure()
p = f.add_subplot(111)
p.plot(t, P, 'ro')
p.plot(t, coupledODE(out.beta, t))
plt.show()
In case you were using peak-o-mat (http://lorentz.sf.net) which is an interactive curve fitting program based on scipy, you could add your ODE model and save it to userfunc.py (see the customisation section in the docs):
import numpy as np
from scipy.integrate import odeint
from peak_o_mat import peaksupport as ps
def coupODE(x, k, k3, k4):
    """Evaluate the ODE model at times ``x`` for the given rate constants.

    Parameters
    ----------
    x : array_like
        Times at which the solution is evaluated. ``odeint`` applies the
        initial condition at ``x[0]``.
    k, k3, k4 : float
        The rate constants being fitted.

    Returns
    -------
    numpy.ndarray
        Values of the second state ``y[:, 1]`` at each time in ``x``.
    """
    # Experimentally determined parameters.
    k2 = 0.071
    k5 = 0.029

    # Three coupled ODEs
    def conc(y, t):
        a1 = k * y[0]
        a2 = k2 * y[0]
        a3 = k3 * y[1]
        a4 = k4 * y[1]
        a5 = k5 * y[2]
        f1 = -a1 - a2
        f2 = a1 - a3 - a4
        f3 = a4 - a5
        return np.array([f1, f2, f3])

    # Initial conditions for y[0], y[1] and y[2]
    y0 = np.array([50000, 0.0, 0.0])

    # Solve the equation. (The debug print of the full solution that used
    # to follow was dropped: it ran on every model evaluation and used
    # Python 2 print-statement syntax.)
    y = odeint(conc, y0, x)
    return y[:, 1]
# Make the model available to peak-o-mat under the name 'ODE'; `func` is the
# call expression naming coupODE and its fit parameters k, k3 and k4.
# NOTE(review): exact meaning of `ptype` is defined by peak-o-mat - see its docs.
ps.add('ODE',
func='coupODE(x,k,k3,k4)',
info='thre coupled ODEs',
ptype='MISC')
You would need to prepare your data as a text file with two columns for time and experimental data. Import the data into peak-o-mat, enter 'ODE' as fit model, choose appropriate initial parameters for k,k3,k4 and hit 'Fit'.
Related
I am trying to solve an optimal control problem that involves minimizing an integral objective with fixed states but free terminal time. It is a relatively simple problem that can be solved analytically. Gekko's solution doesn't match the analytical. If I relax the lower bound of terminal time, then I am getting something close to the analytical solution. Am I doing anything wrong in the Gekko code?
I had earlier posted a similar question here.
The analytical solution is given as follows. (lambda is the Lagrange multiplier)
from gekko import GEKKO
import numpy as np
import matplotlib.pyplot as plt
# constants
k1 = 0.5
k2 = 0.1
k3 = 0.5
g = 0.5

# create GEKKO model
m = GEKKO()

# time points on a normalized horizon [0, 1]; the derivatives below are
# divided by tf, so tf plays the role of the free final time
n = 501
# tm = np.array([0,1e-5,1e-4,1e-2])
# tm = np.hstack((tm,np.linspace(1e-1, 1, n)))
tm = np.linspace(0, 1, n)
m.time = tm

# Variables
x1 = m.Var(value=1,lb=0,ub=1) # x1
u = m.MV(value=0.1,fixed_initial=False,lb=0,ub=1)
u.STATUS = 1
u.DCOST = 1e-5
J = m.Var(value=0.0) # objective function in differential form, initial value

# selector that is 1 at the last time point and 0 everywhere else
p = np.zeros(len(tm))
p[-1] = 1.0
final = m.Param(value=p)

# FV
tf = m.FV(value=0.1, lb=3, ub=5.0)
tf.STATUS = 1

# equations
m.Equation(x1.dt()/tf == -u -g*x1)
m.Equation(J.dt()/tf==k1*k3*(u-k2)/(u+k3))

# Final conditions
soft = True
if soft:
    # soft terminal constraint: penalize x1 != 0 and u != 0 at the end point
    m.Minimize(final*1e5*(x1-0)**2)
    m.Minimize(final*1e5*(u-0)**2)
    # m.Minimize(final*1e5*(x2-2)**2)
else:
    # hard terminal constraint
    x1f = m.Param()
    m.free(x1f)
    m.fix_final(x1f, 0)
    uf = m.Param()
    m.free(uf)
    m.fix_final(uf, 0)
    # connect endpoint parameters to x1 and u
    m.Equations([x1f == x1])
    m.Equations([uf == u])

# Objective Function
# obj = m.Intermediate(m.integral((u-k2)/(u+k3)))
obj = m.Intermediate(J)
m.Maximize(obj*final)

m.options.IMODE = 6
m.options.NODES = 3
m.options.SOLVER = 3
m.options.MAX_ITER = 50000
# m.options.MV_TYPE = 0
# m.options.DIAGLEVEL = 0
m.solve(disp=True)

plt.close('all')
tm = tm * tf.value[0]  # rescale the normalized horizon by the solved tf

# Create a figure
plt.figure(figsize=(10, 4))
plt.subplot(2, 2, 1)
# plt.plot([0,1],[1/9,1/9],'k2:',label=r'$x<\frac{1}{9}$')
plt.plot(tm, x1.value, 'k-', lw=2, label=r'$x1$')
plt.ylabel('x1')
plt.legend(loc='best')

plt.subplot(2, 2, 2)
plt.plot(tm, u.value, 'k2--', lw=2, label=r'$u$')
plt.ylabel('control')
plt.legend(loc='best')
plt.xlabel('Time')

plt.subplot(2, 2, 3)
# label added so that the legend() call below has an artist to show
plt.plot(tm, J.value, 'g-', lw=2, label='J')
plt.text(0.5, 3.0, 'Final Value = '+str(np.round(obj.value[-1], 2)))
plt.ylabel('Objective')
plt.legend(loc='best')
plt.xlabel('Time')

plt.subplot(2, 2, 4)
U = np.array(u.value)
G =k1*k3*(U-k2)/(U+k3)  # integrand of J evaluated on the solved control
# label added so that the legend() call below has an artist to show
plt.plot(tm, G, 'g-', lw=2, label='G')
plt.text(0.5, 3.0, 'Final Value = '+str(np.round(obj.value[-1], 2)))
plt.ylabel('Gopt')
plt.legend(loc='best')
plt.xlabel('Time')
plt.show()
Is a constraint or some other information missing? When the lower bound of tf is set to be non-restrictive at 0.1, it finds the same objective function as when the lower bound is set to 3.0.
tf = m.FV(value=0.1, lb=2.0, ub=5.0)
Both produce an objective of 0.1404.
from gekko import GEKKO
import numpy as np
import matplotlib.pyplot as plt
# constants
k1 = 0.5
k2 = 0.1
k3 = 0.5
g = 0.5

# create GEKKO model
m = GEKKO()

# time points on a normalized horizon [0, 1]; the derivatives below are
# divided by tf, so tf plays the role of the free final time
n = 501
# tm = np.array([0,1e-5,1e-4,1e-2])
# tm = np.hstack((tm,np.linspace(1e-1, 1, n)))
tm = np.linspace(0, 1, n)
m.time = tm

# Variables
x1 = m.Var(value=1,lb=0,ub=1) # x1
u = m.MV(value=0.1,fixed_initial=False,lb=0,ub=1)
u.STATUS = 1
u.DCOST = 1e-5
J = m.Var(value=0.0) # objective function in differential form, initial value

# selector that is 1 at the last time point and 0 everywhere else
p = np.zeros(len(tm))
p[-1] = 1.0
final = m.Param(value=p)

# FV
tf = m.FV(value=0.1, lb=2.0, ub=5.0)
tf.STATUS = 1

# equations
m.Equation(x1.dt()/tf == -u -g*x1)
m.Equation(J.dt()/tf==k1*k3*(u-k2)/(u+k3))

# Final conditions
soft = True
if soft:
    # soft terminal constraint: penalize x1 != 0 and u != 0 at the end point
    m.Minimize(final*1e5*(x1-0)**2)
    m.Minimize(final*1e5*(u-0)**2)
    # m.Minimize(final*1e5*(x2-2)**2)
else:
    # hard terminal constraint
    x1f = m.Param()
    m.free(x1f)
    m.fix_final(x1f, 0)
    uf = m.Param()
    m.free(uf)
    m.fix_final(uf, 0)
    # connect endpoint parameters to x1 and u
    m.Equations([x1f == x1])
    m.Equations([uf == u])

# Objective Function
# obj = m.Intermediate(m.integral((u-k2)/(u+k3)))
obj = m.Intermediate(J)
m.Maximize(obj*final)

m.options.IMODE = 6
m.options.NODES = 3
m.options.SOLVER = 3
m.options.MAX_ITER = 50000
# m.options.MV_TYPE = 0
# m.options.DIAGLEVEL = 0
m.solve(disp=True)

plt.close('all')
tm = tm * tf.value[0]  # rescale the normalized horizon by the solved tf

# Create a figure
plt.figure(figsize=(10, 4))
plt.subplot(2, 2, 1)
# plt.plot([0,1],[1/9,1/9],'k2:',label=r'$x<\frac{1}{9}$')
plt.plot(tm, x1.value, 'k-', lw=2, label=r'$x1$')
plt.ylabel('x1')
plt.legend(loc='best')

plt.subplot(2, 2, 2)
plt.plot(tm, u.value, 'k2--', lw=2, label=r'$u$')
plt.ylabel('control')
plt.legend(loc='best')
plt.xlabel('Time')

plt.subplot(2, 2, 3)
plt.plot(tm, J.value, 'g-', lw=2, label='J')
plt.text(0.5, 3.0, 'Final Value = '+str(np.round(obj.value[-1], 2)))
plt.ylabel('Objective')
plt.legend(loc='best')
plt.xlabel('Time')

plt.subplot(2, 2, 4)
U = np.array(u.value)
G =k1*k3*(U-k2)/(U+k3)  # integrand of J evaluated on the solved control
plt.plot(tm, G, 'g-', lw=2, label='G')
plt.text(0.5, 3.0, 'Final Value = '+str(np.round(obj.value[-1], 2)))
plt.ylabel('Gopt')
plt.legend(loc='best')
plt.xlabel('Time')
plt.show()
I am trying to solve an optimal control problem that involves minimizing an integral objective with fixed states but free terminal time. It is a relatively simple problem that can be solved analytically. Gekko's solution doesn't match the analytical.
I am not sure what I am doing wrong. I followed several Gekko examples to solve this one. Any help is much appreciated.
Another problem I am having is how to let Gekko automatically calculate initial values of control. Optimal control always starts with the specified initial guess of control.
from gekko import GEKKO
import numpy as np
import matplotlib.pyplot as plt
# create GEKKO model
m = GEKKO()

# time points on a normalized horizon [0, 1]; the derivatives below are
# divided by tf
n = 501
tm = np.linspace(0, 1, n)
m.time = tm

# Variables
x1 = m.Var(value=1) # x1
x2 = m.Var(value=2) # x2
# u = m.Var(value=-1) # control variable used as normal var
u = m.MV(value=-1) # manipulated variable
u.STATUS = 1
u.DCOST = 1e-5

# selector that is 1 at the last time point and 0 everywhere else
p = np.zeros(n)
p[-1] = 1.0
final = m.Param(value=p)

# FV
tf = m.FV(value=10.0, lb=0.0, ub=100.0)
tf.STATUS = 1

# equations
m.Equation(x1.dt()/tf == x2)
m.Equation(x2.dt()/tf == u)

# Final conditions
soft = True
if soft:
    # soft terminal constraint: penalize x1 != 3 at the end point
    m.Minimize(final*1e5*(x1-3)**2)
    # m.Minimize(final*1e5*(x2-2)**2)
else:
    # hard terminal constraint
    x1f = m.Param()
    m.free(x1f)
    m.fix_final(x1f, 3)
    # connect endpoint parameter to x1
    m.Equations([x1f == x1])

# Objective Function
obj = m.Intermediate(tf*final*m.integral(0.5*u**2))
m.Minimize(final*obj)

m.options.IMODE = 6
m.options.NODES = 2
m.options.SOLVER = 3
m.options.MAX_ITER = 500
# m.options.MV_TYPE = 0
m.options.DIAGLEVEL = 0
m.solve(disp=False)

# Create a figure
# NOTE(review): tm is still the normalized grid here (not multiplied by the
# solved tf), so the 'Time' axis runs from 0 to 1 - confirm this is intended.
plt.figure(figsize=(10, 4))
plt.subplot(2, 2, 1)
# plt.plot([0,1],[1/9,1/9],'r:',label=r'$x<\frac{1}{9}$')
plt.plot(tm, x1.value, 'k-', lw=2, label=r'$x1$')
plt.ylabel('x1')
plt.legend(loc='best')

plt.subplot(2, 2, 2)
plt.plot(tm, x2.value, 'b--', lw=2, label=r'$x2$')
plt.ylabel('x2')
plt.legend(loc='best')

plt.subplot(2, 2, 3)
plt.plot(tm, u.value, 'r--', lw=2, label=r'$u$')
plt.ylabel('control')
plt.legend(loc='best')
plt.xlabel('Time')

plt.subplot(2, 2, 4)
plt.plot(tm, obj.value, 'g-', lw=2, label=r'$\frac{1}{2} \int u^2$')
plt.text(0.5, 3.0, 'Final Value = '+str(np.round(obj.value[-1], 2)))
plt.ylabel('Objective')
plt.legend(loc='best')
plt.xlabel('Time')
plt.show()
Here are a few modifications:
# u = m.MV(value=-1)
u = m.MV(value=-1,fixed_initial=False)
#obj = m.Intermediate(tf*final*m.integral(0.5*u**2))
obj = m.Intermediate(m.integral(0.5*u**2))
m.options.NODES = 3 # increase accuracy
If you add a constraint that tf<=3 then it gives the same solution as above.
However, if you relax the tf constraint to <=100 then there is a better solution.
from gekko import GEKKO
import numpy as np
import matplotlib.pyplot as plt
# create GEKKO model
m = GEKKO()

# time points on a normalized horizon [0, 1]; the derivatives below are
# divided by tf
n = 501
tm = np.linspace(0, 1, n)
m.time = tm

# Variables
x1 = m.Var(value=1) # x1
x2 = m.Var(value=2) # x2
u = m.MV(value=-1,fixed_initial=False) # manipulated variable
u.STATUS = 1
u.DCOST = 1e-5

# selector that is 1 at the last time point and 0 everywhere else
p = np.zeros(n)
p[-1] = 1.0
final = m.Param(value=p)

# FV
tf = m.FV(value=10.0, lb=0.0, ub=100.0)
tf.STATUS = 1

# equations
m.Equation(x1.dt()/tf == x2)
m.Equation(x2.dt()/tf == u)

# Final conditions
soft = True
if soft:
    # soft terminal constraint: penalize x1 != 3 at the end point
    m.Minimize(final*1e5*(x1-3)**2)
    # m.Minimize(final*1e5*(x2-2)**2)
else:
    # hard terminal constraint
    x1f = m.Param()
    m.free(x1f)
    m.fix_final(x1f, 3)
    # connect endpoint parameter to x1
    m.Equations([x1f == x1])

# Objective Function
obj = m.Intermediate(m.integral(0.5*u**2))
m.Minimize(final*obj)

m.options.IMODE = 6
m.options.NODES = 3
m.options.SOLVER = 3
m.options.MAX_ITER = 500
# m.options.MV_TYPE = 0
m.options.DIAGLEVEL = 0
m.solve(disp=True)

# Create a figure
tm = tm*tf.value[0]  # rescale the normalized horizon by the solved tf
plt.figure(figsize=(10, 4))
plt.subplot(2, 2, 1)
# plt.plot([0,1],[1/9,1/9],'r:',label=r'$x<\frac{1}{9}$')
plt.plot(tm, x1.value, 'k-', lw=2, label=r'$x1$')
plt.ylabel('x1')
plt.legend(loc='best')

plt.subplot(2, 2, 2)
plt.plot(tm, x2.value, 'b--', lw=2, label=r'$x2$')
plt.ylabel('x2')
plt.legend(loc='best')

plt.subplot(2, 2, 3)
plt.plot(tm, u.value, 'r--', lw=2, label=r'$u$')
plt.ylabel('control')
plt.legend(loc='best')
plt.xlabel('Time')

plt.subplot(2, 2, 4)
plt.plot(tm, obj.value, 'g-', lw=2, label=r'$\frac{1}{2} \int u^2$')
plt.text(0.5, 3.0, 'Final Value = '+str(np.round(obj.value[-1], 2)))
plt.ylabel('Objective')
plt.legend(loc='best')
plt.xlabel('Time')
plt.show()
I am learning how to use GEKKO for kinetic parameter estimation based on laboratory batch reactor data, which essentially consists of the concentration profiles of three species A, C, and P. For the purposes of my question, I am using a model that I previously featured in a question related to parameter estimation from a single data set.
My ultimate goal is to be able to use multiple experimental runs for parameter estimation, leveraging data that may be collected at different temperatures, species concentrations, etc. Due to the independent nature of individual batch reactor experiments, each data set features samples collected at different time points. These different time points (and in the future, different temperatures for instance) are difficult for me to implement into a GEKKO model, as I previously used the experimental data collection time points as the m.time parameter for the GEKKO model. (See end of post for code) I have solved problems like this in the past with gPROMS and Athena Visual Studio.
To illustrate my problem, I generated an artificial data set of 'experimental' data from my original model by introducing noise to the species concentration profiles, and shifting the experimental time points slightly. I then combined all data sets of the same experimental species into new arrays featuring multiple columns. My thought process here was that GEKKO would carry out the parameter estimation by using the experimental data of each corresponding column of the arrays, so that times_comb[:,0] would be related to A_comb[:,0] while times_comb[:,1] would be related to A_comb[:,1].
When I attempt to run the GEKKO model, the system does obtain a solution for the parameter estimation, but it is unclear to me if the problem solution is reasonable, as I notice that the GEKKO Variables A, B, C, and P are 34 element vectors, which is double the elements in each of the experimental data sets. I presume GEKKO is somehow combining both columns of the time and Parameter vectors during model setup that leads to those 34 element variables? I am also concerned that during this combination of the columns of each input parameter, that the relationship between a certain time point and the collected species information is lost.
How could I improve the use of multiple data sets that GEKKO can simultaneously use for parameter estimation, with the consideration that the time points of each data set may be different? I looked on the GEKKO documentation examples as well as the APMonitor website, but I could not find examples featuring multiple data sets that I could use for guidance, as I am fairly new to the GEKKO package.
Thank you for your time reading my question and for any help/ideas you may have.
Code below:
import numpy as np
import matplotlib.pyplot as plt
from gekko import GEKKO
#Experimental data
times = np.array([0.0, 0.071875, 0.143750, 0.215625, 0.287500, 0.359375, 0.431250,
                  0.503125, 0.575000, 0.646875, 0.718750, 0.790625, 0.862500,
                  0.934375, 1.006250, 1.078125, 1.150000])
A_obs = np.array([1.0, 0.552208, 0.300598, 0.196879, 0.101175, 0.065684, 0.045096,
                  0.028880, 0.018433, 0.011509, 0.006215, 0.004278, 0.002698,
                  0.001944, 0.001116, 0.000732, 0.000426])
C_obs = np.array([0.0, 0.187768, 0.262406, 0.350412, 0.325110, 0.367181, 0.348264,
                  0.325085, 0.355673, 0.361805, 0.363117, 0.327266, 0.330211,
                  0.385798, 0.358132, 0.380497, 0.383051])
P_obs = np.array([0.0, 0.117684, 0.175074, 0.236679, 0.234442, 0.270303, 0.272637,
                  0.274075, 0.278981, 0.297151, 0.297797, 0.298722, 0.326645,
                  0.303198, 0.277822, 0.284194, 0.301471])
#Generate second set of 'experimental data'
# One random offset in [0, 0.01) is drawn and added to every time point.
times_new = times + np.random.uniform(0.0,0.01)
# Gaussian noise (sigma = 0.05) is added to each species independently.
P_obs_noisy = P_obs+np.random.normal(0,0.05,P_obs.shape)
A_obs_noisy = A_obs+np.random.normal(0,0.05,A_obs.shape)
# Noise is added to C_obs here; this line previously started from A_obs,
# which made the second "C" data set a noisy copy of species A.
C_obs_noisy = C_obs+np.random.normal(0,0.05,C_obs.shape)
#Combine two data sets into multi-column arrays
times_comb = np.array([times, times_new]).T
P_comb = np.array([P_obs, P_obs_noisy]).T
A_comb = np.array([A_obs, A_obs_noisy]).T
C_comb = np.array([C_obs, C_obs_noisy]).T

m = GEKKO(remote=False)
t = m.time = times_comb #using two column time array
Am = m.Param(value=A_comb) #Using the two column data as observed parameter
Cm = m.Param(value=C_comb)
Pm = m.Param(value=P_comb)

# Species concentrations (initial value as first argument, kept non-negative)
A = m.Var(1, lb = 0)
B = m.Var(0, lb = 0)
C = m.Var(0, lb = 0)
P = m.Var(0, lb = 0)

# Six rate constants, all released to the optimizer (STATUS = 1)
k = m.Array(m.FV,6,value=1,lb=0)
for ki in k:
    ki.STATUS = 1
k1,k2,k3,k4,k5,k6 = k

# Reaction rates
r1 = m.Var(0, lb = 0)
r2 = m.Var(0, lb = 0)
r3 = m.Var(0, lb = 0)
r4 = m.Var(0, lb = 0)
r5 = m.Var(0, lb = 0)
r6 = m.Var(0, lb = 0)
m.Equation(r1 == k1 * A)
m.Equation(r2 == k2 * A * B)
m.Equation(r3 == k3 * C * B)
m.Equation(r4 == k4 * A)
m.Equation(r5 == k5 * A)
m.Equation(r6 == k6 * A * B)

#mass balance diff eqs built from the rate variables above
m.Equation(A.dt() == - r1 - r2 - r4 - r5 - r6)
m.Equation(B.dt() == r1 - r2 - r3 - r6)
m.Equation(C.dt() == r2 - r3 + r4)
m.Equation(P.dt() == r3 + r5 + r6)

# Least-squares objectives against the observed A, P and C
m.Minimize((A-Am)**2)
m.Minimize((P-Pm)**2)
m.Minimize((C-Cm)**2)

m.options.IMODE = 5
m.options.SOLVER = 3 #IPOPT optimizer
m.options.NODES = 6
m.solve()

k_opt = [ki.value[0] for ki in k]
print(k_opt)

plt.plot(t,A)
plt.plot(t,C)
plt.plot(t,P)
plt.plot(t,B)
plt.plot(times,A_obs,'bo')
plt.plot(times,C_obs,'gx')
plt.plot(times,P_obs,'rs')
plt.plot(times_new, A_obs_noisy,'b*')
plt.plot(times_new, C_obs_noisy,'g*')
plt.plot(times_new, P_obs_noisy,'r*')
plt.show()
To have multiple data sets with different times and data points, you can join the data sets as a pandas dataframe. Here is a simple example:
# data set 1
t_data1 = [0.0, 0.1, 0.2, 0.4, 0.8, 1.00]
x_data1 = [2.0, 1.6, 1.2, 0.7, 0.3, 0.15]
# data set 2
t_data2 = [0.0, 0.15, 0.25, 0.45, 0.85, 0.95]
x_data2 = [3.6, 2.25, 1.75, 1.00, 0.35, 0.20]
The merged data has NaN where the data is missing:
x1 x2
Time
0.00 2.0 3.60
0.10 1.6 NaN
0.15 NaN 2.25
0.20 1.2 NaN
0.25 NaN 1.75
Take note of where the data is missing with a 1=measured and 0=not measured.
# indicate which points are measured
z1 = (data['x1']==data['x1']).astype(int) # 0 if NaN
z2 = (data['x2']==data['x2']).astype(int) # 1 if number
The final step is to set up Gekko variables, equations, and objective to accommodate the data sets.
xm = m.Array(m.Param,2)  # measurements, one Param per data set
zm = m.Array(m.Param,2)  # 1 = measured, 0 = not measured
for i in range(2):
    m.Equation(x[i].dt()== -k * x[i]) # differential equations
    # zm[i] zeroes the squared error wherever data set i has no sample
    m.Minimize(zm[i]*(x[i]-xm[i])**2) # objectives
You can also calculate the initial condition with m.free_initial(x[i]). This gives an optimal solution for one parameter value (k) over the 2 data sets. This approach can be expanded to multiple variables or multiple data sets with different times.
from gekko import GEKKO
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# data set 1
t_data1 = [0.0, 0.1, 0.2, 0.4, 0.8, 1.00]
x_data1 = [2.0, 1.6, 1.2, 0.7, 0.3, 0.15]
# data set 2
t_data2 = [0.0, 0.15, 0.25, 0.45, 0.85, 0.95]
x_data2 = [3.6, 2.25, 1.75, 1.00, 0.35, 0.20]
# index each set by its sample time, then take the union of both time grids
data1 = pd.DataFrame({'Time':t_data1,'x1':x_data1}).set_index('Time')
data2 = pd.DataFrame({'Time':t_data2,'x2':x_data2}).set_index('Time')
data = data1.join(data2,how='outer')
print(data.head())
# measurement masks: 1 where a value exists, 0 where the join left a NaN
z1 = data['x1'].notna().astype(int)
z2 = data['x2'].notna().astype(int)
# the masks now carry the "missing" information, so fill the gaps with a
# placeholder number (0)
data = data.fillna(0)
m = GEKKO(remote=False)

# measurements
xm = m.Array(m.Param,2)
xm[0].value = data['x1'].values
xm[1].value = data['x2'].values

# index for objective (0=not measured, 1=measured)
zm = m.Array(m.Param,2)
zm[0].value=z1
zm[1].value=z2

# the model runs on the union of both data sets' time points
m.time = data.index

x = m.Array(m.Var,2)                   # fit to measurement
x[0].value=x_data1[0]; x[1].value=x_data2[0]
k = m.FV(); k.STATUS = 1               # adjustable parameter

for i in range(2):
    m.free_initial(x[i])               # calculate initial condition
    m.Equation(x[i].dt()== -k * x[i])  # differential equations
    m.Minimize(zm[i]*(x[i]-xm[i])**2)  # objectives

m.options.IMODE = 5   # dynamic estimation
m.options.NODES = 2   # collocation nodes
m.solve(disp=True)    # solve
k = k.value[0]        # from here on k is the fitted number, not the FV
print('k = '+str(k))

# plot solution
plt.plot(m.time,x[0].value,'b.--',label='Predicted 1')
plt.plot(m.time,x[1].value,'r.--',label='Predicted 2')
plt.plot(t_data1,x_data1,'bx',label='Measured 1')
plt.plot(t_data2,x_data2,'rx',label='Measured 2')
plt.legend(); plt.xlabel('Time'); plt.ylabel('Value')
plt.show()
Including my updated code (not fully cleaned up to minimize number of variables) incorporating the selected answer to my question for reference. The model does a regression of 3 measured species in two separate 'datasets.'
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from gekko import GEKKO
#Experimental data
times = np.array([0.0, 0.071875, 0.143750, 0.215625, 0.287500, 0.359375, 0.431250,
                  0.503125, 0.575000, 0.646875, 0.718750, 0.790625, 0.862500,
                  0.934375, 1.006250, 1.078125, 1.150000])
A_obs = np.array([1.0, 0.552208, 0.300598, 0.196879, 0.101175, 0.065684, 0.045096,
                  0.028880, 0.018433, 0.011509, 0.006215, 0.004278, 0.002698,
                  0.001944, 0.001116, 0.000732, 0.000426])
C_obs = np.array([0.0, 0.187768, 0.262406, 0.350412, 0.325110, 0.367181, 0.348264,
                  0.325085, 0.355673, 0.361805, 0.363117, 0.327266, 0.330211,
                  0.385798, 0.358132, 0.380497, 0.383051])
P_obs = np.array([0.0, 0.117684, 0.175074, 0.236679, 0.234442, 0.270303, 0.272637,
                  0.274075, 0.278981, 0.297151, 0.297797, 0.298722, 0.326645,
                  0.303198, 0.277822, 0.284194, 0.301471])

#Generate second set of 'experimental data'
# One random offset in [0, 0.01) is drawn and added to every time point.
times_new = times + np.random.uniform(0.0,0.01)
P_obs_noisy = (P_obs+ np.random.normal(0,0.05,P_obs.shape))
A_obs_noisy = (A_obs+np.random.normal(0,0.05,A_obs.shape))
C_obs_noisy = (C_obs+np.random.normal(0,0.05,C_obs.shape))

#Combine two data sets into multi-column arrays using pandas DataFrames
#Set dataframe index to be combined time discretization of both data sets
exp1 = pd.DataFrame({'Time':times,'A':A_obs,'C':C_obs,'P':P_obs})
exp2 = pd.DataFrame({'Time':times_new,'A':A_obs_noisy,'C':C_obs_noisy,'P':P_obs_noisy})
exp1.set_index('Time',inplace=True)
exp2.set_index('Time',inplace=True)
#Outer join: rows are the union of both time grids, columns get _1/_2 suffixes
exps = exp1.join(exp2, how ='outer',lsuffix = '_1',rsuffix = '_2')
#print(exps.head())

#Measurement mask with the same index and columns as exps:
#1 where that data set has a sample at that time point, 0 where the join
#left a NaN. Must be computed before the NaNs are overwritten below.
meas_data = exps.notna().astype(int)
exps.fillna(0,inplace = True) #replace NaN with 0
m = GEKKO(remote=False)
t = m.time = exps.index #set GEKKO time domain to use experimental time points

#Generate two-column GEKKO arrays to store observed values of each species, A, C and P
Am = m.Array(m.Param,2)
Cm = m.Array(m.Param,2)
Pm = m.Array(m.Param,2)
Am[0].value = exps['A_1'].values
Am[1].value = exps['A_2'].values
Cm[0].value = exps['C_1'].values
Cm[1].value = exps['C_2'].values
Pm[0].value = exps['P_1'].values
Pm[1].value = exps['P_2'].values

#Define GEKKO variables that determine if time point contains data to be used in regression
#If time point contains species data, meas_ variable = 1, else = 0
meas_A = m.Array(m.Param,2)
meas_C = m.Array(m.Param,2)
meas_P = m.Array(m.Param,2)
meas_A[0].value = meas_data['A_1'].values
meas_A[1].value = meas_data['A_2'].values
meas_C[0].value = meas_data['C_1'].values
meas_C[1].value = meas_data['C_2'].values
meas_P[0].value = meas_data['P_1'].values
meas_P[1].value = meas_data['P_2'].values

#Define Variables for differential equations A, B, C, P, with initial conditions set by experimental observation at first time point
A = m.Array(m.Var,2, lb = 0)
B = m.Array(m.Var,2, lb = 0)
C = m.Array(m.Var,2, lb = 0)
P = m.Array(m.Var,2, lb = 0)
# NOTE(review): exps was NaN-filled with 0, so the '_2' entries looked up
# here are presumably the 0 placeholder unless data set 2 also has a sample
# at this time point; free_initial below lets the solver adjust them - confirm.
A[0].value = exps['A_1'][0]
A[1].value = exps['A_2'][0]
B[0].value = 0
B[1].value = 0
C[0].value = exps['C_1'][0]
C[1].value = exps['C_2'][0]
P[0].value = exps['P_1'][0]
P[1].value = exps['P_2'][0]

#Define kinetic coefficients, k1-k6 as regression FV's
k = m.Array(m.FV,6,value=1,lb=0,ub = 20)
for ki in k:
    ki.STATUS = 1
k1,k2,k3,k4,k5,k6 = k

#If doing parameter estimation, enable free_initial condition, else not include them in model to reduce DOFs (for simulation, for example)
if k1.STATUS == 1:
    for i in range(2):
        m.free_initial(A[i])
        m.free_initial(B[i])
        m.free_initial(C[i])
        m.free_initial(P[i])

#Define reaction rate variables
r1 = m.Array(m.Var,2, value = 1, lb = 0)
r2 = m.Array(m.Var,2, value = 1, lb = 0)
r3 = m.Array(m.Var,2, value = 1, lb = 0)
r4 = m.Array(m.Var,2, value = 1, lb = 0)
r5 = m.Array(m.Var,2, value = 1, lb = 0)
r6 = m.Array(m.Var,2, value = 1, lb = 0)

#Model Equations: one copy per data set, all sharing the same k1-k6
for i in range(2):
    #Rate equations
    m.Equation(r1[i] == k1 * A[i])
    m.Equation(r2[i] == k2 * A[i] * B[i])
    m.Equation(r3[i] == k3 * C[i] * B[i])
    m.Equation(r4[i] == k4 * A[i])
    m.Equation(r5[i] == k5 * A[i])
    m.Equation(r6[i] == k6 * A[i] * B[i])
    #Differential species balances
    m.Equation(A[i].dt() == - r1[i] - r2[i] - r4[i] - r5[i] - r6[i])
    m.Equation(B[i].dt() == r1[i] - r2[i] - r3[i] - r6[i])
    m.Equation(C[i].dt() == r2[i] - r3[i] + r4[i])
    m.Equation(P[i].dt() == r3[i] + r5[i] + r6[i])
    #Minimization objective functions; the meas_ masks zero the error at
    #time points where data set i has no sample
    m.Obj(meas_A[i]*(A[i]-Am[i])**2)
    m.Obj(meas_P[i]*(P[i]-Pm[i])**2)
    m.Obj(meas_C[i]*(C[i]-Cm[i])**2)

#Solver options
m.options.IMODE = 5
m.options.SOLVER = 3 #IPOPT optimizer (3 = IPOPT; APOPT would be 1)
m.options.NODES = 6
m.solve()

k_opt = [ki.value[0] for ki in k]
print(k_opt)

plt.plot(t,A[0],'b-')
plt.plot(t,A[1],'b--')
plt.plot(t,C[0],'g-')
plt.plot(t,C[1],'g--')
plt.plot(t,P[0],'r-')
plt.plot(t,P[1],'r--')
plt.plot(times,A_obs,'bo')
plt.plot(times,C_obs,'gx')
plt.plot(times,P_obs,'rs')
plt.plot(times_new, A_obs_noisy,'b*')
plt.plot(times_new, C_obs_noisy,'g*')
plt.plot(times_new, P_obs_noisy,'r*')
plt.show()
I was wondering how to adapt the following code from github batchnorm_five_layers to read in two classes (cats & dogs) from local image paths with image size 780x780 and RGB. Here is the uncommented code from the link:
# encoding: UTF-8
import tensorflow as tf
import tensorflowvisu
import math
from tensorflow.contrib.learn.python.learn.datasets.mnist import read_data_sets
# Fix the graph-level random seed so weight initialisation is repeatable.
tf.set_random_seed(0)
# Download images and labels into mnist.test (10K images+labels) and mnist.train (60K images+labels)
mnist = read_data_sets("data", one_hot=True, reshape=False, validation_size=0)
# input X: 28x28 grayscale images, the first dimension (None) will index the images in the mini-batch
X = tf.placeholder(tf.float32, [None, 28, 28, 1])
# correct answers will go here (one-hot labels, 10 classes)
Y_ = tf.placeholder(tf.float32, [None, 10])
# variable learning rate
lr = tf.placeholder(tf.float32)
# train/test selector for batch normalisation
tst = tf.placeholder(tf.bool)
# training iteration
# NOTE: this name shadows the Python builtin iter() for the rest of the script.
iter = tf.placeholder(tf.int32)
# five layers and their number of neurons (the last layer has 10 softmax neurons)
L = 200
M = 100
N = 60
P = 30
Q = 10
# Weights initialised with small random values between -0.2 and +0.2
# When using RELUs, make sure biases are initialised with small *positive* values for example 0.1 = tf.ones([K])/10
W1 = tf.Variable(tf.truncated_normal([784, L], stddev=0.1)) # 784 = 28 * 28
B1 = tf.Variable(tf.ones([L])/10)
W2 = tf.Variable(tf.truncated_normal([L, M], stddev=0.1))
B2 = tf.Variable(tf.ones([M])/10)
W3 = tf.Variable(tf.truncated_normal([M, N], stddev=0.1))
B3 = tf.Variable(tf.ones([N])/10)
W4 = tf.Variable(tf.truncated_normal([N, P], stddev=0.1))
B4 = tf.Variable(tf.ones([P])/10)
W5 = tf.Variable(tf.truncated_normal([P, Q], stddev=0.1))
B5 = tf.Variable(tf.ones([Q])/10)
def batchnorm(Ylogits, is_test, iteration, offset, convolutional=False):
    """Batch-normalise ``Ylogits`` and return the op that updates the averages.

    Args:
        Ylogits: pre-activation tensor to normalise.
        is_test: boolean tensor; when true the exponential moving averages
            of mean and variance are used, otherwise the moments of the
            current batch.
        iteration: training iteration, passed to ``ExponentialMovingAverage``.
        offset: learned offset added after normalisation (used in place of
            a bias).
        convolutional: if true, moments are taken over axes ``[0, 1, 2]``,
            otherwise over axis ``[0]`` only.

    Returns:
        A tuple ``(Ybn, update_moving_averages)``: the normalised tensor and
        the op that applies the moving-average update.
    """
    exp_moving_avg = tf.train.ExponentialMovingAverage(0.999, iteration) # adding the iteration prevents from averaging across non-existing iterations
    bnepsilon = 1e-5
    if convolutional:
        mean, variance = tf.nn.moments(Ylogits, [0, 1, 2])
    else:
        mean, variance = tf.nn.moments(Ylogits, [0])
    update_moving_averages = exp_moving_avg.apply([mean, variance])
    m = tf.cond(is_test, lambda: exp_moving_avg.average(mean), lambda: mean)
    v = tf.cond(is_test, lambda: exp_moving_avg.average(variance), lambda: variance)
    # scale is None: only the offset is applied after normalisation
    Ybn = tf.nn.batch_normalization(Ylogits, m, v, offset, None, bnepsilon)
    return Ybn, update_moving_averages
def no_batchnorm(Ylogits, is_test, iteration, offset, convolutional=False):
    """Pass-through with the same signature as ``batchnorm``.

    Returns ``Ylogits`` unchanged together with a no-op; every other
    argument is ignored.
    """
    return Ylogits, tf.no_op()
# The model
# Flatten each 28x28x1 image into a 784-element row vector.
XX = tf.reshape(X, [-1, 784])
# batch norm scaling is not useful with relus
# batch norm offsets are used instead of biases
# Each hidden layer is: matmul -> batchnorm (B* as offset) -> relu.
Y1l = tf.matmul(XX, W1)
Y1bn, update_ema1 = batchnorm(Y1l, tst, iter, B1)
Y1 = tf.nn.relu(Y1bn)
Y2l = tf.matmul(Y1, W2)
Y2bn, update_ema2 = batchnorm(Y2l, tst, iter, B2)
Y2 = tf.nn.relu(Y2bn)
Y3l = tf.matmul(Y2, W3)
Y3bn, update_ema3 = batchnorm(Y3l, tst, iter, B3)
Y3 = tf.nn.relu(Y3bn)
Y4l = tf.matmul(Y3, W4)
Y4bn, update_ema4 = batchnorm(Y4l, tst, iter, B4)
Y4 = tf.nn.relu(Y4bn)
# Output layer: plain bias, no batch norm, softmax over the Q classes.
Ylogits = tf.matmul(Y4, W5) + B5
Y = tf.nn.softmax(Ylogits)
# Single op that applies the moving-average updates of all four layers.
update_ema = tf.group(update_ema1, update_ema2, update_ema3, update_ema4)
cross_entropy = tf.nn.softmax_cross_entropy_with_logits(logits=Ylogits, labels=Y_)
# Mean loss over the batch, scaled by 100.
cross_entropy = tf.reduce_mean(cross_entropy)*100
# accuracy of the trained model, between 0 (worst) and 1 (best)
correct_prediction = tf.equal(tf.argmax(Y, 1), tf.argmax(Y_, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
# matplotlib visualisation
# Only the first three layers' weights and biases are collected here.
allweights = tf.concat([tf.reshape(W1, [-1]), tf.reshape(W2, [-1]), tf.reshape(W3, [-1])], 0)
allbiases = tf.concat([tf.reshape(B1, [-1]), tf.reshape(B2, [-1]), tf.reshape(B3, [-1])], 0)
# to use for sigmoid
#allactivations = tf.concat([tf.reshape(Y1, [-1]), tf.reshape(Y2, [-1]), tf.reshape(Y3, [-1]), tf.reshape(Y4, [-1])], 0)
# to use for RELU
allactivations = tf.concat([tf.reduce_max(Y1, [0]), tf.reduce_max(Y2, [0]), tf.reduce_max(Y3, [0]), tf.reduce_max(Y4, [0])], 0)
alllogits = tf.concat([tf.reshape(Y1l, [-1]), tf.reshape(Y2l, [-1]), tf.reshape(Y3l, [-1]), tf.reshape(Y4l, [-1])], 0)
I = tensorflowvisu.tf_format_mnist_images(X, Y, Y_)
It = tensorflowvisu.tf_format_mnist_images(X, Y, Y_, 1000, lines=25)
datavis = tensorflowvisu.MnistDataVis(title4="Logits", title5="Max activations across batch", histogram4colornum=2, histogram5colornum=2)
# training step, the learning rate is a placeholder
train_step = tf.train.AdamOptimizer(lr).minimize(cross_entropy)
# init
init = tf.global_variables_initializer()
sess = tf.Session()
sess.run(init)
# You can call this function in a loop to train the model, 100 images at a time
def training_step(i, update_test_data, update_train_data):
    """Run one training iteration and optionally refresh the visualisation.

    i: iteration index; it drives the learning-rate decay and is fed to the
        ``iter`` placeholder when the moving averages are updated.
    update_test_data: when true, evaluate on ``mnist.test`` and push the
        results to ``datavis``.
    update_train_data: when true, evaluate on the current training batch and
        push the results to ``datavis``.
    """
    # training on batches of 100 images with 100 labels
    images, labels = mnist.train.next_batch(100)

    # learning rate decays exponentially from lr_max towards lr_min as i grows
    lr_max, lr_min, decay_speed = 0.03, 0.0001, 1000.0
    learning_rate = lr_min + (lr_max - lr_min) * math.exp(-i/decay_speed)

    # compute training values for visualisation
    if update_train_data:
        train_feed = {X: images, Y_: labels, tst: False}
        train_fetches = [accuracy, cross_entropy, I, alllogits, allactivations]
        a, c, im, al, ac = sess.run(train_fetches, train_feed)
        print(str(i) + ": accuracy:" + str(a) + " loss: " + str(c) + " (lr:" + str(learning_rate) + ")")
        datavis.append_training_curves_data(i, a, c)
        datavis.update_image1(im)
        datavis.append_data_histograms(i, al, ac)

    # compute test values for visualisation
    if update_test_data:
        test_feed = {X: mnist.test.images, Y_: mnist.test.labels, tst: True}
        a, c, im = sess.run([accuracy, cross_entropy, It], test_feed)
        epoch = i*100//mnist.train.images.shape[0]+1
        print(str(i) + ": ********* epoch " + str(epoch) + " ********* test accuracy:" + str(a) + " test loss: " + str(c))
        datavis.append_test_curves_data(i, a, c)
        datavis.update_image2(im)

    # the backpropagation training step, followed by the moving-average update
    backprop_feed = {X: images, Y_: labels, lr: learning_rate, tst: False}
    sess.run(train_step, backprop_feed)
    ema_feed = {X: images, Y_: labels, tst: False, iter: i}
    sess.run(update_ema, ema_feed)
# Hand training_step to the visualiser's animation loop, then report the best
# test accuracy it recorded.
datavis.animate(
    training_step,
    iterations=10000+1,
    train_data_update_freq=20,
    test_data_update_freq=100,
    more_tests_at_start=True
)
print("max test accuracy: " + str(datavis.get_max_test_accuracy()))
To answer your question in the comments: this is probably what you want to change your code into:
# Input images: the first dimension (None) indexes the images in the
# mini-batch, the remaining dimensions are 780 x 780 x 3.
X = tf.placeholder(dtype=tf.float32, shape=[None, 780, 780, 3])
# The correct answers are fed here, two values per image.
Y_ = tf.placeholder(dtype=tf.float32, shape=[None, 2])
And an image can be read like this:
from scipy import misc
# Read the image file 'input.png' into memory.
# NOTE(review): `scipy.misc.imread` was deprecated in SciPy 1.0 and removed in
# SciPy 1.2, so this call fails on current SciPy releases. The name `input`
# also shadows the Python builtin of the same name.
input = misc.imread('input.png')
Now it might be best to follow a TensorFlow tutorial. This one is really good: kadenze.com/courses/creative-applications-of-deep-learning-with-tensorflow-iv/info
Good luck!
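Logistic regression's objective function is
$$f(w) = \frac{1}{2} w^T w + C \sum_{i=1}^{l} \log\left(1 + e^{-y_i w^T x_i}\right)$$
and the gradient is
$$\nabla f(w) = w - C \sum_{i=1}^{l} \frac{y_i}{1 + e^{y_i w^T x_i}} \, x_i$$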
where w is a SciPy CSR sparse matrix of dimension n-by-1.
My question is about the case where I have one SciPy CSR sparse matrix and one NumPy array, X_train and y_train respectively. (Each row of X_train is x_i, and each element of y_train is y_i.)
Is there a better way to calculate the gradient without writing a manual for loop?
For further information, I'm implementing large scale logistic regression. Therefore the performance is important.
Thanks.
Update 5/19 (Add my current code)
Thanks to @Jaime for the reminder; here is my code. I basically want to see if there is a better way to implement gradient(X, y, w).
import numpy as np
import scipy as sp
from sklearn import datasets
from numpy.linalg import norm
from scipy import sparse
# eta is not referenced by any of the code shown below.
eta = 0.01
# xi is the relative tolerance in cg()'s stopping test
# (residual norm <= xi * gradient norm).
xi = 0.1
# C weights the X^T D X term in hes_mul().
C = 1
# Load the training and test sets from svmlight-format files; the test set is
# loaded with the same number of features as the training set.
X_train, y_train = datasets.load_svmlight_file('lr/datasets/a9a')
X_test, y_test = datasets.load_svmlight_file('lr/datasets/a9a.t', n_features=X_train.shape[1])
def gradient(X, y, w):
    """Return ``w - sum_i y_i / (1 + exp(y_i * x_i . w)) * x_i`` as a column.

    The sum over samples is evaluated with two sparse matrix products
    instead of a Python loop over the rows of ``X``.

    X: sparse matrix with one sample ``x_i`` per row.
    y: 1-D array with one label ``y_i`` per row of ``X``.
    w: column vector with one entry per column of ``X`` (sparse in the
        calling code; a dense column is accepted as well).

    Returns a column of the same shape as ``w``; it is a sparse matrix when
    ``w`` is sparse.

    NOTE: the sum is not scaled by the module-level ``C`` that ``hes_mul``
    applies, so the two agree only while ``C == 1``.
    """
    scores = X.dot(w)
    if sparse.issparse(scores):
        # sparse @ sparse gives a sparse column; densify it once for the
        # element-wise maths below.
        scores = scores.toarray()
    margins = np.asarray(y) * np.asarray(scores).ravel()

    # One coefficient per sample: y_i / (1 + exp(y_i * x_i . w)).
    coefficients = np.asarray(y) / (1.0 + np.exp(margins))

    # X^T applied to the coefficients is the weighted sum of the rows of X.
    weighted_rows = X.T.dot(coefficients)
    return w - sparse.csr_matrix(weighted_rows.reshape(-1, 1))
def hes_mul(X, D, s):
    """Return ``s + C * X^T (D (X s))`` (presumably the Hessian-vector product).

    X: data matrix.
    D: matrix applied between ``X`` and ``X^T`` (``cg`` passes a diagonal one).
    s: column vector to multiply; the result is a column vector as well.

    ``C`` is read from module scope.
    """
    projected = X.dot(s)
    reweighted = D.dot(projected)
    pulled_back = X.T.dot(reweighted)
    return s + C * pulled_back
def cg(X, y, w):
    """Run conjugate-gradient iterations and return the resulting step ``s``.

    Starting from ``s = 0``, the loop updates ``s`` using the products
    returned by ``hes_mul(X, D, .)`` and the right-hand side
    ``-gradient(X, y, w)``. It stops as soon as the residual norm is at most
    ``xi`` (module-level) times the gradient norm.

    X: sparse matrix with one sample per row.
    y: 1-D array with one label per row of ``X``.
    w: sparse column vector with one entry per column of ``X``.

    Returns ``s`` as a sparse column vector. The residual norm and the
    gradient norm are printed on every iteration.
    """
    # gradF is a col vector, so s, r and d are col vectors too
    gradF = gradient(X, y, w)
    # The gradient does not change inside the loop, so its norm is computed once.
    grad_norm = np.sqrt((gradF.data ** 2).sum())

    # Size the step from the X that was passed in, not from the global X_train.
    s = sparse.csr_matrix(np.zeros(X.shape[0 + 1])).T
    r = -1 * gradF
    d = r

    # Diagonal of D: exp(-m_i) / (1 + exp(-m_i))**2 with m_i = y_i * x_i . w.
    # The expression is even in m_i, so it is evaluated at -|m_i|; that keeps
    # exp() from overflowing (which would turn the entry into inf/inf = nan).
    scores = X.dot(w)
    if sparse.issparse(scores):
        scores = scores.toarray()
    margins = np.asarray(y) * np.asarray(scores).ravel()
    decayed = np.exp(-np.abs(margins))
    D = sparse.diags(decayed / (1 + decayed) ** 2, 0)

    while True:
        r_norm = np.sqrt((r.data ** 2).sum())
        # Single-argument print() behaves the same on Python 2 and Python 3.
        print(r_norm)
        print(grad_norm)
        if r_norm <= xi * grad_norm:
            return s
        hes_mul_d = hes_mul(X, D, d)
        alpha = (r_norm ** 2) / d.T.dot(hes_mul_d)[0, 0]
        s = s + alpha * d
        r = r - alpha * hes_mul_d
        # Ratio of the new squared residual norm to the previous one.
        beta = (r.data ** 2).sum() / (r_norm ** 2)
        d = r + beta * d
# Start from an all-zero weight column with one entry per training feature,
# then run cg on the training data from that starting point.
w = sparse.csr_matrix(np.zeros(X_train.shape[1])).transpose()
s = cg(X_train, y_train, w)