Regression with constraints on contribution from variables - gekko

I'm trying to develop a regression model with constraints on effect from the independent variables. So my model equation is y = a0 + a1x1 + a2x2 with 200 datapoints. What I want to achieve is sum(a1x1) over 200 datapoints should fall in certain range i.e. lb1<sum(a1x1)<ub1. I am using Gekko for the optimization part and a got stuck while applying this condition.
I am using the following code where ubdict is the dictionary for the boundaries:
m = gk.GEKKO(remote=False)
m.options.IMODE=2 #Regression mode
y = np.array(df['y']) #dependant vars for optimization
x = np.array(df[X]) #array of independent vars for optimization
n = x.shape[1] #number of variables
c = m.Array(m.FV, n+1) #array of parameters and intercept
for ci in c:
ci.STATUS = 1 #calculate fixed parameter
xp = [None]*n
#load data
xd = m.Array(m.Param,n)
yd = m.Param(value=y)
for i in range(n):
xd[i].value = x[:,i]
xp[i] = m.Var()
if ubound_dict[i] >= 0:
xp[i] = m.Var(lb=0, ub=ubdict[i])
elif ubound_dict[i] < 0:
xp[i] = m.Var(lb=ubdict[i], ub=0)
m.Equation(xp[i]==c[i]*xd[i])
yp = m.Var()
m.Equation(yp==m.sum([xp[i] for i in range(n)] + [c[n]]))
#Minimize difference between actual and predicted y
m.Minimize((yd-yp)**2)
#APOPT solver
m.options.SOLVER = 1
#Solve
m.solve(disp=True)
#Retrieve parameter values
a = [i.value[0] for i in c]
print(a)
But this is applying the constraint row-wise. What I want is something like
xp[i] = m.Var(lb=0, ub=ubdict[i])
m.Equation(xp[i]==sum(c[i]*xd[i]) over observations)
Any suggestion would be of great help!

Below is a similar problem with sample data.
Regression Mode with IMODE=2
Use the m.vsum() object in Gekko with IMODE=2. Gekko lets you write the equations once and then applies the data to each equation. This is more efficient for large-scale data sets.
import numpy as np
from gekko import GEKKO
# load data
x1 = np.array([1,2,5,3,2,5,2])
x2 = np.array([5,6,7,2,1,3,2])
ym = np.array([3,2,3,5,6,7,8])
# model
m = GEKKO()
c = m.Array(m.FV,3)
for ci in c:
ci.STATUS=1
x1 = m.Param(value=x1)
x2 = m.Param(value=x2)
ymeas = m.Param(value=ym)
ypred = m.Var()
m.Equation(ypred == c[0] + c[1]*x1 + c[2]*x2)
# add constraint on sum(c[1]*x1) with vsum
v1 = m.Var(); m.Equation(v1==c[1]*x1)
con = m.Var(lb=0,ub=10); m.Equation(con==m.vsum(v1))
m.Minimize((ypred-ymeas)**2)
m.options.IMODE = 2
m.solve()
print('Final SSE Objective: ' + str(m.options.objfcnval))
print('Solution')
for i,ci in enumerate(c):
print(i,ci.value[0])
# plot solution
import matplotlib.pyplot as plt
plt.figure(figsize=(8,4))
plt.plot(ymeas,ypred,'ro')
plt.plot([0,10],[0,10],'k-')
plt.xlabel('Meas')
plt.ylabel('Pred')
plt.savefig('results.png',dpi=300)
plt.show()
Optimization Mode (IMODE=3)
The optimization mode 3 allows you to write each equation and objective term individually. Both give the same solution.
import numpy as np
from gekko import GEKKO
# load data
x1 = np.array([1,2,5,3,2,5,2])
x2 = np.array([5,6,7,2,1,3,2])
ym = np.array([3,2,3,5,6,7,8])
n = len(ym)
# model
m = GEKKO()
c = m.Array(m.FV,3)
for ci in c:
ci.STATUS=1
yp = m.Array(m.Var,n)
for i in range(n):
m.Equation(yp[i]==c[0]+c[1]*x1[i]+c[2]*x2[i])
m.Minimize((yp[i]-ym[i])**2)
# add constraint on sum(c[1]*x1)
s = m.Var(lb=0,ub=10); m.Equation(s==c[1]*sum(x1))
m.options.IMODE = 3
m.solve()
print('Final SSE Objective: ' + str(m.options.objfcnval))
print('Solution')
for i,ci in enumerate(c):
print(i,ci.value[0])
# plot solution
import matplotlib.pyplot as plt
plt.figure(figsize=(8,4))
ypv = [yp[i].value[0] for i in range(n)]
plt.plot(ym,ypv,'ro')
plt.plot([0,10],[0,10],'k-')
plt.xlabel('Meas')
plt.ylabel('Pred')
plt.savefig('results.png',dpi=300)
plt.show()
For future questions, please create a simple and complete example that demonstrates the issue.

Related

Runge-Kutta curve fitting extremely slow

I am currently trying to do a regression of a function calculated via a RK4 method performed on a non-linear Volterra integral of the second kind. The problem I found is that the code is extremely slow, for 1 call of the curve_fit function (fitt), it takes about 30-40 minute to generate a data. Overall, there will be a lot of calls to fitt before the parameters are determined, this takes more than 6 hours to run. Is there anyway to optimize this code? Thanks in advance!
from scipy.special import gamma
from ml_internal import LTInversion
from scipy.optimize import curve_fit , fsolve
from scipy.misc import derivative
from sklearn.metrics import r2_score
from math import comb , factorial
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
sns.set()
# Gets the data
df = pd.read_excel('D:\\CoMat\\Fractional_fit\\optimized\\data_optimized.xlsx')
skipTime = 1
skipIndex = df[df['Time']== skipTime].index.values[0]
xls = pd.read_excel('D:\\CoMat\\Fractional_fit\\optimized\\data_optimized.xlsx',skiprows=np.arange(1,skipIndex+1,1))
timeDF = xls['Time']
tempDF = xls['Temp']
taDF = xls['Ta']
timeDF = timeDF - timeDF[0]
tempDF = tempDF + 273.15
t0 = tempDF[0]
ta = sum(taDF)/len(taDF)
ta = ta + 273.15
###########################################
#Spliting into intervals
h = 0.05
a = 0
b = timeDF[len(timeDF)-1]
N = int(np.round((b-a)/h))
#Each xi
def xidx(index):
return a + h*index
#Function in the image are written here.
def gx(t,lamda,alpha):
return t0 * ml(lamda*(t**alpha),alpha)
gx = np.vectorize(gx)
def kernel(t,s,rad,lamda,alpha,beta):
if t == s:
return 0
return (t-s)**(alpha-1) * ml_(lamda*((t-s)**alpha),alpha,alpha,1) * (beta*(rad**4) - beta*(ta**4) - lamda*ta)
kernel = np.vectorize(kernel)
############################
# The problem is here!!!!!!
def fx(x,n,lamda,alpha,beta):
ans = gx(x,lamda,alpha)
for j in range(n):
ans += (h/6)*(kernel(x,xidx(j),f0[j],lamda,alpha,beta) + 2*kernel(x,xidx(j+1/2),f1[j],lamda,alpha,beta) + 2*kernel(x,xidx(j+1/2),f2[j],lamda,alpha,beta) + kernel(x,xidx(j+1),f3[j],lamda,alpha,beta))
return ans
#########################
f0 = np.zeros(N+1)
f0[0] = t0
f1 = np.zeros(N+1)
f2 = np.zeros(N+1)
f3 = np.zeros(N+1)
F = np.zeros((3,N+1))
def fitt(xvalue,lamda,alpha,beta):
global f0,f1,f2,f3,F
n = int(np.round(xvalue/h))
f1[n] = fx(xidx(n) + 1/2,n,lamda,alpha,beta) + (h/2)*kernel(xidx(n + 1/2),xidx(n),f0[n],lamda,alpha,beta)
f2[n] = fx(xidx(n + 1/2),n,lamda,alpha,beta)
f3[n] = fx(xidx(n+1),n,lamda,alpha,beta) + h*kernel(xidx(n+1),xidx(n+1/2),f2[n],lamda,alpha,beta)
if n+1 <= N:
f0[n+1] = fx(xidx(n+1),n,lamda,alpha,beta) + (h/6)*(kernel(xidx(n+1),xidx(n),f0[n],lamda,alpha,beta) + 2*kernel(xidx(n+1),xidx(n+1/2),f1[n],lamda,alpha,beta) + 2*kernel(xidx(n+1),xidx(n+1/2),f2[n],lamda,alpha,beta) + kernel(xidx(n+1),xidx(n+1),f3[n],lamda,alpha,beta))
if(xvalue == timeDF[len(timeDF) - 1]):
print(f0[n],n)
returnValue = f0[n]
f0 = np.zeros(N+1)
f0[0] = t0
f1 = np.zeros(N+1)
f2 = np.zeros(N+1)
f3 = np.zeros(N+1)
return returnValue
print(f0[n],n)
return f0[n]
fitt = np.vectorize(fitt)
#Fitting, plotting and giving (Adj) R-squared
popt , pcov = curve_fit(fitt,timeDF,tempDF,p0=(-0.1317,0.95,-1e-11),bounds=((-np.inf,0,-np.inf),(0,1,0)))
print(popt)
y_fit = np.array(fitt(timeDF,popt[0],popt[1],popt[2]))
plt.scatter(timeDF,tempDF,color='ORANGE',marker='.',s=0.5)
plt.fill_between(timeDF,tempDF-0.5,tempDF+0.5,color='ORANGE', alpha=0.2)
plt.plot(timeDF,y_fit,color='RED',linewidth=1)
plt.legend(["Experimental data", "Caputo fit"], loc ="upper right")
plt.xlabel("Time (min)")
plt.ylabel("Temperature (Kelvin)")
plt.show()
plt.close()
r2 = r2_score(tempDF,y_fit)
print(r2)
adjr2 = 1 - (1 - r2)*((len(xls)-1)/(len(xls)-3-1))
print(adjr2)
I already tried computing the values f0,f1,f2,f3 all at once, but the thing consuming the most time is Fn(x) which I haven't figured in out how to compute them all at once. If this is possible to compute at once, I think the program will run much faster. PS: ML,ML_ is a function from https://github.com/khinsen/mittag-leffler.
This is the function necesssary. Fn is the only one I haven't figured out yet.
There are two typing errors in the cited image. The combination of x_n and 1/2 is always meant to be the midpoint x_{n+1/2} = x_n + h/2. The second error is a duplication of x_{n+1/2} in the formula for f^{(4)}_n in its third term. The first error is probably producing errors that are large enough to make convergence complicated and any limit wrong for the intended problem.
In the Simpson/RK4 step, the 4 fx computations can be reduced to 2.
The F_n implement the left side of the integral equation
F(x) = g(x) + int(s=0 to x of K(x,s,f(s))
where the integral is approximated with the sample sequences f0,...,f3. Due to the structure of problem and algorithm F_n(x_n)=f^0_n = f^4_{n-1}.
Note that K(x,s,f) should be set to zero for s >= x. In the exact version of the equation these values "above the diagonal" are not used.
If an increase in accuracy is needed, for instance to avoid divergence where there is none in the exact solution, you can decrease the step site by a factor of 10 and then sub-sample the f^0_n sequence to produce the numerical guess for the given data. Other factors than 10 are of course also possible.

Issues with m.if2, m.abs2 in implementing a contact switch

This is a continuation of a prior question with a slightly different emphasis.
In summary, the prior solution helped with the implementation of the model inputs. The below model is working and provides a solution for the full contact condition. The framework and basic mechanics are in place for the variable contact constraint. However, no solution is available for when the contact switch variable is applied to the geometric constraints, whether as a Param or FV. (Note: no issues when applied to the dynamic equations).
One thing I've noted is that the m.if2 output is not correct in the [0] position. Below is the output of the switch-related variables:
adiff= [0.1, 0.10502512563, 0.11005025126, 0.11507537688, 0.12010050251,...
bdiff= [1.0, 0.99497487437, 0.98994974874, 0.98492462312, 0.97989949749,...
swtch= [0.1, 0.10449736118, 0.10894421858, 0.11334057221, 0.11768642206,...
c= [0.0, 1.000000005, 1.000000005, 1.000000005, 1.000000005, 1.000000005,...
Based on the logic swtch = adiff*bdiff and m.if2(swtch-thres,0,1), c[0] should be ~1.0. I've played with these parameters and haven't found a way to affect that first cell. I can't say for sure that this initial position is causing issues, but this seems like an erroneous output regardless.
Second, given that the m.if() outputs as approximately 0 and 1, I've attempted to soften the geometric constraint as: m.abs2({constraint}) <= {tol}. Even in the case when a generous tolerance is applied and c is excluded, this fails to produce a solution (whereas the hard constraint will).
Any suggestions for correcting either issue are appreciated.
Lastly, in the prior post, the use of m.integral() for setting the value of c was suggested. I'm unclear if that entails using if2 as well. If you can expand on implementing a switch that enables at t=a and switches off at t=b using an integral, that would be appreciated.
Full code:
###Import Libraries
import math
import matplotlib
matplotlib.use("TkAgg")
import matplotlib.animation as animation
import numpy as np
from gekko import GEKKO
###Defining a model
m = GEKKO(remote=True)
v = 1 #set walking speed (m/s)
L1 = .5 #set thigh length (m)
L2 = .5 #set shank length (m)
M = 75 #set mass (kg)
#################################
###Define secondary parameters
D = L1 + L2 #leg length parameter
pi = math.pi #define pi
g = 9.81 #define gravity
###Define initial and final conditions and limits
xmin = -D; xmax = D
xdotmin = .5*v; xdotmax = 1.5*v
ymin = 0*D; ymax = 5*D
q1min = -pi/2; q1max = pi/2
q2min = -pi/2; q2max = -.01
tfmin = .25; tfmax = 10
#amin = 0; amax = .45 #limits for FVs (future capability)
#bmin = .55; bmax = 1
###Defining the time parameter (0, 1)
N = 200
t = np.linspace(0,1,N)
m.time = t
###Final time Fixed Variable
TF = m.FV(1,lb=tfmin,ub=tfmax); TF.STATUS = 1
end_loc = len(m.time)-1
###Defining initial and final condition vectors
init = np.zeros(len(m.time))
final = np.zeros(len(m.time))
init[1] = 1
final[-1] = 1
init = m.Param(value=init)
final = m.Param(value=final)
###Parameters
M = m.Param(value=M) #cart mass
L1 = m.Param(value=L1) #link 1 length
L2 = m.Param(value=L2) #link 1 length
g = m.Const(value=g) #gravity
###Control Input Manipulated Variable
u = m.MV(0, lb=-70, ub=70); u.STATUS = 1
###Ground Contact Fixed Variables
#as fixed variables (future state)
#a = m.FV(0,lb=amin,ub=amax); a.STATUS = 1 #equates to the unscaled time when contact first occurs
#b = m.FV(1,lb=bmin,ub=bmax); b.STATUS = 1 #equates to the unscaled time when contact last occurs
#as fixed parameter
a = m.Param(value=-.1) #a<0 to drive m.time-a positive
b = m.Param(value=1)
###State Variables
x, y, xdot, ydot, q1, q2 = m.Array(m.Var, 6)
#Define BCs
m.free_initial(x)
m.free_final(x)
m.free_initial(xdot)
m.free_final(xdot)
m.free_initial(y)
m.free_initial(ydot)
#Define Limits
y.LOWER = ymin; y.UPPER = ymax
x.LOWER = xmin; x.UPPER = xmax
xdot.LOWER = xdotmin; xdot.UPPER = xdotmax
q1.LOWER = q1min; q1.UPPER = q1max
q2.LOWER = q2min; q2.UPPER = q2max
###Intermediates
xdot_int = m.Intermediate(final*m.integral(xdot)) #for average velocity constraint
adiff = m.Param(m.time-a.VALUE) #positive if m.time>a
bdiff = m.Param(b.VALUE-m.time) #positive if m.time<b
swtch = m.Intermediate(adiff*bdiff) #positive if m.time > a AND m.time < b
thres = .001
c = m.if2(swtch-thres,0,1) #c=0 if swtch <0, c=1 if swtch >0
###Defining the State Space Model
m.Equation(xdot.dt()/TF == -c*u*(L1*m.sin(q1)
+L2*m.sin(q1+q2))
/(M*L1*L2*m.sin(q2)))
m.Equation(ydot.dt()/TF == c*u*(L1*m.cos(q1)
+L2*m.cos(q1+q2))
/(M*L1*L2*m.sin(q2))-g)
m.Equation(x.dt()/TF == xdot)
m.Equation(y.dt()/TF == ydot)
m.periodic(y) #initial and final y position must be equal
m.periodic(ydot) #initial and final y velocity must be equal
m.periodic(xdot) #initial and final x velocity must be equal
m.Equation(m.abs2(xdot_int*final - v*final) <= .02) #soft constraint for average velocity ~= v
###Geometric constraints
#with no contact switch, this works
m.Equation(x + L1*m.sin(q1) + L2*m.sin(q1+q2) == 0) #x geometric constraint when in contact
m.Equation(y - L1*m.cos(q1) - L2*m.cos(q1+q2) == 0) #y geometric constraint when in contact
#soft constraint for contact switch. Produces no solution, with or without c, abs2 or abs3:
#m.Equation(c*m.abs2(x + L1*m.sin(q1) + L2*m.sin(q1+q2)) <= .01) #x geometric constraint when in contact
#.Equation(c*m.abs2(y - L1*m.cos(q1) - L2*m.cos(q1+q2)) <= .01) #y geometric constraint when in contact
###Objectives
#Maximize stride length
m.Maximize(100*final*x)
m.Minimize(100*init*x)
#Minimize torque
m.Obj(0.01*u**2)
###Solve
m.options.IMODE = 6
m.options.SOLVER = 3
m.solve()
###Scale time vector
m.time = np.multiply(TF, m.time)
###Display Outputs
print("adiff=", adiff.VALUE)
print("bdiff=", bdiff.VALUE)
print("swtch=", swtch.VALUE)
print("c=", c.VALUE)
########################################
####Plotting the results
import matplotlib.pyplot as plt
plt.close('all')
fig1 = plt.figure()
fig2 = plt.figure()
fig3 = plt.figure()
fig4 = plt.figure()
ax1 = fig1.add_subplot()
ax2 = fig2.add_subplot(221)
ax3 = fig2.add_subplot(222)
ax4 = fig2.add_subplot(223)
ax5 = fig2.add_subplot(224)
ax6 = fig3.add_subplot()
ax7 = fig4.add_subplot(121)
ax8 = fig4.add_subplot(122)
ax1.plot(m.time,u.value,'m',lw=2)
ax1.legend([r'$u$'],loc=1)
ax1.set_title('Control Input')
ax1.set_ylabel('Torque (N-m)')
ax1.set_xlabel('Time (s)')
ax1.set_xlim(m.time[0],m.time[-1])
ax1.grid(True)
ax2.plot(m.time,x.value,'r',lw=2)
ax2.set_ylabel('X Position (m)')
ax2.set_xlabel('Time (s)')
ax2.legend([r'$x$'],loc='upper left')
ax2.set_xlim(m.time[0],m.time[-1])
ax2.grid(True)
ax2.set_title('Mass X-Position')
ax3.plot(m.time,xdot.value,'g',lw=2)
ax3.set_ylabel('X Velocity (m/s)')
ax3.set_xlabel('Time (s)')
ax3.legend([r'$xdot$'],loc='upper left')
ax3.set_xlim(m.time[0],m.time[-1])
ax3.grid(True)
ax3.set_title('Mass X-Velocity')
ax4.plot(m.time,y.value,'r',lw=2)
ax4.set_ylabel('Y Position (m)')
ax4.set_xlabel('Time (s)')
ax4.legend([r'$y$'],loc='upper left')
ax4.set_xlim(m.time[0],m.time[-1])
ax4.grid(True)
ax4.set_title('Mass Y-Position')
ax5.plot(m.time,ydot.value,'g',lw=2)
ax5.set_ylabel('Y Velocity (m/s)')
ax5.set_xlabel('Time (s)')
ax5.legend([r'$ydot$'],loc='upper left')
ax5.set_xlim(m.time[0],m.time[-1])
ax5.grid(True)
ax5.set_title('Mass Y-Velocity')
ax6.plot(x.value, y.value,'g',lw=2)
ax6.set_ylabel('Y-Position (m)')
ax6.set_xlabel('X-Position (m)')
ax6.legend([r'$mass coordinate$'],loc='upper left')
ax6.set_xlim(x.value[0],x.value[-1])
ax6.set_ylim(0,1.1)
ax6.grid(True)
ax6.set_title('Mass Position')
ax7.plot(m.time,q1.value,'r',lw=2)
ax7.set_ylabel('q1 Position (rad)')
ax7.set_xlabel('Time (s)')
ax7.legend([r'$q1$'],loc='upper left')
ax7.set_xlim(m.time[0],m.time[-1])
ax7.grid(True)
ax7.set_title('Hip Joint Angle')
ax8.plot(m.time,q2.value,'r',lw=2)
ax8.set_ylabel('q2 Position (rad)')
ax8.set_xlabel('Time (s)')
ax8.legend([r'$q2$'],loc='upper left')
ax8.set_xlim(m.time[0],m.time[-1])
ax8.grid(True)
ax8.set_title('Knee Joint Angle')
plt.show()
The m.if2() function is a Mathematical Program with Complementarity Constraints (MPCC). It does not use a binary variable like the m.if3() function and therefore can be solved with any NLP solver, such as IPOPT. The disadvantage is that it has a saddle point at the switching condition and often gets stuck at the local solution. One way to overcome this issue is to use m.if3() with the IPOPT solver for initialization and then switch to the APOPT solver to generate an exact MINLP solution.
m.options.SOLVER=3 # IPOPT
m.solve()
m.options.SOLVER=1 # APOPT
m.options.TIME_SHIFT = 0 # don't update initial conditions
m.solve()
Additional information on MPCCs and binary conditional statements is in the Design Optimization course section on Logical Conditions.

Comparing Biweekly HFMD Cases with and without using the Squared Error Objective & L1-Norm Objective

I wish to model the biweekly HFMD cases in Malaysia.
Then, I want to show that the model using the Squared Error Objective and L1-Norm Objective can better model the biweekly HFMD cases than the model without objectives.
My question is, is it possible to model the biweekly HFMD cases without using the Squared Error Objective and L1-Norm Objective?
With this, I have attached the coding below:
from gekko import GEKKO
import numpy as np
import matplotlib.pyplot as plt
m1 = GEKKO(remote=False)
m2 = GEKKO(remote=False)
m = m1
# Known parameters
nb = 26 # Number of biweeks in a year
ny = 3 # Number of years
biweeks = np.zeros((nb,ny*nb+1))
biweeks[0][0] = 1
for i in range(nb):
for j in range(ny):
biweeks[i][j*nb+i+1] = 1
# Write csv data file
tm = np.linspace(0,78,79)
# case data
# Malaysia weekly HFMD data from the year 2013 - 2015
cases = np.array([506,506,700,890,1158,1605,1694,1311,1490,1310,1368,\
1009,1097,934,866,670,408,481,637,749,700,648,710,\
740,627,507,516,548,636,750,1066,1339,1565,\
1464,1575,1759,1631,1601,1227,794,774,623,411,\
750,1017,976,1258,1290,1546,1662,1720,1553,1787,1291,1712,2227,2132,\
2550,2140,1645,1743,1296,1153,871,621,570,388,\
347,391,446,442,390,399,421,398,452,470,437,411])
data = np.vstack((tm,cases))
data = data.T
# np.savetxt('measles_biweek_2.csv',data,delimiter=',',header='time,cases')
np.savetxt('hfmd_biweek_2.csv',data,delimiter=',',header='time,cases')
# Load data from csv
# m.time, cases_meas = np.loadtxt('measles_biweek_2.csv', \
m.time, cases_hfmd = np.loadtxt('hfmd_biweek_2.csv', \
delimiter=',',skiprows=1,unpack=True)
# m.Vr = m.Param(value = 0)
# Variables
# m.N = m.FV(value = 3.2e6)
# m.mu = m.FV(value = 7.8e-4)
# m.N = m.FV(value = 3.11861e7)
# m.mu = m.FV(value = 6.42712e-4)
m.N = m.FV(value = 3.16141e7) # Malaysia average total population (2015 - 2017)
m.mu = m.FV(value = 6.237171519e-4) # Malaysia scaled birth rate (births/biweek/total population)
m.rep_frac = m.FV(value = 0.45) # Percentage of underreporting
# Beta values (unknown parameters in the model)
m.beta = [m.FV(value=1, lb=0.1, ub=5) for i in range(nb)]
# Predicted values
m.S = m.SV(value = 0.162492875*m.N.value, lb=0,ub=m.N) # Susceptibles (Kids from 0 - 9 YO: 5137066 people) - Average of 94.88% from total reported cases
m.I = m.SV(value = 7.907863896e-5*m.N.value, lb=0,ub=m.N) #
# m.V = m.Var(value = 2e5)
# measured values
m.cases = m.CV(value = cases_hfmd, lb=0)
# turn on feedback status for CASES
m.cases.FSTATUS = 1
# weight on prior model predictions
m.cases.WMODEL = 0
# meas_gap = deadband that represents level of
# accuracy / measurement noise
db = 100
m.cases.MEAS_GAP = db
for i in range(nb):
m.beta[i].STATUS=1
#m.gamma = m.FV(value=0.07)
m.gamma = m.FV(value=0.07)
m.gamma.STATUS = 1
m.gamma.LOWER = 0.05
m.gamma.UPPER = 0.5
m.biweek=[None]*nb
for i in range(nb):
m.biweek[i] = m.Param(value=biweeks[i])
# Intermediate
m.Rs = m.Intermediate(m.S*m.I/m.N)
# Equations
sum_biweek = sum([m.biweek[i]*m.beta[i]*m.Rs for i in range(nb)])
# m.Equation(m.S.dt()== -sum_biweek + m.mu*m.N - m.Vr)
m.Equation(m.S.dt()== -sum_biweek + m.mu*m.N)
m.Equation(m.I.dt()== sum_biweek - m.gamma*m.I)
m.Equation(m.cases == m.rep_frac*sum_biweek)
# m.Equation(m.V.dt()==-m.Vr)
# options
m.options.SOLVER = 1
m.options.NODES=3
# imode = 5, dynamic estimation
m.options.IMODE = 5
# ev_type = 1 (L1-norm) or 2 (squared error)
m.options.EV_TYPE = 2
# solve model and print solver output
m.solve()
[print('beta['+str(i+1)+'] = '+str(m.beta[i][0])) \
for i in range(nb)]
print('gamma = '+str(m.gamma.value[0]))
# export data
# stack time and avg as column vectors
my_data = np.vstack((m.time,np.asarray(m.beta),m.gamma))
# transpose data
my_data = my_data.T
# save text file with comma delimiter
beta_str = ''
for i in range(nb):
beta_str = beta_str + ',beta[' + str(i+1) + ']'
header_name = 'time,gamma' + beta_str
##np.savetxt('solution_data.csv',my_data,delimiter=',',\
## header = header_name, comments='')
np.savetxt('solution_data_EVTYPE_'+str(m.options.EV_TYPE)+\
'_gamma'+str(m.gamma.STATUS)+'.csv',\
my_data,delimiter=',',header = header_name)
plt.figure(num=1, figsize=(16,8))
plt.suptitle('Estimation')
plt.subplot(2,2,1)
plt.plot(m.time,m.cases, label='Cases (model)')
plt.plot(m.time,cases_hfmd, label='Cases (measured)')
if m.options.EV_TYPE==2:
plt.plot(m.time,cases_hfmd+db/2, 'k-.',\
lw=0.5, label=r'$Cases_{db-hi}$')
plt.plot(m.time,cases_hfmd-db/2, 'k-.',\
lw=0.5, label=r'$Cases_{db-lo}$')
plt.fill_between(m.time,cases_hfmd-db/2,\
cases_hfmd+db/2,color='gold',alpha=.5)
plt.legend(loc='best')
plt.ylabel('Cases')
plt.subplot(2,2,2)
plt.plot(m.time,m.S,'r--')
plt.ylabel('S')
plt.subplot(2,2,3)
[plt.plot(m.time,m.beta[i], label='_nolegend_')\
for i in range(nb)]
plt.plot(m.time,m.gamma,'c--', label=r'$\gamma$')
plt.legend(loc='best')
plt.ylabel(r'$\beta, \gamma$')
plt.xlabel('Time')
plt.subplot(2,2,4)
plt.plot(m.time,m.I,'g--')
plt.xlabel('Time')
plt.ylabel('I')
plt.subplots_adjust(hspace=0.2,wspace=0.4)
name = 'cases_EVTYPE_'+ str(m.options.EV_TYPE) +\
'_gamma' + str(m.gamma.STATUS) + '.png'
plt.savefig(name)
plt.show()
To define a custom objective, use the m.Minimize() or m.Maximize() functions instead of the squared error or l1-norm objectives that are built into the m.CV() objects. To create a custom objective, use m.Var() instead of m.CV() such as:
from gekko import GEKKO
import numpy as np
m = GEKKO()
x = m.Array(m.Var,4,value=1,lb=1,ub=5)
x1,x2,x3,x4 = x
# change initial values
x2.value = 5; x3.value = 5
m.Equation(x1*x2*x3*x4>=25)
m.Equation(x1**2+x2**2+x3**2+x4**2==40)
m.Minimize(x1*x4*(x1+x2+x3)+x3)
m.solve()
print('x: ', x)
print('Objective: ',m.options.OBJFCNVAL)
Here is a similar problem with disease prediction (Measles) that uses m.CV().
import numpy as np
from gekko import GEKKO
import matplotlib.pyplot as plt
# Import Data
# write csv data file
t_s = np.linspace(0,78,79)
# case data
cases_s = np.array([180,180,271,423,465,523,649,624,556,420,\
423,488,441,268,260,163,83,60,41,48,65,82,\
145,122,194,237,318,450,671,1387,1617,2058,\
3099,3340,2965,1873,1641,1122,884,591,427,282,\
174,127,84,97,68,88,79,58,85,75,121,174,209,458,\
742,929,1027,1411,1885,2110,1764,2001,2154,1843,\
1427,970,726,416,218,160,160,188,224,298,436,482,468])
# Initialize gekko model
m = GEKKO()
# Number of collocation nodes
nodes = 4
# Number of phases (years in this case)
n = 3
#Biweek periods per year
bi = 26
# Time horizon (for all 3 phases)
m.time = np.linspace(0,1,bi+1)
# Parameters that will repeat each year
N = m.Param(3.2e6)
mu = m.Param(7.8e-4)
rep_frac = m.Param(0.45)
Vr = m.Param(0)
beta = m.MV(2,lb = 0.1)
beta.STATUS = 1
gamma = m.FV(value=0.07)
gamma.STATUS = 1
gamma.LOWER = 0.05
gamma.UPPER = 0.5
# Data used to control objective function
casesobj1 = m.Param(cases_s[0:(bi+1)])
casesobj2 = m.Param(cases_s[bi:(2*bi+1)])
casesobj3 = m.Param(cases_s[2*bi:(3*bi+1)])
# Variables that vary between years, one version for each year
cases = [m.CV(value = cases_s[(i*bi):(i+1)*(bi+1)-i],lb=0) for i in range(n)]
for i in cases:
i.FSTATUS = 1
i.WMODEL = 0
i.MEAS_GAP = 100
S = [m.Var(0.06*N,lb = 0,ub = N) for i in range(n)]
I = [m.Var(0.001*N, lb = 0,ub = N) for i in range(n)]
V = [m.Var(2e5) for i in range(n)]
# Equations (created for each year)
for i in range(n):
R = m.Intermediate(beta*S[i]*I[i]/N)
m.Equation(S[i].dt() == -R + mu*N - Vr)
m.Equation(I[i].dt() == R - gamma*I[i])
m.Equation(cases[i] == rep_frac*R)
m.Equation(V[i].dt() == -Vr)
# Connect years together at endpoints
for i in range(n-1):
m.Connection(cases[i+1],cases[i],1,bi,1,nodes)#,1,nodes)
m.Connection(cases[i+1],'CALCULATED',pos1=1,node1=1)
m.Connection(S[i+1],S[i],1,bi,1,nodes)
m.Connection(S[i+1],'CALCULATED',pos1=1,node1=1)
m.Connection(I[i+1],I[i],1,bi,1,nodes)
m.Connection(I[i+1],'CALCULATED',pos1=1, node1=1)
# Solver options
m.options.IMODE = 5
m.options.NODES = nodes
m.EV_TYPE = 1
m.options.SOLVER = 1
# Solve
m.Obj(2*(casesobj1-cases[0])**2+(casesobj3-cases[2])**2)
m.solve()
# Calculate the start time of each phase
ts = np.linspace(1,n,n)
# Plot
plt.figure()
plt.subplot(4,1,1)
tm = np.empty(len(m.time))
for i in range(n):
tm = m.time + ts[i]
plt.plot(tm,cases[i].value,label='Cases Year %s'%(i+1))
plt.plot(tm,cases_s[(i*bi):(i+1)*(bi+1)-i],'.')
plt.legend()
plt.ylabel('Cases')
plt.subplot(4,1,2)
for i in range(n):
tm = m.time + ts[i]
plt.plot(tm,beta.value,label='Beta Year %s'%(i+1))
plt.legend()
plt.ylabel('Contact Rate')
plt.subplot(4,1,3)
for i in range(n):
tm = m.time + ts[i]
plt.plot(tm,I[i].value,label='I Year %s'%(i+1))
plt.legend()
plt.ylabel('Infectives')
plt.subplot(4,1,4)
for i in range(n):
tm = m.time + ts[i]
plt.plot(tm,S[i].value,label='S Year %s'%(i+1))
plt.legend()
plt.ylabel('Susceptibles')
plt.xlabel('Time (yr)')
plt.show()

What is the formula being used in the in-sample prediction of statsmodels?

I would like to know what formula is being used in statsmodels ARIMA predict/forecast. For a simple AR(1) model I thought that it would be y_t = a1 * y_t-1. However, I am not able to recreate the results produced by forecast or predict.
Here's what I am trying to do:
from statsmodels.tsa.arima.model import ARIMA
import numpy as np
def ar_series(n):
# generate the series y_t = a1 y_t-1 + eps
np.random.seed(1)
y0 = np.random.rand()
y = [y0]
a1 = 0.7 # the AR coefficient
for i in range(1, n):
y.append(a1 * y[i - 1] + 0.3 * np.random.rand())
return np.array(y)
series = ar_series(10)
model = ARIMA(series, order=(1, 0, 0))
fit = model.fit()
#print(fit.summary())
# const = 0.3441; ar.L1 = 0.6518
print(fit.predict())
y_pred = [0.3441]
for i in range(1, 10):
y_pred.append( 0.6518 * series[i-1])
y_pred = np.array(y_pred)
print(y_pred)
The two series don't match and I have no idea how the in-sample predictions are being calculated?
Found the answer here. I think what I was trying to do is valid only if the process mean is zero.
https://faculty.washington.edu/ezivot/econ584/notes/forecast.pdf

How to set up GEKKO for parameter estimation from multiple independent sets of data?

I am learning how to use GEKKO for kinetic parameter estimation based on laboratory batch reactor data, which essentially consists of the concentration profiles of three species A, C, and P. For the purposes of my question, I am using a model that I previously featured in a question related to parameter estimation from a single data set.
My ultimate goal is to be able to use multiple experimental runs for parameter estimation, leveraging data that may be collected at different temperatures, species concentrations, etc. Due to the independent nature of individual batch reactor experiments, each data set features samples collected at different time points. These different time points (and in the future, different temperatures for instance) are difficult for me to implement into a GEKKO model, as I previosly used the experimental data collection time points as the m.time parameter for the GEKKO model. (See end of post for code) I have solved problems like this in the past with gPROMS and Athena Visual Studio.
To illustrate my problem, I generated an artificial data set of 'experimental' data from my original model by introducing noise to the species concentration profiles, and shifting the experimental time points slightly. I then combined all data sets of the same experimental species into new arrays featuring multiple columns. My thought process here was that GEKKO would carry out the parameter estimation by using the experimental data of each corresponding column of the arrays, so that times_comb[:,0] would be related to A_comb[:,0] while times_comb[:,1] would be related to A_comb[:,1].
When I attempt to run the GEKKO model, the system does obtain a solution for the parameter estimation, but it is unclear to me if the problem solution is reasonable, as I notice that the GEKKO Variables A, B, C, and P are 34 element vectors, which is double the elements in each of the experimental data sets. I presume GEKKO is somehow combining both columns of the time and Parameter vectors during model setup that leads to those 34 element variables? I am also concerned that during this combination of the columns of each input parameter, that the relationship between a certain time point and the collected species information is lost.
How could I improve the use of multiple data sets that GEKKO can simultaneously use for parameter estimation, with the consideration that the time points of each data set may be different? I looked on the GEKKO documentation examples as well as the APMonitor website, but I could not find examples featuring multiple data sets that I could use for guidance, as I am fairly new to the GEKKO package.
Thank you for your time reading my question and for any help/ideas you may have.
Code below:
import numpy as np
import matplotlib.pyplot as plt
from gekko import GEKKO
#Experimental data
times = np.array([0.0, 0.071875, 0.143750, 0.215625, 0.287500, 0.359375, 0.431250,
0.503125, 0.575000, 0.646875, 0.718750, 0.790625, 0.862500,
0.934375, 1.006250, 1.078125, 1.150000])
A_obs = np.array([1.0, 0.552208, 0.300598, 0.196879, 0.101175, 0.065684, 0.045096,
0.028880, 0.018433, 0.011509, 0.006215, 0.004278, 0.002698,
0.001944, 0.001116, 0.000732, 0.000426])
C_obs = np.array([0.0, 0.187768, 0.262406, 0.350412, 0.325110, 0.367181, 0.348264,
0.325085, 0.355673, 0.361805, 0.363117, 0.327266, 0.330211,
0.385798, 0.358132, 0.380497, 0.383051])
P_obs = np.array([0.0, 0.117684, 0.175074, 0.236679, 0.234442, 0.270303, 0.272637,
0.274075, 0.278981, 0.297151, 0.297797, 0.298722, 0.326645,
0.303198, 0.277822, 0.284194, 0.301471])
#Generate second set of 'experimental data'
times_new = times + np.random.uniform(0.0,0.01)
P_obs_noisy = P_obs+np.random.normal(0,0.05,P_obs.shape)
A_obs_noisy = A_obs+np.random.normal(0,0.05,A_obs.shape)
C_obs_noisy = A_obs+np.random.normal(0,0.05,C_obs.shape)
#Combine two data sets into multi-column arrays
times_comb = np.array([times, times_new]).T
P_comb = np.array([P_obs, P_obs_noisy]).T
A_comb = np.array([A_obs, A_obs_noisy]).T
C_comb = np.array([C_obs, C_obs_noisy]).T
m = GEKKO(remote=False)
t = m.time = times_comb #using two column time array
Am = m.Param(value=A_comb) #Using the two column data as observed parameter
Cm = m.Param(value=C_comb)
Pm = m.Param(value=P_comb)
A = m.Var(1, lb = 0)
B = m.Var(0, lb = 0)
C = m.Var(0, lb = 0)
P = m.Var(0, lb = 0)
k = m.Array(m.FV,6,value=1,lb=0)
for ki in k:
ki.STATUS = 1
k1,k2,k3,k4,k5,k6 = k
r1 = m.Var(0, lb = 0)
r2 = m.Var(0, lb = 0)
r3 = m.Var(0, lb = 0)
r4 = m.Var(0, lb = 0)
r5 = m.Var(0, lb = 0)
r6 = m.Var(0, lb = 0)
m.Equation(r1 == k1 * A)
m.Equation(r2 == k2 * A * B)
m.Equation(r3 == k3 * C * B)
m.Equation(r4 == k4 * A)
m.Equation(r5 == k5 * A)
m.Equation(r6 == k6 * A * B)
#mass balance diff eqs, function calls rxn function
m.Equation(A.dt() == - r1 - r2 - r4 - r5 - r6)
m.Equation(B.dt() == r1 - r2 - r3 - r6)
m.Equation(C.dt() == r2 - r3 + r4)
m.Equation(P.dt() == r3 + r5 + r6)
m.Minimize((A-Am)**2)
m.Minimize((P-Pm)**2)
m.Minimize((C-Cm)**2)
m.options.IMODE = 5
m.options.SOLVER = 3 #IPOPT optimizer
m.options.NODES = 6
m.solve()
k_opt = []
for ki in k:
k_opt.append(ki.value[0])
print(k_opt)
plt.plot(t,A)
plt.plot(t,C)
plt.plot(t,P)
plt.plot(t,B)
plt.plot(times,A_obs,'bo')
plt.plot(times,C_obs,'gx')
plt.plot(times,P_obs,'rs')
plt.plot(times_new, A_obs_noisy,'b*')
plt.plot(times_new, C_obs_noisy,'g*')
plt.plot(times_new, P_obs_noisy,'r*')
plt.show()
To have multiple data sets with different times and data points, you can join the data sets as a pandas dataframe. Here is a simple example:
# data set 1
t_data1 = [0.0, 0.1, 0.2, 0.4, 0.8, 1.00]
x_data1 = [2.0, 1.6, 1.2, 0.7, 0.3, 0.15]
# data set 2
t_data2 = [0.0, 0.15, 0.25, 0.45, 0.85, 0.95]
x_data2 = [3.6, 2.25, 1.75, 1.00, 0.35, 0.20]
The merged data has NaN where the data is missing:
x1 x2
Time
0.00 2.0 3.60
0.10 1.6 NaN
0.15 NaN 2.25
0.20 1.2 NaN
0.25 NaN 1.75
Take note of where the data is missing with a 1=measured and 0=not measured.
# indicate which points are measured
z1 = (data['x1']==data['x1']).astype(int) # 0 if NaN
z2 = (data['x2']==data['x2']).astype(int) # 1 if number
The final step is to set up Gekko variables, equations, and objective to accommodate the data sets.
xm = m.Array(m.Param,2)
zm = m.Array(m.Param,2)
for i in range(2):
m.Equation(x[i].dt()== -k * x[i]) # differential equations
m.Minimize(zm[i]*(x[i]-xm[i])**2) # objectives
You can also calculate the initial condition with m.free_initial(x[i]). This gives an optimal solution for one parameter value (k) over the 2 data sets. This approach can be expanded to multiple variables or multiple data sets with different times.
from gekko import GEKKO
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# data set 1
t_data1 = [0.0, 0.1, 0.2, 0.4, 0.8, 1.00]
x_data1 = [2.0, 1.6, 1.2, 0.7, 0.3, 0.15]
# data set 2
t_data2 = [0.0, 0.15, 0.25, 0.45, 0.85, 0.95]
x_data2 = [3.6, 2.25, 1.75, 1.00, 0.35, 0.20]
# combine with dataframe join
data1 = pd.DataFrame({'Time':t_data1,'x1':x_data1})
data2 = pd.DataFrame({'Time':t_data2,'x2':x_data2})
data1.set_index('Time', inplace=True)
data2.set_index('Time', inplace=True)
data = data1.join(data2,how='outer')
print(data.head())
# indicate which points are measured
z1 = (data['x1']==data['x1']).astype(int) # 0 if NaN
z2 = (data['x2']==data['x2']).astype(int) # 1 if number
# replace NaN with any number (0)
data.fillna(0,inplace=True)
m = GEKKO(remote=False)
# measurements
xm = m.Array(m.Param,2)
xm[0].value = data['x1'].values
xm[1].value = data['x2'].values
# index for objective (0=not measured, 1=measured)
zm = m.Array(m.Param,2)
zm[0].value=z1
zm[1].value=z2
m.time = data.index
x = m.Array(m.Var,2) # fit to measurement
x[0].value=x_data1[0]; x[1].value=x_data2[0]
k = m.FV(); k.STATUS = 1 # adjustable parameter
for i in range(2):
m.free_initial(x[i]) # calculate initial condition
m.Equation(x[i].dt()== -k * x[i]) # differential equations
m.Minimize(zm[i]*(x[i]-xm[i])**2) # objectives
m.options.IMODE = 5 # dynamic estimation
m.options.NODES = 2 # collocation nodes
m.solve(disp=True) # solve
k = k.value[0]
print('k = '+str(k))
# plot solution
plt.plot(m.time,x[0].value,'b.--',label='Predicted 1')
plt.plot(m.time,x[1].value,'r.--',label='Predicted 2')
plt.plot(t_data1,x_data1,'bx',label='Measured 1')
plt.plot(t_data2,x_data2,'rx',label='Measured 2')
plt.legend(); plt.xlabel('Time'); plt.ylabel('Value')
plt.xlabel('Time');
plt.show()
Including my updated code (not fully cleaned up to minimize number of variables) incorporating the selected answer to my question for reference. The model does a regression of 3 measured species in two separate 'datasets.'
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from gekko import GEKKO
#Experimental data
times = np.array([0.0, 0.071875, 0.143750, 0.215625, 0.287500, 0.359375, 0.431250,
0.503125, 0.575000, 0.646875, 0.718750, 0.790625, 0.862500,
0.934375, 1.006250, 1.078125, 1.150000])
A_obs = np.array([1.0, 0.552208, 0.300598, 0.196879, 0.101175, 0.065684, 0.045096,
0.028880, 0.018433, 0.011509, 0.006215, 0.004278, 0.002698,
0.001944, 0.001116, 0.000732, 0.000426])
C_obs = np.array([0.0, 0.187768, 0.262406, 0.350412, 0.325110, 0.367181, 0.348264,
0.325085, 0.355673, 0.361805, 0.363117, 0.327266, 0.330211,
0.385798, 0.358132, 0.380497, 0.383051])
P_obs = np.array([0.0, 0.117684, 0.175074, 0.236679, 0.234442, 0.270303, 0.272637,
0.274075, 0.278981, 0.297151, 0.297797, 0.298722, 0.326645,
0.303198, 0.277822, 0.284194, 0.301471])
#Generate second set of 'experimental data'
times_new = times + np.random.uniform(0.0,0.01)
P_obs_noisy = (P_obs+ np.random.normal(0,0.05,P_obs.shape))
A_obs_noisy = (A_obs+np.random.normal(0,0.05,A_obs.shape))
C_obs_noisy = (C_obs+np.random.normal(0,0.05,C_obs.shape))
#Combine two data sets into multi-column arrays using pandas DataFrames
#Set dataframe index to be combined time discretization of both data sets
exp1 = pd.DataFrame({'Time':times,'A':A_obs,'C':C_obs,'P':P_obs})
exp2 = pd.DataFrame({'Time':times_new,'A':A_obs_noisy,'C':C_obs_noisy,'P':P_obs_noisy})
exp1.set_index('Time',inplace=True)
exp2.set_index('Time',inplace=True)
exps = exp1.join(exp2, how ='outer',lsuffix = '_1',rsuffix = '_2')
#print(exps.head())
#Combine both data sets into a single data frame
meas_data = pd.DataFrame().reindex_like(exps)
#define measurement locations for each data set, with NaN written for time points
#not common in both data sets
for cols in exps:
meas_data[cols] = (exps[cols]==exps[cols]).astype(int)
exps.fillna(0,inplace = True) #replace NaN with 0
m = GEKKO(remote=False)
t = m.time = exps.index #set GEKKO time domain to use experimental time points
#Generate two-column GEKKO arrays to store observed values of each species, A, C and P
Am = m.Array(m.Param,2)
Cm = m.Array(m.Param,2)
Pm = m.Array(m.Param,2)
Am[0].value = exps['A_1'].values
Am[1].value = exps['A_2'].values
Cm[0].value = exps['C_1'].values
Cm[1].value = exps['C_2'].values
Pm[0].value = exps['P_1'].values
Pm[1].value = exps['P_2'].values
#Define GEKKO variables that determine if time point contatins data to be used in regression
#If time point contains species data, meas_ variable = 1, else = 0
meas_A = m.Array(m.Param,2)
meas_C = m.Array(m.Param,2)
meas_P = m.Array(m.Param,2)
meas_A[0].value = meas_data['A_1'].values
meas_A[1].value = meas_data['A_2'].values
meas_C[0].value = meas_data['C_1'].values
meas_C[1].value = meas_data['C_2'].values
meas_P[0].value = meas_data['P_1'].values
meas_P[1].value = meas_data['P_2'].values
#Define Variables for differential equations A, B, C, P, with initial conditions set by experimental observation at first time point
A = m.Array(m.Var,2, lb = 0)
B = m.Array(m.Var,2, lb = 0)
C = m.Array(m.Var,2, lb = 0)
P = m.Array(m.Var,2, lb = 0)
A[0].value = exps['A_1'][0] ; A[1].value = exps['A_2'][0]
B[0].value = 0 ; B[1].value = 0
C[0].value = exps['C_1'][0] ; C[1].value = exps['C_2'][0]
P[0].value = exps['P_1'][0] ; P[1].value = exps['P_2'][0]
#Define kinetic coefficients, k1-k6 as regression FV's
k = m.Array(m.FV,6,value=1,lb=0,ub = 20)
for ki in k:
ki.STATUS = 1
k1,k2,k3,k4,k5,k6 = k
#If doing paramrter estimation, enable free_initial condition, else not include them in model to reduce DOFs (for simulation, for example)
if k1.STATUS == 1:
for i in range(2):
m.free_initial(A[i])
m.free_initial(B[i])
m.free_initial(C[i])
m.free_initial(P[i])
#Define reaction rate variables
r1 = m.Array(m.Var,2, value = 1, lb = 0)
r2 = m.Array(m.Var,2, value = 1, lb = 0)
r3 = m.Array(m.Var,2, value = 1, lb = 0)
r4 = m.Array(m.Var,2, value = 1, lb = 0)
r5 = m.Array(m.Var,2, value = 1, lb = 0)
r6 = m.Array(m.Var,2, value = 1, lb = 0)
#Model Equations
for i in range(2):
#Rate equations
m.Equation(r1[i] == k1 * A[i])
m.Equation(r2[i] == k2 * A[i] * B[i])
m.Equation(r3[i] == k3 * C[i] * B[i])
m.Equation(r4[i] == k4 * A[i])
m.Equation(r5[i] == k5 * A[i])
m.Equation(r6[i] == k6 * A[i] * B[i])
#Differential species balances
m.Equation(A[i].dt() == - r1[i] - r2[i] - r4[i] - r5[i] - r6[i])
m.Equation(B[i].dt() == r1[i] - r2[i] - r3[i] - r6[i])
m.Equation(C[i].dt() == r2[i] - r3[i] + r4[i])
m.Equation(P[i].dt() == r3[i] + r5[i] + r6[i])
#Minimization objective functions
m.Obj(meas_A[i]*(A[i]-Am[i])**2)
m.Obj(meas_P[i]*(P[i]-Pm[i])**2)
m.Obj(meas_C[i]*(C[i]-Cm[i])**2)
#Solver options
m.options.IMODE = 5
m.options.SOLVER = 3 #APOPT optimizer
m.options.NODES = 6
m.solve()
k_opt = []
for ki in k:
k_opt.append(ki.value[0])
print(k_opt)
plt.plot(t,A[0],'b-')
plt.plot(t,A[1],'b--')
plt.plot(t,C[0],'g-')
plt.plot(t,C[1],'g--')
plt.plot(t,P[0],'r-')
plt.plot(t,P[1],'r--')
plt.plot(times,A_obs,'bo')
plt.plot(times,C_obs,'gx')
plt.plot(times,P_obs,'rs')
plt.plot(times_new, A_obs_noisy,'b*')
plt.plot(times_new, C_obs_noisy,'g*')
plt.plot(times_new, P_obs_noisy,'r*')
plt.show()

Resources