GEKKO MPC Solver with real-time measurements - gekko

Trying to solve MPC with an objective function and real-time measurements, one measurement coming in at a time. I am a bit at a loss on the following:
1 - Is it necessary to shorten the prediction horizon to n_steps - step + 1 and reinitialize the MVs and CVs at every time interval when new measurement comes in?
2 - Not sure how to collect the next step predicted actuation inputs/ states values after the model is solved.
Should the predicted actuation inputs be:
self.mpc_u_state[step] = np.array([n_fans.NEWVAL,
Cw.NEWVAL,
n_pumps.NEWVAL,
Cp.NEWVAL])
or
self.mpc_u_state[step] = np.array([n_fans[step],
Cw [step],
n_pumps[step],
Cp[step]])
3 - How about the newly predicted state? Should that be:
mpc_x_state[step] = np.array([topoil.VALUE[step],
hotspot.VALUE[step],
puload.VALUE[step]])
Here is my real-time MPC code. Any help would be much appreciated.
#!/usr/bin/python
from datetime import datetime
import numpy as np
import pandas as pd
import csv as csv
from gekko import GEKKO
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
# Controller tuning and plant constants.
ALPHA = 0.5  # objective weight: ALPHA on power cost, (1 - ALPHA) on tracking cost
DELTA_TOP = 5  # 5 degC band below the top-oil reference curve
DELTA_HOT = 5  # 5 degC band below the hot-spot reference curve
DELTA_PU = 0.05  # 0.05 p.u band below the load reference curve
NUM_FANS = 8  # MAX Number of fans
NUM_PUMPS = 3  # MAX number of pumps
FAN_POWERS = [145, 130, 120, 100, 500, 460, 430, 370, 860, 800, 720, 610, 1500, 1350, 1230, 1030]
PUMP_POWERS = [430.0, 1070.0, 2950.0, 6920.0, 8830.0]  # [0.43, 1.07, 2.95, 6.92, 8.83]
# set up matplotlib (inline display needs IPython when running in a notebook)
is_ipython = 'inline' in matplotlib.get_backend()
if is_ipython:
    from IPython import display
class MPCooController:
    """Real-time MPC for a transformer cooling system.

    Tracks reference curves for top-oil temperature, hot-spot temperature and
    per-unit load while penalizing fan/pump power, using GEKKO in control mode
    (IMODE=6) with one simulated measurement fed back per time step.
    """

    def __init__(self):
        # Reference trajectory: (id, timestamp, top-oil degC, hot-spot degC,
        # load p.u.) over one cycle; the last row repeats the first.
        self.ref_state = pd.DataFrame([
            [0 , '2022-11-11T15:12:17.476577', 67.78, 77.94, 0.6],
            [1 , '2022-11-11T15:12:17.535194', 64.31, 73.03, 0.6],
            [2 , '2022-11-11T15:12:17.566615', 61.44, 69.90, 0.6],
            [3 , '2022-11-11T15:12:17.613887', 58.41, 67.16, 0.6],
            [4 , '2022-11-11T15:12:17.653718', 55.98, 64.62, 0.6],
            [5 , '2022-11-11T15:12:17.696774', 53.47, 62.41, 0.6],
            [6 , '2022-11-11T15:12:17.726733', 51.41, 60.38, 0.6],
            [7 , '2022-11-11T15:12:17.765546', 49.37, 58.57, 0.6],
            [8 , '2022-11-11T15:12:17.809288', 47.63, 56.93, 0.6],
            [9 , '2022-11-11T15:12:17.841497', 46.04, 55.50, 0.6],
            [10 , '2022-11-11T15:12:17.878795', 44.61, 54.22, 0.6],
            [11 , '2022-11-11T15:12:17.921976', 43.46, 53.14, 0.6],
            [12 , '2022-11-11T15:12:17.964345', 42.32, 52.75, 0.7],
            [13 , '2022-11-11T15:12:17.997516', 42.10, 54.73, 0.7],
            [14 , '2022-11-11T15:12:18.037895', 41.82, 55.56, 0.8],
            [15 , '2022-11-11T15:12:18.076159', 42.63, 58.60, 0.8],
            [16 , '2022-11-11T15:12:18.119739', 43.19, 60.29, 0.9],
            [17 , '2022-11-11T15:12:18.153816', 44.96, 64.24, 0.9],
            [18 , '2022-11-11T15:12:18.185398', 46.34, 66.69, 1.0],
            [19 , '2022-11-11T15:12:18.219051', 49.00, 71.43, 1.0],
            [20 , '2022-11-11T15:12:18.249319', 51.10, 73.73, 1.0],
            [21 , '2022-11-11T15:12:18.278797', 53.67, 75.80, 1.0],
            [22 , '2022-11-11T15:12:18.311761', 55.53, 77.71, 1.0],
            [23 , '2022-11-11T15:12:18.339181', 57.86, 79.58, 1.0],
            [24 , '2022-11-11T15:12:18.386485', 59.56, 81.72, 1.05],
            [25 , '2022-11-11T15:12:18.421970', 62.10, 85.07, 1.05],
            [26 , '2022-11-11T15:12:18.451925', 64.14, 87.55, 1.1],
            [27 , '2022-11-11T15:12:18.502646', 66.91, 91.12, 1.1],
            [28 , '2022-11-11T15:12:18.529126', 69.22, 93.78, 1.15],
            [29 , '2022-11-11T15:12:18.557800', 72.11, 97.48, 1.15],
            [30 , '2022-11-11T15:12:18.591488', 74.60, 100.25, 1.2],
            [31 , '2022-11-11T15:12:18.620894', 77.50, 103.99, 1.2],
            [32 , '2022-11-11T15:12:18.652168', 80.04, 105.84, 1.15],
            [33 , '2022-11-11T15:12:18.692116', 81.82, 106.17, 1.15],
            [34 , '2022-11-11T15:12:18.739722', 83.28, 106.96, 1.1],
            [35 , '2022-11-11T15:12:18.786310', 83.99, 106.39, 1.1],
            [36 , '2022-11-11T15:12:18.839116', 84.62, 106.82, 1.1],
            [37 , '2022-11-11T15:12:18.872161', 84.91, 107.12, 1.1],
            [38 , '2022-11-11T15:12:18.908019', 85.34, 107.36, 1.1],
            [39 , '2022-11-11T15:12:18.938229', 85.30, 107.40, 1.1],
            [40 , '2022-11-11T15:12:18.967031', 85.46, 106.54, 1.0],
            [41 , '2022-11-11T15:12:19.001552', 84.21, 103.19, 1.0],
            [42 , '2022-11-11T15:12:19.035265', 83.19, 101.22, 0.9],
            [43 , '2022-11-11T15:12:19.069475', 80.95, 97.04, 0.9],
            [44 , '2022-11-11T15:12:19.094408', 79.11, 94.33, 0.8],
            [45 , '2022-11-11T15:12:19.123621', 76.21, 89.62, 0.8],
            [46 , '2022-11-11T15:12:19.158660', 73.81, 86.42, 0.7],
            [47 , '2022-11-11T15:12:19.192915', 70.51, 81.42, 0.7],
            [48 , '2022-11-11T15:12:19.231802', 67.78, 77.94, 0.6]], columns=['id', 'sampdate', 'optopoil', 'ophotspot', 'opload'])
        # Measurement-noise vectors; zero until mpc_free_resources() redraws them.
        self.puload = np.zeros(len(self.ref_state))
        self.hot_noise = np.zeros(len(self.ref_state))
        self.top_noise = np.zeros(len(self.ref_state))
        self.ref_puload = []
        self.ref_hotspot = []
        self.ref_topoil = []
        self.mpc_play_time = []
        self.mpc_ref_state = []
        self.mpc_x_state = []
        self.mpc_u_state = []

    # This function simulates observations
    def get_observation(self, step, u_state):
        """Pretend to actuate the plant with u_state and return the measured state.

        Returns np.array([top-oil temp, hot-spot temp, p.u. load]) at `step`:
        the reference curve affected by the pre-drawn random noise.
        """
        # Sleep 5 seconds to pretend to actuate something with (u_state) and get
        # the resulting state back.
        # time.sleep(5)
        optopoil = float(self.ref_state['optopoil'][step]) + self.top_noise[step]  # Top oil temperature
        ophotspot = float(self.ref_state['ophotspot'][step]) + self.hot_noise[step]  # Winding X temperature
        opuload = float(self.ref_state['opload'][step]) + self.puload[step]  # pu load current X Winding
        return np.array([optopoil, ophotspot, opuload])

    def mpc_free_resources(self):
        """Reset the result buffers and redraw measurement noise for a new cycle."""
        n_steps = len(self.ref_state)
        self.mpc_play_time = list(np.empty(n_steps))
        self.mpc_x_state = list(np.empty(n_steps))
        self.mpc_u_state = list(np.empty(n_steps))
        self.mpc_x_meas = list(np.empty(n_steps))
        # Fix: this was assigned to self.pu_noise, which get_observation()
        # never reads; the load noise must go into self.puload.
        self.puload = np.random.normal(0, .05, len(self.ref_state))
        self.hot_noise = np.random.normal(0, 5, len(self.ref_state))
        self.top_noise = np.random.normal(0, 5, len(self.ref_state))

    def mpc_real_mpc(self):
        """Build the GEKKO MPC model and run the closed loop, feeding one
        measurement per step; plots and resets after each full cycle."""
        m = GEKKO(remote=False)
        n_steps = len(self.ref_state)
        m.time = np.linspace(0, n_steps - 1, n_steps)
        self.mpc_ref_state = self.ref_state
        mpc_play_time = list(np.empty(n_steps))
        mpc_x_state = list(np.empty(n_steps))
        mpc_u_state = list(np.empty(n_steps))
        mpc_x_meas = list(np.empty(n_steps))
        alpha = m.Const(value=ALPHA)
        delta_top = m.Const(value=DELTA_TOP)
        delta_hot = m.Const(value=DELTA_HOT)
        delta_pu = m.Const(value=DELTA_PU)
        C_base = m.Const(value=NUM_FANS * np.max(FAN_POWERS) + NUM_PUMPS * np.max(PUMP_POWERS))  # kW
        # Reference parameters
        ref_puload = m.Param(np.array(self.ref_state['opload']))
        ref_hotspot = m.Param(np.array(self.ref_state['ophotspot']))
        ref_topoil = m.Param(np.array(self.ref_state['optopoil']))
        # Reference curves lower and higher bounds
        tophigh = m.Param(value=ref_topoil.VALUE)
        toplow = m.Param(value=ref_topoil.VALUE - delta_top.VALUE)
        hothigh = m.Param(value=ref_hotspot.VALUE)
        hotlow = m.Param(value=ref_hotspot.VALUE - delta_hot.VALUE)
        puhigh = m.Param(value=ref_puload.VALUE)
        pulow = m.Param(value=ref_puload.VALUE - delta_pu.VALUE)
        # Controlled Variables (scalar bounds taken from the band extremes)
        puload = m.CV(lb=np.min(pulow.VALUE), ub=np.max(puhigh.VALUE))
        hotspot = m.CV(lb=np.min(hotlow.VALUE), ub=np.max(hothigh.VALUE))
        topoil = m.CV(lb=np.min(toplow.VALUE), ub=np.max(tophigh.VALUE))
        # Manipulated variables
        n_fans = m.MV(value=0, lb=0, ub=NUM_FANS, integer=True)
        n_pumps = m.MV(value=1, lb=1, ub=NUM_PUMPS, integer=True)
        Cw = m.MV(value=np.min(FAN_POWERS), lb=np.min(FAN_POWERS), ub=np.max(FAN_POWERS))
        Cp = m.MV(value=np.min(PUMP_POWERS), lb=np.min(PUMP_POWERS), ub=np.max(PUMP_POWERS))
        # CVs Status (both measured and calculated)
        puload.FSTATUS = 1
        hotspot.FSTATUS = 1
        topoil.FSTATUS = 1
        puload.STATUS = 1
        hotspot.STATUS = 1
        topoil.STATUS = 1
        # Action status
        n_fans.STATUS = 1
        n_pumps.STATUS = 1
        Cw.STATUS = 1
        Cp.STATUS = 1
        # Not measured
        n_fans.FSTATUS = 0
        n_pumps.FSTATUS = 0
        Cw.FSTATUS = 0
        Cp.FSTATUS = 0
        # The Objective Function (Fuv) cumulating over time
        power_cost = m.Intermediate((((n_fans * Cw + n_pumps * Cp) - C_base) / C_base)**2)
        tracking_cost = m.Intermediate(((ref_puload - puload) / ref_puload)**2
                                       + ((ref_hotspot - hotspot) / ref_hotspot)**2
                                       + ((ref_topoil - topoil) / ref_topoil)**2)
        Fuv = m.Intermediate(alpha * power_cost + (1 - alpha) * tracking_cost)
        # Initial solution at step 0
        step = 0
        u_state = np.array([0, np.min(FAN_POWERS), 1, np.min(PUMP_POWERS)])
        x_state = self.get_observation(step, u_state)
        topoil.MEAS = x_state[0]
        hotspot.MEAS = x_state[1]
        puload.MEAS = x_state[2]
        m.options.TIME_SHIFT = 1
        m.options.CV_TYPE = 2
        m.Obj(Fuv)
        m.options.IMODE = 6
        m.options.SOLVER = 1
        m.solve(disp=True, debug=False)
        mpc_x_state[0] = np.array([topoil.MODEL, hotspot.MODEL, puload.MODEL])
        mpc_u_state[0] = np.array([n_fans.NEWVAL, Cw.NEWVAL, n_pumps.NEWVAL, Cp.NEWVAL])
        mpc_x_meas[0] = np.array([topoil.MEAS, hotspot.MEAS, puload.MEAS])
        u_state = mpc_u_state[0]
        mpc_play_time[0] = 0
        # Closed loop: feed one measurement per step, re-solve, apply NEWVALs
        while True:
            for step in range(1, n_steps):
                x_state = self.get_observation(step, u_state)
                topoil.MEAS = x_state[0]
                hotspot.MEAS = x_state[1]
                puload.MEAS = x_state[2]
                topoil.SP = tophigh[step]
                hotspot.SP = hothigh[step]
                puload.SP = puhigh[step]
                m.solve(disp=True, debug=False)
                mpc_x_state[step] = np.array([topoil.MODEL, hotspot.MODEL, puload.MODEL])
                mpc_x_meas[step] = np.array([topoil.MEAS, hotspot.MEAS, puload.MEAS])
                mpc_u_state[step] = np.array([n_fans.NEWVAL, Cw.NEWVAL, n_pumps.NEWVAL, Cp.NEWVAL])
                # New actuation inputs
                u_state = mpc_u_state[step]
                mpc_play_time[step] = step
            self.mpc_x_state = mpc_x_state
            self.mpc_x_meas = mpc_x_meas
            self.mpc_u_state = mpc_u_state
            self.mpc_play_time = mpc_play_time
            self.plot_ctl_mpc()
            self.mpc_free_resources()

    def plot_ctl_mpc(self):
        """Plot reference vs MPC-predicted temperatures/load and actuation power."""
        print("\n\n\n\n===== mpc_u_state ========\n", self.mpc_u_state)
        print("\n\n===== mpc_x_state ========\n", self.mpc_x_state)
        self.mpc_x_state = pd.DataFrame(self.mpc_x_state, columns=['optopoil', 'ophotspot', 'opload'])
        self.mpc_x_meas = pd.DataFrame(self.mpc_x_meas, columns=['optopoil', 'ophotspot', 'opload'])
        self.mpc_u_state = pd.DataFrame(self.mpc_u_state, columns=['nfans', 'fpower', 'npumps', 'ppower'])
        print("\n\n===== mpc_u_state ========\n", self.mpc_u_state)
        print("\n\n===== mpc_x_state ========\n", self.mpc_x_state)
        print("\n\n===== mpc_x_meas ========\n", self.mpc_x_meas)
        # Results Collection over play time
        fig1, ax = plt.subplots()
        ref_lns_hot, = ax.plot(self.mpc_play_time, self.mpc_ref_state['ophotspot'], 'r', label="ref-hot spot")
        mpc_lns_hot, = ax.plot(self.mpc_play_time, self.mpc_x_state['ophotspot'], 'r--', label="mpc-hot spot")
        # mpc_hot_meas, = ax.plot(self.mpc_play_time, self.mpc_x_meas['ophotspot'], 'r+-', label="mpc_hot_meas")
        ref_lns_top, = ax.plot(self.mpc_play_time, self.mpc_ref_state['optopoil'], 'y', label="ref-top oil")
        mpc_lns_top, = ax.plot(self.mpc_play_time, self.mpc_x_state['optopoil'], 'y--', label="mpc-top oil")
        # mpc_top_meas, = ax.plot(self.mpc_play_time, self.mpc_x_meas['optopoil'], 'y+-', label="mpc_top_meas")
        ax2 = ax.twinx()
        ref_lns_load, = ax2.plot(self.mpc_play_time, self.mpc_ref_state['opload'], 'k', drawstyle='steps-post', label='ref-pu-load')
        mpc_lns_load, = ax2.plot(self.mpc_play_time, self.mpc_x_state['opload'], 'k--', drawstyle='steps-post', label="mpc-pu-load")
        # mpc_load_meas, = ax2.plot(self.mpc_play_time, self.mpc_x_meas['opload'], 'k+-', drawstyle='steps-post', label="meas-pu-load")
        ax2.set_ylabel('Load[p.u]')
        ax.set_xlabel('Time [min]')
        ax.set_ylabel('Temperatures[degC]')
        ax.set_title('Thermal and loads stimuli distribution')
        # ax2.legend(handles=[ref_lns_hot, mpc_lns_hot, rl_lns_hot, ref_lns_top, mpc_lns_top, rl_lns_top, ref_lns_load, mpc_lns_load, rl_lns_load], loc='best')
        fig2, ax3 = plt.subplots()
        ax3.plot(self.mpc_play_time, self.mpc_u_state['fpower'] * self.mpc_u_state['nfans'], drawstyle='steps-post', label="Fans Power")
        ax3.plot(self.mpc_play_time, self.mpc_u_state['ppower'] * self.mpc_u_state['npumps'], drawstyle='steps-post', label="Pumps Power")
        plt.show()


if __name__ == '__main__':
    mpco_controller = MPCooController()
    mpco_controller.mpc_real_mpc()

Every time the m.solve() command is issued, Gekko manages the time shifting, re-initialization, and solution.
It is not necessary to shorten the time horizon with every cycle. The time horizon remains constant unless it is a batch process that shortens the horizon as the batch proceeds. Here is a graphic that shows how the time horizon remains constant. The two CVs (top plots) have a prediction horizon with a setpoint indicated by the dashed target region.
The predicted value is:
self.mpc_u_state[step] = np.array([n_fans.NEWVAL,
Cw.NEWVAL,
n_pumps.NEWVAL,
Cp.NEWVAL])
this is equivalent to:
self.mpc_u_state[step] = np.array([n_fans.value[1],
Cw.value[1],
n_pumps.value[1],
Cp.value[1]])
The newly predicted state is:
mpc_x_state[step] = np.array([topoil.MODEL,
hotspot.MODEL,
puload.MODEL])
or you can take any value from the time horizon such as the initial condition:
mpc_x_state[step] = np.array([topoil.value[0],
hotspot.value[0],
puload.value[0]])
The Temperature Control Lab is a good example of real-time MPC that runs with an Arduino Leonardo for DAQ and has a serial interface to Python or Matlab for the plots. The TCLab examples can be run with TCLab() or with TCLabModel() if the TCLab hardware is not available.
Response to Edit
Each m.Var(), m.SV(), and m.CV() needs a corresponding equation with m.Equation() to determine the value. The declaration of an m.Var() creates an additional degree of freedom and m.Equation() reduces the degree of freedom by one. The model has three m.CV() definitions but no corresponding equations for puload, hotspot, and topoil. Equations need to be defined that relate the MVs or other adjustable inputs to these outputs. The optimizer then selects the best MVs or FVs to minimize the objective function that combines power and tracking costs.
A convenient way to check that the degrees of freedom are specified correctly is to set m.options.COLDSTART=1 for the first solve.
m.options.COLDSTART = 1
m.solve(disp=True, debug=True)
m.options.COLDSTART = 0
m.solve(disp=True, debug=False)
If the degrees of freedom are not set properly, there is an error:
Number of state variables: 1104
Number of total equations: - 960
Number of slack variables: - 0
---------------------------------------
Degrees of freedom : 144
#error: Degrees of Freedom
* Error: DOF must be zero for this mode
STOPPING...
Once the degrees of freedom are correct, another suggestion is to avoid hard constraints on the CVs. This can lead to an infeasibility.
puload = m.CV() #lb = np.min(pulow.VALUE), ub = np.max(puhigh.VALUE))
hotspot = m.CV() #lb = np.min(hotlow.VALUE), ub = np.max(hothigh.VALUE))
topoil = m.CV() #lb = np.min(toplow.VALUE), ub = np.max(tophigh.VALUE))
It is better to use CV_TYPE=1 and set SPHI and SPLO values so that violations of these constraints can occur to maintain feasibility.

Related

GEKKO MPC Solver generating #error: Inequality Definition invalid inequalities: z > x < y minimize<generatorobject<..>

I am learning to use the GEKKO MPC solver, and wrote the above code as a test. After multiple attempts, I am still not able to get it running and keep getting the following exception.
APMonitor, Version 1.0.1
APMonitor Optimization Suite
----------------------------------------------------------------
#error: Inequality Definition
invalid inequalities: z > x < y
minimize<generatorobject<genexpr>at0x7f8cb1a1c950>
STOPPING . . .
---------------------------------------------------------------------------
Exception Traceback (most recent call last)
<ipython-input-11-e92d32444662> in <module>
122 # Solver
123 m.options.IMODE = 6 # control
--> 124 m.solve(disp=True, debug=True)
125
126 # Plot the results
/usr/local/lib/python3.7/dist-packages/gekko/gekko.py in solve(self, disp, debug, GUI, **kwargs)
2183 #print APM error message and die
2184 if (debug >= 1) and ('#error' in response):
-> 2185 raise Exception(response)
2186
2187 #load results
Exception: #error: Inequality Definition
invalid inequalities: z > x < y
minimize<generatorobject<genexpr>at0x7f8cb1a1c950>
STOPPING . . .
Any help will be much appreciated. Here is the code I wrote:
# NOTE(review): original failing script, kept verbatim; the comments below mark
# the likely error sources behind the "#error: Inequality Definition" exception.
import numpy as np
import pandas as pd
# NOTE(review): `GEKKO` is used below but never imported in this snippet
# (needs `from gekko import GEKKO`).
m = GEKKO()
ALPHA = 0.5
NUM_FANS = 8 # MAX Number of fans
NUM_PUMPS = 1 # MAX number of pumps
n_steps = 25
m.time = np.linspace(0, n_steps - 1, n_steps)
DELTA_TOP = 5 # 5 degC
DELTA_HOT = 5 # 5 degC
DELTA_PU = 0.05 # 0.05 p.u
fan_powers = np.array([145, 130, 120, 100, 500, 460, 430, 370, 860, 800, 720, 610, 1500, 1350, 1230, 1030]) # kW
pump_powers = np.array([0.43, 1.07, 2.95, 6.92, 8.83]) # kW
C_base = NUM_FANS * np.max(fan_powers) + NUM_PUMPS * np.max(pump_powers) # kW
x_state = np.array([61.29027692, 70.15582365, 0.86972331])
u_state = np.array([5, 1.00, 1500.00, 0.43]) # np.array(pd.DataFrame([[5, 1.00, 1500.00, 0.43]], columns=['nfans', 'npumps', 'fpower', 'ppower'] )) #np.array([1.00, 1500.00, 0.43])
ref_state = pd.DataFrame([
[0, '2022-08-30T19:33:07.637217', 50.949829, 56.055570, 0.70],
[1, '2022-08-30T19:33:07.719390', 46.113708, 48.741882, 0.60],
[2, '2022-08-30T19:33:07.754899', 43.921465, 49.425708, 0.60],
[3, '2022-08-30T19:33:07.782037', 44.792515, 49.895490, 0.60],
[4, '2022-08-30T19:33:07.831646', 45.814439, 51.055404, 0.60],
[5, '2022-08-30T19:33:07.910940', 46.677830, 51.900248, 0.60],
[6, '2022-08-30T19:33:07.951684', 47.500278, 52.609172, 0.60],
[7, '2022-08-30T19:33:08.050460', 48.187270, 53.240813, 0.60],
[8, '2022-08-30T19:33:08.126050', 48.866124, 53.806335, 0.60],
[9, '2022-08-30T19:33:08.205533', 49.395292, 54.303250, 0.60],
[10, '2022-08-30T19:33:08.237825', 49.908234, 54.732465, 0.60],
[11, '2022-08-30T19:33:08.261200', 50.315668, 55.112417, 0.60],
[12, '2022-08-30T19:33:08.303079', 50.750658, 55.793464, 0.70],
[13, '2022-08-30T19:33:08.370096', 51.341523, 57.619243, 0.70],
[14, '2022-08-30T19:33:08.463300', 51.666736, 58.602764, 0.80],
[15, '2022-08-30T19:33:08.524749', 52.738678, 60.785766, 0.80],
[16, '2022-08-30T19:33:08.552913', 53.460458, 62.226178, 0.90],
[17, '2022-08-30T19:33:08.589561', 55.055422, 64.867184, 0.90],
[18, '2022-08-30T19:33:08.633709', 56.231096, 66.286857, 0.90],
[19, '2022-08-30T19:33:08.671211', 57.876352, 67.644409, 0.90],
[20, '2022-08-30T19:33:08.708004', 59.015503, 68.931404, 0.90],
[21, '2022-08-30T19:33:08.729763', 60.586943, 70.227752, 0.90],
[22, '2022-08-30T19:33:08.753146', 61.809524, 71.492779, 0.90],
[23, '2022-08-30T19:33:08.779459', 63.232974, 72.710304, 0.90],
[24, '2022-08-30T19:33:08.808419', 64.324357, 74.556550, 1.05]], columns=['id', 'sampdate', 'optopoil', 'ophotspot', 'opload'])
# Initial State at i = 0
puload_0 = x_state[2]
hotspot_0 = x_state[1]
topoil_0 = x_state[0]
# Initial Controls at i =0
n_fans_0 = u_state[0]
n_pumps_0 = u_state[1]
Cw_0 = u_state[0] * u_state[2]
Cp_0 = u_state[1] * u_state[3]
# References
ref_puload = np.array(ref_state['opload'])
ref_hotspot = np.array(ref_state['ophotspot'])
ref_topoil = np.array(ref_state['optopoil'])
# Controlled variables
tophigh = m.Param(value = ref_topoil)
toplow = m.Param(value = ref_topoil - DELTA_TOP)
hothigh = m.Param(value=ref_hotspot)
# NOTE(review): hotlow is built from ref_topoil - DELTA_TOP; presumably it
# should be ref_hotspot - DELTA_HOT — verify against the other script.
hotlow = m.Param(value=ref_topoil - DELTA_TOP)
# NOTE(review): pulow/puhigh values appear swapped (pulow gets the reference,
# puhigh gets reference - DELTA_PU).
pulow = m.Param(value=ref_puload)
puhigh= m.Param(value=ref_puload - DELTA_PU)
# NOTE(review): the CVs below already have lb/ub arrays AND separate inequality
# equations — the same band is constrained twice.
puload = m.CV (value = np.array([puload_0]*n_steps), lb = ref_puload - DELTA_PU, ub = ref_puload)
hotspot = m.CV (value = np.array([hotspot_0]*n_steps), lb = ref_hotspot - DELTA_HOT, ub = ref_hotspot)
topoil = m.CV (value = np.array([topoil_0]*n_steps), lb = ref_topoil - DELTA_TOP, ub = ref_topoil)
m.Equations([topoil >= toplow, topoil <= tophigh])
m.Equations([hotspot >= hotlow, hotspot <= hothigh])
m.Equations([puload >= pulow, puload <= puhigh])
puload.STATUS = 1
hotspot.STATUS = 1
topoil.STATUS = 1
# Manipulated variables
fan_low = m.Param(value = np.zeros(n_steps))
fan_high = m.Param(value = np.array([NUM_FANS]*n_steps))
pump_low = m.Param(value = np.ones(n_steps))
pump_high = m.Param(value = np.array([NUM_PUMPS]*n_steps))
Cw_low = m.Param(value = np.zeros(n_steps))
Cw_high = m.Param(value= np.array([NUM_FANS * np.max(fan_powers)]*n_steps))
Cp_low = m.Param(value = np.ones(n_steps))
Cp_high = m.Param(value = np.array([NUM_PUMPS * np.max(pump_powers)]*n_steps))
n_fans = m.MV (value = np.array([n_fans_0]*n_steps), lb = np.zeros(n_steps), ub = np.array([NUM_FANS]*n_steps))
n_pumps = m.MV (value = np.array([n_pumps_0]*n_steps), lb = np.ones(n_steps), ub = np.array([NUM_PUMPS]*n_steps))
Cw = m.MV (value = np.array([Cw_0]*n_steps), lb = np.zeros(n_steps), ub = np.array([NUM_FANS * np.max(fan_powers)]*n_steps))
Cp = m.MV (value = np.array([Cp_0]*n_steps), lb = np.ones(n_steps), ub = np.array([NUM_PUMPS * np.max(pump_powers)]*n_steps))
# NOTE(review): `m.Equation` (singular) is given a list here; the plural
# `m.Equations` is the list form used elsewhere in this file.
m.Equation([n_fans >= fan_low, n_fans <= fan_high])
m.Equation([n_pumps >= pump_low, n_pumps <= pump_high])
m.Equation([Cw >= Cw_low, Cw <= Cw_high])
m.Equation([Cp>= Cp_low, Cp <= Cp_high])
n_fans.STATUS = 1
n_pumps.STATUS = 1
Cw.STATUS = 1
Cp.STATUS = 1
# Objective Function
Fuv = m.Var(value=0)
# NOTE(review): this call is the reported failure point — `Fuv = sum(...)` is a
# keyword argument wrapping a Python generator, which Gekko cannot parse (the
# exception shows "minimize<generatorobject...>"). Gekko variables are also
# indexed over m.time by the solver, not with [i] in model expressions.
m.Minimize(Fuv = sum(ALPHA * (Cw[i] + Cp[i]) / C_base
+ (1 - ALPHA) * (
pow ((ref_puload[i] - puload[i]) / DELTA_PU, 2)
+ pow ((ref_hotspot[i]- hotspot[i]) / DELTA_HOT, 2)
+ pow ((ref_topoil[i] - topoil[i]) / DELTA_TOP, 2)
)) for i in range(n_steps))
# Solver
m.options.IMODE = 6 # control
m.solve(disp=True, debug=True)
Gekko does the time indexing. Here is a modified version of the code that solves successfully. Solution was found by first solving without the inequalities as:
#%% Solve without inequalities
m.options.IMODE = 6 # control
m.options.SOLVER=1
m.solve(disp=True, debug=True)
----------------------------------------------
Dynamic Control with APOPT Solver
----------------------------------------------
Iter Objective Convergence
0 6.51001E+07 0.00000E+00
1 1.19052E+16 0.00000E+00
2 7.77312E+01 0.00000E+00
3 7.49548E+00 0.00000E+00
4 7.49548E+00 0.00000E+00
6 7.49548E+00 0.00000E+00
Successful solution
---------------------------------------------------
Solver : APOPT (v1.0)
Solution time : 0.043000000000000003 sec
Objective : 7.4954845726019945
Successful solution
---------------------------------------------------
and then using that solution to initialize the case with the inequalities:
#%% Turn on DOF and include inequalities
n_fans.STATUS = 1
n_pumps.STATUS = 1
Cw.STATUS = 1
Cp.STATUS = 1
m.Equations([topoil >= toplow, topoil <= tophigh])
m.Equations([hotspot >= hotlow, hotspot <= hothigh])
m.Equations([puload >= pulow, puload <= puhigh])
m.Equations([n_fans >= fan_low, n_fans <= fan_high])
m.Equations([n_pumps >= pump_low, n_pumps <= pump_high])
m.Equations([Cw >= Cw_low, Cw <= Cw_high])
m.Equations([Cp>= Cp_low, Cp <= Cp_high])
m.options.TIME_SHIFT=0
m.solve(disp=True, debug=True)
----------------------------------------------
Dynamic Control with APOPT Solver
----------------------------------------------
Iter Objective Convergence
0 7.91421E-02 7.49900E+03
1 7.70285E-02 1.30000E-09
2 7.70285E-02 2.00000E-10
3 7.70285E-02 2.00000E-10
Successful solution
---------------------------------------------------
Solver : APOPT (v1.0)
Solution time : 0.033999999999999996 sec
Objective : 0.07702852753968847
Successful solution
---------------------------------------------------
Here is the complete script:
# Working version of the MPC script: solve once without inequalities to
# initialize, then turn on the MV degrees of freedom and add the band
# constraints with TIME_SHIFT=0 so the first solution is reused.
import numpy as np
import pandas as pd
from gekko import GEKKO

m = GEKKO(remote=False)
ALPHA = 0.5  # objective weight between power and tracking terms
NUM_FANS = 8  # MAX Number of fans
NUM_PUMPS = 1  # MAX number of pumps
n_steps = 25
m.time = np.linspace(0, n_steps - 1, n_steps)
DELTA_TOP = 5  # 5 degC
DELTA_HOT = 5  # 5 degC
DELTA_PU = 0.05  # 0.05 p.u
fan_powers = np.array([145, 130, 120, 100, 500, 460, 430, 370, 860, 800, 720, 610, 1500, 1350, 1230, 1030])  # kW
pump_powers = np.array([0.43, 1.07, 2.95, 6.92, 8.83])  # kW
C_base = NUM_FANS * np.max(fan_powers) + NUM_PUMPS * np.max(pump_powers)  # kW
x_state = np.array([61.29027692, 70.15582365, 0.86972331])
u_state = np.array([5, 1.00, 1500.00, 0.43])
ref_state = pd.DataFrame([
[0, '2022-08-30T19:33:07.637217', 50.949829, 56.055570, 0.70],
[1, '2022-08-30T19:33:07.719390', 46.113708, 48.741882, 0.60],
[2, '2022-08-30T19:33:07.754899', 43.921465, 49.425708, 0.60],
[3, '2022-08-30T19:33:07.782037', 44.792515, 49.895490, 0.60],
[4, '2022-08-30T19:33:07.831646', 45.814439, 51.055404, 0.60],
[5, '2022-08-30T19:33:07.910940', 46.677830, 51.900248, 0.60],
[6, '2022-08-30T19:33:07.951684', 47.500278, 52.609172, 0.60],
[7, '2022-08-30T19:33:08.050460', 48.187270, 53.240813, 0.60],
[8, '2022-08-30T19:33:08.126050', 48.866124, 53.806335, 0.60],
[9, '2022-08-30T19:33:08.205533', 49.395292, 54.303250, 0.60],
[10, '2022-08-30T19:33:08.237825', 49.908234, 54.732465, 0.60],
[11, '2022-08-30T19:33:08.261200', 50.315668, 55.112417, 0.60],
[12, '2022-08-30T19:33:08.303079', 50.750658, 55.793464, 0.70],
[13, '2022-08-30T19:33:08.370096', 51.341523, 57.619243, 0.70],
[14, '2022-08-30T19:33:08.463300', 51.666736, 58.602764, 0.80],
[15, '2022-08-30T19:33:08.524749', 52.738678, 60.785766, 0.80],
[16, '2022-08-30T19:33:08.552913', 53.460458, 62.226178, 0.90],
[17, '2022-08-30T19:33:08.589561', 55.055422, 64.867184, 0.90],
[18, '2022-08-30T19:33:08.633709', 56.231096, 66.286857, 0.90],
[19, '2022-08-30T19:33:08.671211', 57.876352, 67.644409, 0.90],
[20, '2022-08-30T19:33:08.708004', 59.015503, 68.931404, 0.90],
[21, '2022-08-30T19:33:08.729763', 60.586943, 70.227752, 0.90],
[22, '2022-08-30T19:33:08.753146', 61.809524, 71.492779, 0.90],
[23, '2022-08-30T19:33:08.779459', 63.232974, 72.710304, 0.90],
[24, '2022-08-30T19:33:08.808419', 64.324357, 74.556550, 1.05]], \
columns=['id', 'sampdate', 'optopoil', 'ophotspot', 'opload'])
# Initial State at i = 0
puload_0 = x_state[2]
hotspot_0 = x_state[1]
topoil_0 = x_state[0]
# Initial Controls at i =0
n_fans_0 = u_state[0]
n_pumps_0 = u_state[1]
Cw_0 = u_state[0] * u_state[2]
Cp_0 = u_state[1] * u_state[3]
# References
ref_puload = m.Param(np.array(ref_state['opload']))
ref_hotspot = m.Param(np.array(ref_state['ophotspot']))
ref_topoil = m.Param(np.array(ref_state['optopoil']))
# Controlled variables: band [ref - DELTA, ref] for each tracked quantity
tophigh = m.Param(value = ref_topoil)
toplow = m.Param(value = ref_topoil - DELTA_TOP)
hothigh = m.Param(value=ref_hotspot)
# Fix: hotlow was built from ref_topoil - DELTA_TOP (copy-paste error);
# the hot-spot band must come from ref_hotspot - DELTA_HOT.
hotlow = m.Param(value=ref_hotspot - DELTA_HOT)
# Fix: pulow/puhigh were swapped (the low bound had the reference value and
# the high bound had reference - DELTA_PU, an empty band).
pulow = m.Param(value=ref_puload - DELTA_PU)
puhigh = m.Param(value=ref_puload)
puload = m.Var(value = puload_0)
hotspot = m.Var(value = hotspot_0)
topoil = m.Var(value = topoil_0)
# Manipulated variables
fan_low = m.Param(value = 0)
fan_high = m.Param(value = NUM_FANS)
pump_low = m.Param(value = 1)
pump_high = m.Param(value = NUM_PUMPS)
Cw_low = m.Param(value = 0)
Cw_high = m.Param(value= NUM_FANS * np.max(fan_powers))
Cp_low = m.Param(value = 1)
Cp_high = m.Param(value = NUM_PUMPS * np.max(pump_powers))
n_fans = m.MV(value = n_fans_0, lb = 0, ub = NUM_FANS)
n_pumps = m.MV(value = n_pumps_0, lb = 1, ub = NUM_PUMPS)
Cw = m.MV(value = Cw_0, lb = 0, ub = NUM_FANS * np.max(fan_powers))
Cp = m.MV(value = Cp_0, lb = 1, ub = NUM_PUMPS * np.max(pump_powers))
# Objective Function: power usage plus normalized tracking error
Fuv = m.Intermediate(ALPHA * (Cw + Cp) / C_base
+ (1 - ALPHA) * ((ref_puload - puload) / DELTA_PU)**2
+ ((ref_hotspot- hotspot) / DELTA_HOT)**2
+ ((ref_topoil - topoil) / DELTA_TOP)**2)
m.Minimize(Fuv)
#%% Solve without inequalities (initialization pass)
m.options.IMODE = 6 # control
m.options.SOLVER=1
m.solve(disp=True, debug=True)
#%% Turn on DOF and include inequalities
n_fans.STATUS = 1
n_pumps.STATUS = 1
Cw.STATUS = 1
Cp.STATUS = 1
m.Equations([topoil >= toplow, topoil <= tophigh])
m.Equations([hotspot >= hotlow, hotspot <= hothigh])
m.Equations([puload >= pulow, puload <= puhigh])
m.Equations([n_fans >= fan_low, n_fans <= fan_high])
m.Equations([n_pumps >= pump_low, n_pumps <= pump_high])
m.Equations([Cw >= Cw_low, Cw <= Cw_high])
m.Equations([Cp>= Cp_low, Cp <= Cp_high])
# Keep the initialization solution (no time shift) for the constrained solve
m.options.TIME_SHIFT=0
m.solve(disp=True, debug=True)

Free terminal time, integral objective type 2

I am trying to solve an optimal control problem that involves minimizing an integral objective with fixed states but free terminal time. It is a relatively simple problem that can be solved analytically. Gekko's solution doesn't match the analytical one. If I relax the lower bound of the terminal time, then I get something close to the analytical solution. Am I doing anything wrong in the Gekko code?
I had earlier posted a similar question here.
The analytical solution is given as follows. (lambda is the Lagrange multiplier)
# Free-terminal-time optimal control: time is normalized to [0, 1] and the
# true horizon tf is a decision variable (FV); dividing each ODE by tf
# rescales the dynamics onto the normalized grid.
from gekko import GEKKO
import numpy as np
import matplotlib.pyplot as plt
# constants
k1 = 0.5
k2 = 0.1
k3 = 0.5
g = 0.5
# create GEKKO model
m = GEKKO()
# time points
n = 501
# tm = np.array([0,1e-5,1e-4,1e-2])
# tm = np.hstack((tm,np.linspace(1e-1, 1, n)))
tm = np.linspace(0, 1, n)
m.time = tm
# Variables
x1 = m.Var(value=1,lb=0,ub=1) # x1
u = m.MV(value=0.1,fixed_initial=False,lb=0,ub=1)
u.STATUS = 1
u.DCOST = 1e-5
J = m.Var(value=0.0) # objective function differential form initial value
p = np.zeros(len(tm))
p[-1] = 1.0
final = m.Param(value=p)  # 1 only at the final time point
# FV: free terminal time
tf = m.FV(value=0.1, lb=3, ub=5.0)
tf.STATUS = 1
# equations (time-normalized dynamics)
m.Equation(x1.dt()/tf == -u -g*x1)
m.Equation(J.dt()/tf==k1*k3*(u-k2)/(u+k3))
# Final conditions
soft = True
if soft:
    # soft terminal constraint
    m.Minimize(final*1e5*(x1-0)**2)
    m.Minimize(final*1e5*(u-0)**2)
    # m.Minimize(final*1e5*(x2-2)**2)
else:
    # hard terminal constraint
    x1f = m.Param()
    m.free(x1f)
    m.fix_final(x1f, 0)
    uf = m.Param()
    m.free(uf)
    m.fix_final(uf, 0)
    # connect endpoint parameters to x1 and x2
    m.Equations([x1f == x1])
    m.Equations([uf == u])
# Objective Function
# obj = m.Intermediate(m.integral((u-k2)/(u+k3)))
obj = m.Intermediate(J)
m.Maximize(obj*final)
m.options.IMODE = 6
m.options.NODES = 3
m.options.SOLVER = 3
m.options.MAX_ITER = 50000
# m.options.MV_TYPE = 0
# m.options.DIAGLEVEL = 0
m.solve(disp=True)
plt.close('all')
# rescale normalized time by the optimized terminal time for plotting
tm = tm * tf.value[0]
# Create a figure
plt.figure(figsize=(10, 4))
plt.subplot(2, 2, 1)
# plt.plot([0,1],[1/9,1/9],'k2:',label=r'$x<\frac{1}{9}$')
plt.plot(tm, x1.value, 'k-', lw=2, label=r'$x1$')
plt.ylabel('x1')
plt.legend(loc='best')
plt.subplot(2, 2, 2)
plt.plot(tm, u.value, 'k2--', lw=2, label=r'$u$')
plt.ylabel('control')
plt.legend(loc='best')
plt.xlabel('Time')
plt.subplot(2, 2, 3)
plt.plot(tm, J.value, 'g-', lw=2)
plt.text(0.5, 3.0, 'Final Value = '+str(np.round(obj.value[-1], 2)))
plt.ylabel('Objective')
plt.legend(loc='best')
plt.xlabel('Time')
plt.subplot(2, 2, 4)
U = np.array(u.value)
G =k1*k3*(U-k2)/(U+k3)
plt.plot(tm, G, 'g-', lw=2)
plt.text(0.5, 3.0, 'Final Value = '+str(np.round(obj.value[-1], 2)))
plt.ylabel('Gopt')
plt.legend(loc='best')
plt.xlabel('Time')
plt.show()
Is a constraint or some other information missing? When the lower bound of tf is set to be non-restrictive at 0.1, it finds the same objective function as when the lower bound is set to 3.0.
tf = m.FV(value=0.1, lb=2.0, ub=5.0)
Both produce an objective of 0.1404.
# Same free-terminal-time problem as above, but with the lower bound of tf
# relaxed from 3.0 to 2.0; both versions produce an objective of 0.1404.
from gekko import GEKKO
import numpy as np
import matplotlib.pyplot as plt
# constants
k1 = 0.5
k2 = 0.1
k3 = 0.5
g = 0.5
# create GEKKO model
m = GEKKO()
# time points
n = 501
# tm = np.array([0,1e-5,1e-4,1e-2])
# tm = np.hstack((tm,np.linspace(1e-1, 1, n)))
tm = np.linspace(0, 1, n)
m.time = tm
# Variables
x1 = m.Var(value=1,lb=0,ub=1) # x1
u = m.MV(value=0.1,fixed_initial=False,lb=0,ub=1)
u.STATUS = 1
u.DCOST = 1e-5
J = m.Var(value=0.0) # objective function differential form initial value
p = np.zeros(len(tm))
p[-1] = 1.0
final = m.Param(value=p)  # 1 only at the final time point
# FV: free terminal time (relaxed lower bound)
tf = m.FV(value=0.1, lb=2.0, ub=5.0)
tf.STATUS = 1
# equations (time-normalized dynamics)
m.Equation(x1.dt()/tf == -u -g*x1)
m.Equation(J.dt()/tf==k1*k3*(u-k2)/(u+k3))
# Final conditions
soft = True
if soft:
    # soft terminal constraint
    m.Minimize(final*1e5*(x1-0)**2)
    m.Minimize(final*1e5*(u-0)**2)
    # m.Minimize(final*1e5*(x2-2)**2)
else:
    # hard terminal constraint
    x1f = m.Param()
    m.free(x1f)
    m.fix_final(x1f, 0)
    uf = m.Param()
    m.free(uf)
    m.fix_final(uf, 0)
    # connect endpoint parameters to x1 and x2
    m.Equations([x1f == x1])
    m.Equations([uf == u])
# Objective Function
# obj = m.Intermediate(m.integral((u-k2)/(u+k3)))
obj = m.Intermediate(J)
m.Maximize(obj*final)
m.options.IMODE = 6
m.options.NODES = 3
m.options.SOLVER = 3
m.options.MAX_ITER = 50000
# m.options.MV_TYPE = 0
# m.options.DIAGLEVEL = 0
m.solve(disp=True)
plt.close('all')
# rescale normalized time by the optimized terminal time for plotting
tm = tm * tf.value[0]
# Create a figure
plt.figure(figsize=(10, 4))
plt.subplot(2, 2, 1)
# plt.plot([0,1],[1/9,1/9],'k2:',label=r'$x<\frac{1}{9}$')
plt.plot(tm, x1.value, 'k-', lw=2, label=r'$x1$')
plt.ylabel('x1')
plt.legend(loc='best')
plt.subplot(2, 2, 2)
plt.plot(tm, u.value, 'k2--', lw=2, label=r'$u$')
plt.ylabel('control')
plt.legend(loc='best')
plt.xlabel('Time')
plt.subplot(2, 2, 3)
plt.plot(tm, J.value, 'g-', lw=2, label='J')
plt.text(0.5, 3.0, 'Final Value = '+str(np.round(obj.value[-1], 2)))
plt.ylabel('Objective')
plt.legend(loc='best')
plt.xlabel('Time')
plt.subplot(2, 2, 4)
U = np.array(u.value)
G =k1*k3*(U-k2)/(U+k3)
plt.plot(tm, G, 'g-', lw=2, label='G')
plt.text(0.5, 3.0, 'Final Value = '+str(np.round(obj.value[-1], 2)))
plt.ylabel('Gopt')
plt.legend(loc='best')
plt.xlabel('Time')
plt.show()

Free terminal time, integral objective and differential equations as constraints

I am trying to solve an optimal control problem that involves minimizing an integral objective with fixed states but free terminal time. It is a relatively simple problem that can be solved analytically. Gekko's solution doesn't match the analytical.
I am not sure what I am doing wrong. I followed several Gekko examples to solve this one. Any help is much appreciated.
Another problem I am having is how to let Gekko automatically calculate initial values of control. Optimal control always starts with the specified initial guess of control.
from gekko import GEKKO
import numpy as np
import matplotlib.pyplot as plt

# Free-terminal-time optimal control posed on a normalized grid [0, 1];
# the real horizon length enters through the free final time tf.
m = GEKKO()
n = 501
t_grid = np.linspace(0, 1, n)
m.time = t_grid

# States (double-integrator dynamics: x1' = x2, x2' = u).
x1 = m.Var(value=1)
x2 = m.Var(value=2)

# Control input.
# u = m.Var(value=-1)  # alternative: plain variable instead of an MV
u = m.MV(value=-1)
u.STATUS = 1    # optimizer is allowed to move u
u.DCOST = 1e-5  # small move-suppression penalty

# Indicator parameter: 1 at the final time point, 0 elsewhere.
endpoint = np.zeros(n)
endpoint[-1] = 1.0
final = m.Param(value=endpoint)

# Free terminal time, optimized by the solver.
tf = m.FV(value=10.0, lb=0.0, ub=100.0)
tf.STATUS = 1

# Dynamics rescaled so derivatives run over the normalized grid.
m.Equation(x1.dt()/tf == x2)
m.Equation(x2.dt()/tf == u)

# Terminal condition: soft penalty or hard endpoint fix.
soft = True
if soft:
    # soft terminal constraint
    m.Minimize(final*1e5*(x1-3)**2)
    # m.Minimize(final*1e5*(x2-2)**2)
else:
    # hard terminal constraint
    x1f = m.Param()
    m.free(x1f)
    m.fix_final(x1f, 3)
    # connect endpoint parameter to x1
    m.Equations([x1f == x1])

# Objective: value of (1/2) * integral(u^2), weighted at the endpoint.
obj = m.Intermediate(tf*final*m.integral(0.5*u**2))
m.Minimize(final*obj)

# Solver configuration.
m.options.IMODE = 6
m.options.NODES = 2
m.options.SOLVER = 3
m.options.MAX_ITER = 500
# m.options.MV_TYPE = 0
m.options.DIAGLEVEL = 0
m.solve(disp=False)

# Plot states, control, and the running objective.
plt.figure(figsize=(10, 4))
plt.subplot(2, 2, 1)
# plt.plot([0,1],[1/9,1/9],'r:',label=r'$x<\frac{1}{9}$')
plt.plot(t_grid, x1.value, 'k-', lw=2, label=r'$x1$')
plt.ylabel('x1')
plt.legend(loc='best')
plt.subplot(2, 2, 2)
plt.plot(t_grid, x2.value, 'b--', lw=2, label=r'$x2$')
plt.ylabel('x2')
plt.legend(loc='best')
plt.subplot(2, 2, 3)
plt.plot(t_grid, u.value, 'r--', lw=2, label=r'$u$')
plt.ylabel('control')
plt.legend(loc='best')
plt.xlabel('Time')
plt.subplot(2, 2, 4)
plt.plot(t_grid, obj.value, 'g-', lw=2, label=r'$\frac{1}{2} \int u^2$')
plt.text(0.5, 3.0, 'Final Value = '+str(np.round(obj.value[-1], 2)))
plt.ylabel('Objective')
plt.legend(loc='best')
plt.xlabel('Time')
plt.show()
Here are a few modifications:
# fixed_initial=False lets the solver choose u at t=0 instead of pinning it
# to the initial guess of -1:
# u = m.MV(value=-1)
u = m.MV(value=-1,fixed_initial=False)
# Drop the extra tf*final factors from the integral; the endpoint weighting is
# applied once when the objective term is minimized:
#obj = m.Intermediate(tf*final*m.integral(0.5*u**2))
obj = m.Intermediate(m.integral(0.5*u**2))
m.options.NODES = 3 # increase accuracy
If you add a constraint that tf<=3 then it gives the same solution as above.
However, if you relax the tf constraint to <=100 then there is a better solution.
from gekko import GEKKO
import numpy as np
import matplotlib.pyplot as plt

# Corrected free-terminal-time problem: normalized time grid [0, 1],
# dynamics scaled by the free final time tf.
m = GEKKO()
n = 501
tau = np.linspace(0, 1, n)
m.time = tau

# States (double integrator: x1' = x2, x2' = u).
x1 = m.Var(value=1)
x2 = m.Var(value=2)

# Manipulated variable; fixed_initial=False lets the solver pick u(0).
u = m.MV(value=-1, fixed_initial=False)
u.STATUS = 1
u.DCOST = 1e-5

# Parameter equal to 1 only at the last time point.
endpoint = np.zeros(n)
endpoint[-1] = 1.0
final = m.Param(value=endpoint)

# Free terminal time.
tf = m.FV(value=10.0, lb=0.0, ub=100.0)
tf.STATUS = 1

# Scaled dynamics.
m.Equation(x1.dt()/tf == x2)
m.Equation(x2.dt()/tf == u)

# Terminal condition: soft penalty (default) or hard endpoint fix.
soft = True
if soft:
    # soft terminal constraint
    m.Minimize(final*1e5*(x1-3)**2)
    # m.Minimize(final*1e5*(x2-2)**2)
else:
    # hard terminal constraint
    x1f = m.Param()
    m.free(x1f)
    m.fix_final(x1f, 3)
    # connect endpoint parameter to x1
    m.Equations([x1f == x1])

# Objective: minimize the terminal value of (1/2) * integral(u^2).
obj = m.Intermediate(m.integral(0.5*u**2))
m.Minimize(final*obj)

# Solver settings.
m.options.IMODE = 6
m.options.NODES = 3
m.options.SOLVER = 3
m.options.MAX_ITER = 500
# m.options.MV_TYPE = 0
m.options.DIAGLEVEL = 0
m.solve(disp=True)

# Rescale the normalized grid to real time before plotting.
tau = tau*tf.value[0]
plt.figure(figsize=(10, 4))
plt.subplot(2, 2, 1)
# plt.plot([0,1],[1/9,1/9],'r:',label=r'$x<\frac{1}{9}$')
plt.plot(tau, x1.value, 'k-', lw=2, label=r'$x1$')
plt.ylabel('x1')
plt.legend(loc='best')
plt.subplot(2, 2, 2)
plt.plot(tau, x2.value, 'b--', lw=2, label=r'$x2$')
plt.ylabel('x2')
plt.legend(loc='best')
plt.subplot(2, 2, 3)
plt.plot(tau, u.value, 'r--', lw=2, label=r'$u$')
plt.ylabel('control')
plt.legend(loc='best')
plt.xlabel('Time')
plt.subplot(2, 2, 4)
plt.plot(tau, obj.value, 'g-', lw=2, label=r'$\frac{1}{2} \int u^2$')
plt.text(0.5, 3.0, 'Final Value = '+str(np.round(obj.value[-1], 2)))
plt.ylabel('Objective')
plt.legend(loc='best')
plt.xlabel('Time')
plt.show()

fitting keras model for cat and dog image classification takes 50 minutes at each epoch. any way i can reduce time?

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

image_size = (180, 180)
batch_size = 32

# Shared keyword arguments for the 80/20 train/validation split.
split_kwargs = dict(
    validation_split=0.2,
    seed=1337,
    image_size=image_size,
    batch_size=batch_size,
)
train_ds = tf.keras.preprocessing.image_dataset_from_directory(
    "PetImages", subset="training", **split_kwargs
)
val_ds = tf.keras.preprocessing.image_dataset_from_directory(
    "PetImages", subset="validation", **split_kwargs
)

# Lightweight augmentation, applied inside the model graph.
data_augmentation = keras.Sequential([
    layers.RandomFlip("horizontal"),
    layers.RandomRotation(0.1),
])

# Prefetch so the input pipeline overlaps disk reads with training.
train_ds = train_ds.prefetch(buffer_size=32)
val_ds = val_ds.prefetch(buffer_size=32)
def make_model(input_shape, num_classes):
    """Build a small Xception-style CNN with augmentation and rescaling baked in.

    Args:
        input_shape: input image shape, e.g. (180, 180, 3).
        num_classes: 2 gives a single sigmoid output; otherwise softmax.

    Returns:
        An uncompiled keras.Model.
    """
    inputs = keras.Input(shape=input_shape)
    # Image augmentation block (uses the module-level data_augmentation).
    x = data_augmentation(inputs)
    # Entry block: rescale to [0, 1], then two plain conv stages.
    x = layers.Rescaling(1.0 / 255)(x)
    x = layers.Conv2D(32, 3, strides=2, padding="same")(x)
    x = layers.BatchNormalization()(x)
    x = layers.Activation("relu")(x)
    x = layers.Conv2D(64, 3, padding="same")(x)
    x = layers.BatchNormalization()(x)
    x = layers.Activation("relu")(x)
    previous_block_activation = x  # set aside residual
    # Four downsampling blocks of separable convs with residual shortcuts.
    for size in [128, 256, 512, 728]:
        x = layers.Activation("relu")(x)
        x = layers.SeparableConv2D(size, 3, padding="same")(x)
        x = layers.BatchNormalization()(x)
        x = layers.Activation("relu")(x)
        x = layers.SeparableConv2D(size, 3, padding="same")(x)
        x = layers.BatchNormalization()(x)
        x = layers.MaxPooling2D(3, strides=2, padding="same")(x)
        # 1x1 conv projects the residual to the new width and stride.
        shortcut = layers.Conv2D(size, 1, strides=2, padding="same")(
            previous_block_activation
        )
        x = layers.add([x, shortcut])  # add back residual
        previous_block_activation = x  # set aside next residual
    # Exit block.
    x = layers.SeparableConv2D(1024, 3, padding="same")(x)
    x = layers.BatchNormalization()(x)
    x = layers.Activation("relu")(x)
    x = layers.GlobalAveragePooling2D()(x)
    # Binary classification uses a single sigmoid unit; otherwise softmax.
    if num_classes == 2:
        activation, units = "sigmoid", 1
    else:
        activation, units = "softmax", num_classes
    x = layers.Dropout(0.5)(x)
    outputs = layers.Dense(units, activation=activation)(x)
    return keras.Model(inputs, outputs)
# Instantiate for RGB input and 2 classes, then train with per-epoch checkpoints.
model = make_model(input_shape=image_size + (3,), num_classes=2)
keras.utils.plot_model(model, show_shapes=True)

epochs = 50
# Save a checkpoint after every epoch.
callbacks = [keras.callbacks.ModelCheckpoint("save_at_{epoch}.h5")]

model.compile(
    optimizer=keras.optimizers.Adam(1e-3),
    loss="binary_crossentropy",
    metrics=["accuracy"],
)
model.fit(train_ds, epochs=epochs, callbacks=callbacks, validation_data=val_ds)
So the strategy was to begin the model with the data_augmentation preprocessor, followed by a Rescaling layer and a dropout layer before the final classification layer as shown in the make_model function
For training the model, as you can see, I set epochs=50 and used buffered prefetching for my input data so it would yield data from disk without I/O blocking. As for the rest of the parameters, I think they were pretty standard — nothing too complicated — but when I run my code, each epoch takes approximately 40 minutes and I don't know why.
Any suggestions?

tensorflow adapt for local rgb image classification

I was wondering how to adapt the following code from the GitHub project batchnorm_five_layers to read in two classes (cats & dogs) from local image paths, with image size 780x780 and RGB color. Here is the uncommented code from the link:
# encoding: UTF-8
import tensorflow as tf
import tensorflowvisu
import math
# NOTE(review): tensorflow.contrib was removed in TF 2.x — this script requires TF 1.x.
from tensorflow.contrib.learn.python.learn.datasets.mnist import read_data_sets
tf.set_random_seed(0)
# Download images and labels into mnist.test (10K images+labels) and mnist.train (60K images+labels)
mnist = read_data_sets("data", one_hot=True, reshape=False, validation_size=0)
# input X: 28x28 grayscale images, the first dimension (None) will index the images in the mini-batch
X = tf.placeholder(tf.float32, [None, 28, 28, 1])
# correct answers will go here
Y_ = tf.placeholder(tf.float32, [None, 10])
# variable learning rate
lr = tf.placeholder(tf.float32)
# train/test selector for batch normalisation
tst = tf.placeholder(tf.bool)
# training iteration (feeds the exponential moving averages in batchnorm)
iter = tf.placeholder(tf.int32)
# five layers and their number of neurons (the last layer has 10 softmax neurons)
L = 200
M = 100
N = 60
P = 30
Q = 10
# Weights initialised with small random values between -0.2 and +0.2
# When using RELUs, make sure biases are initialised with small *positive* values for example 0.1 = tf.ones([K])/10
W1 = tf.Variable(tf.truncated_normal([784, L], stddev=0.1)) # 784 = 28 * 28
B1 = tf.Variable(tf.ones([L])/10)
W2 = tf.Variable(tf.truncated_normal([L, M], stddev=0.1))
B2 = tf.Variable(tf.ones([M])/10)
W3 = tf.Variable(tf.truncated_normal([M, N], stddev=0.1))
B3 = tf.Variable(tf.ones([N])/10)
W4 = tf.Variable(tf.truncated_normal([N, P], stddev=0.1))
B4 = tf.Variable(tf.ones([P])/10)
W5 = tf.Variable(tf.truncated_normal([P, Q], stddev=0.1))
B5 = tf.Variable(tf.ones([Q])/10)
def batchnorm(Ylogits, is_test, iteration, offset, convolutional=False):
    """Batch-normalize Ylogits.

    Returns a tuple (normalized tensor, op that updates the moving averages).
    At test time (is_test True) the accumulated moving averages are used
    instead of the current batch statistics.
    """
    # Passing the iteration number prevents averaging across non-existing iterations.
    ema = tf.train.ExponentialMovingAverage(0.999, iteration)
    eps = 1e-5
    # Convolutional layers average over batch + spatial dims; dense layers over batch only.
    axes = [0, 1, 2] if convolutional else [0]
    batch_mean, batch_var = tf.nn.moments(Ylogits, axes)
    update_moving_averages = ema.apply([batch_mean, batch_var])
    m = tf.cond(is_test, lambda: ema.average(batch_mean), lambda: batch_mean)
    v = tf.cond(is_test, lambda: ema.average(batch_var), lambda: batch_var)
    Ybn = tf.nn.batch_normalization(Ylogits, m, v, offset, None, eps)
    return Ybn, update_moving_averages
def no_batchnorm(Ylogits, is_test, iteration, offset, convolutional=False):
    """Drop-in replacement for batchnorm that performs no normalization."""
    return Ylogits, tf.no_op()
# The model
# Flatten each 28x28 image into a 784-vector for the dense layers.
XX = tf.reshape(X, [-1, 784])
# batch norm scaling is not useful with relus
# batch norm offsets are used instead of biases
# Each hidden layer: linear matmul -> batch norm (offset Bk) -> ReLU.
Y1l = tf.matmul(XX, W1)
Y1bn, update_ema1 = batchnorm(Y1l, tst, iter, B1)
Y1 = tf.nn.relu(Y1bn)
Y2l = tf.matmul(Y1, W2)
Y2bn, update_ema2 = batchnorm(Y2l, tst, iter, B2)
Y2 = tf.nn.relu(Y2bn)
Y3l = tf.matmul(Y2, W3)
Y3bn, update_ema3 = batchnorm(Y3l, tst, iter, B3)
Y3 = tf.nn.relu(Y3bn)
Y4l = tf.matmul(Y3, W4)
Y4bn, update_ema4 = batchnorm(Y4l, tst, iter, B4)
Y4 = tf.nn.relu(Y4bn)
# Output layer keeps a plain bias (no batch norm) and a softmax over 10 classes.
Ylogits = tf.matmul(Y4, W5) + B5
Y = tf.nn.softmax(Ylogits)
# Group the per-layer moving-average updates into a single op.
update_ema = tf.group(update_ema1, update_ema2, update_ema3, update_ema4)
# Cross-entropy on the raw logits for numerical stability; the *100 factor
# matches the batch size of 100 used in training_step below.
cross_entropy = tf.nn.softmax_cross_entropy_with_logits(logits=Ylogits, labels=Y_)
cross_entropy = tf.reduce_mean(cross_entropy)*100
# accuracy of the trained model, between 0 (worst) and 1 (best)
correct_prediction = tf.equal(tf.argmax(Y, 1), tf.argmax(Y_, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
# matplotlib visualisation
allweights = tf.concat([tf.reshape(W1, [-1]), tf.reshape(W2, [-1]), tf.reshape(W3, [-1])], 0)
allbiases = tf.concat([tf.reshape(B1, [-1]), tf.reshape(B2, [-1]), tf.reshape(B3, [-1])], 0)
# to use for sigmoid
#allactivations = tf.concat([tf.reshape(Y1, [-1]), tf.reshape(Y2, [-1]), tf.reshape(Y3, [-1]), tf.reshape(Y4, [-1])], 0)
# to use for RELU
allactivations = tf.concat([tf.reduce_max(Y1, [0]), tf.reduce_max(Y2, [0]), tf.reduce_max(Y3, [0]), tf.reduce_max(Y4, [0])], 0)
alllogits = tf.concat([tf.reshape(Y1l, [-1]), tf.reshape(Y2l, [-1]), tf.reshape(Y3l, [-1]), tf.reshape(Y4l, [-1])], 0)
I = tensorflowvisu.tf_format_mnist_images(X, Y, Y_)
It = tensorflowvisu.tf_format_mnist_images(X, Y, Y_, 1000, lines=25)
datavis = tensorflowvisu.MnistDataVis(title4="Logits", title5="Max activations across batch", histogram4colornum=2, histogram5colornum=2)
# training step, the learning rate is a placeholder
train_step = tf.train.AdamOptimizer(lr).minimize(cross_entropy)
# init
init = tf.global_variables_initializer()
sess = tf.Session()
sess.run(init)
# You can call this function in a loop to train the model, 100 images at a time
def training_step(i, update_test_data, update_train_data):
    """Run one optimization step on a batch of 100 images; optionally record stats."""
    # training on batches of 100 images with 100 labels
    batch_X, batch_Y = mnist.train.next_batch(100)

    # learning-rate schedule: exponential decay from max to min
    max_learning_rate = 0.03
    min_learning_rate = 0.0001
    decay_speed = 1000.0
    learning_rate = min_learning_rate + (max_learning_rate - min_learning_rate) * math.exp(-i/decay_speed)

    # compute training values for visualisation
    if update_train_data:
        a, c, im, al, ac = sess.run([accuracy, cross_entropy, I, alllogits, allactivations], {X: batch_X, Y_: batch_Y, tst: False})
        print(str(i) + ": accuracy:" + str(a) + " loss: " + str(c) + " (lr:" + str(learning_rate) + ")")
        datavis.append_training_curves_data(i, a, c)
        datavis.update_image1(im)
        datavis.append_data_histograms(i, al, ac)

    # compute test values for visualisation
    if update_test_data:
        a, c, im = sess.run([accuracy, cross_entropy, It], {X: mnist.test.images, Y_: mnist.test.labels, tst: True})
        print(str(i) + ": ********* epoch " + str(i*100//mnist.train.images.shape[0]+1) + " ********* test accuracy:" + str(a) + " test loss: " + str(c))
        datavis.append_test_curves_data(i, a, c)
        datavis.update_image2(im)

    # the backpropagation training step, then the moving-average updates
    sess.run(train_step, {X: batch_X, Y_: batch_Y, lr: learning_rate, tst: False})
    sess.run(update_ema, {X: batch_X, Y_: batch_Y, tst: False, iter: i})
# Drive the training loop (10001 iterations) through the visualisation helper.
datavis.animate(training_step, iterations=10000+1, train_data_update_freq=20, test_data_update_freq=100, more_tests_at_start=True)
print("max test accuracy: " + str(datavis.get_max_test_accuracy()))
To answer your question in the comments: this is probably what you want to change your code into:
# input X: 780x780 RGB images; the first dimension (None) will index the images in the mini-batch
X = tf.placeholder(tf.float32, [None, 780, 780, 3])
# correct answers will go here (width 2: one column per class)
Y_ = tf.placeholder(tf.float32, [None, 2])
And an image can be read like this:
# scipy.misc.imread was deprecated and removed from SciPy (1.2+); read the
# image with matplotlib (already used elsewhere in this file) instead.
# Also avoid naming the result `input`, which shadows the Python builtin.
import matplotlib.pyplot as plt
img = plt.imread('input.png')  # ndarray, shape (H, W[, C])
Now it might be best to follow a Tensorflow tutorial. This one is really good: kadenze.com/courses/creative-applications-of-deep-learning-with-tensorflow-iv/info
Good luck!

Resources