I am opening this new thread because I am looking for some help using Dymos to simulate a dynamic system.
I am trying to simulate a system composed of a pressurized bottle with a fluid inside. At t=0, the pressure starts pushing the fluid through the bottle outlet, and as a result the pressure inside the bottle decreases. My aim is to simulate the evolution of the pressure inside the bottle and of the volumetric flow rate of the fluid escaping from the bottle. I found a Dymos example which is very similar to what I am trying to do, but simpler: https://openmdao.github.io/dymos/examples/water_rocket/water_rocket.html
To model my system, I am using two explicit components: PressureRate and VolumeFlowRate. I then define the group BottleModelODE to connect these two components and their variables.
Here are these components:
# ODE component (as posted in the question): evaluates the pressure rate
# p_dot = gamma * p / (Vb - Vl) * Vl_dot.
class PressureRate(om.ExplicitComponent):
def initialize(self):
# 'num_nodes' sets the length of every input/output vector declared in setup().
self.options.declare('num_nodes', types=int)
def setup(self):
nn = self.options['num_nodes']
# Inputs (each one is a vector of length nn)
self.add_input('p', shape=(nn,), desc='Pressure inside the nox bottle', units='Pa')
self.add_input('Vb', shape=(nn,), desc='Bottle volume', units='m**3')
self.add_input('Vl', shape=(nn,), desc='Liquid volume', units='m**3')
self.add_input('Vl_dot', shape=(nn,), desc='Liquid volume flow rate', units='m**3/s')
self.add_input('gamma', shape=(nn,), desc='Heat capacity ratio')
# Outputs (initialised to ones)
self.add_output('p_dot', val=np.ones(nn), desc='Pressure change rate', units='Pa/s')
# All partial derivatives are approximated by finite differences.
self.declare_partials(of='*', wrt='*', method='fd')
def compute(self, inputs, outputs):
p = inputs['p']
Vb = inputs['Vb']
Vl = inputs['Vl']
Vl_dot = inputs['Vl_dot']
gamma = inputs['gamma']
# (Vb - Vl) is the part of the bottle volume not occupied by liquid.
outputs['p_dot'] = gamma * p/(Vb - Vl) * Vl_dot
# ODE component (as posted in the question): evaluates the liquid volume flow
# rate Vl_dot = Aout * sqrt(2 / rhol * (p - pout - deltap)).
class VolumeFlowRate(om.ExplicitComponent):
# NOTE(review): the docstring below mentions a harmonic oscillator, which does
# not match what compute() evaluates.
"""
A Dymos ODE for a damped harmonic oscillator.
"""
def initialize(self):
self.options.declare('num_nodes', types=int)
# NOTE(review): setup() is defined twice in a row. Presumably both belong to
# this class, in which case the second definition replaces the first and 'nn'
# is never used -- indentation was lost, confirm against the original post.
def setup(self):
nn = self.options['num_nodes']
def setup(self):
# Inputs (declared without a shape, unlike the (nn,) vectors of PressureRate)
self.add_input('p', desc='Pressure inside the nox_bottle', units='Pa')
self.add_input('pout', desc='Pressure outside the nox_bottle', units='Pa')
self.add_input('deltap', desc='Nox bottle pressure losses', units='Pa')
self.add_input('rhol', desc='Liquid density', units='kg/m**3')
self.add_input('Aout', desc='Output nox_bottle area', units='m**2')
# Outputs (also declared without a shape)
self.add_output('Vl_dot', desc='Volume flow rate', units='m**3/s')
# All partial derivatives are approximated by finite differences.
self.declare_partials(of='*', wrt='*', method='fd')
def compute(self, inputs, outputs):
p = inputs['p']
pout = inputs['pout']
deltap = inputs['deltap']
rhol = inputs['rhol']
Aout = inputs['Aout']
# Outlet area times sqrt(2 * (p - pout - deltap) / rhol).
outputs['Vl_dot'] = Aout*np.sqrt(2/rhol*(p - pout - deltap))
# ODE group (as posted in the question) combining PressureRate and VolumeFlowRate.
class BottleModelODE(om.Group):
def initialize(self):
self.options.declare('num_nodes', types=int)
def setup(self):
nn = self.options['num_nodes']
# 'pressure_rate' is added before 'volume_flow_rate', although it takes
# 'Vl_dot' as an input and 'volume_flow_rate' is the component that outputs it.
self.add_subsystem('pressure_rate', subsys=PressureRate(num_nodes=nn),
promotes_inputs=['p', "Vb", "Vl", "Vl_dot", "gamma"], promotes_outputs=['p_dot'])
self.add_subsystem('volume_flow_rate', subsys=VolumeFlowRate(num_nodes=nn),
promotes_inputs=['p', "pout", 'deltap', 'rhol', "Aout"], promotes_outputs=['Vl_dot'])
# NOTE(review): 'p' and 'Vl_dot' are already promoted by both subsystems above,
# and these connect() calls use the un-promoted, subsystem-qualified names.
# 'pressure_rate.p', 'volume_flow_rate.p' and 'pressure_rate.Vl_dot' are all
# declared as inputs in their components.
self.connect('pressure_rate.p', 'volume_flow_rate.p')
self.connect('pressure_rate.Vl_dot', 'volume_flow_rate.Vl_dot')
Then, to solve these equations and simulate my model, I built a program based on the oscillator example: https://openmdao.github.io/dymos/getting_started/intro_to_dymos/intro_segments.html
I define a phase called "expulsion" using the following function:
# Builds the phase (as posted in the question): fixed initial time and fixed
# duration, two states ('p', 'Vl') and six parameters, all wired to
# BottleModelODE. 'pamb' is used as the lower bound of the state 'p'.
def expulsion_phase_fn(transcription: dm.transcriptions.pseudospectral.radau_pseudospectral.Radau, pamb: float):
phase = dm.Phase(ode_class=BottleModelODE, transcription=transcription)
phase.set_time_options(fix_initial=True, fix_duration=True)
# NOTE(review): every rate_source/target below uses a subsystem-qualified name
# ('pressure_rate.*', 'volume_flow_rate.*') although BottleModelODE promotes
# these variables.
# NOTE(review): the state 'p' is declared in 'bar' while the ODE inputs named
# 'p' are declared in 'Pa'; presumably 'lower=pamb' is read in the state's
# units -- confirm against the Dymos documentation.
phase.add_state('p', units='bar', rate_source='pressure_rate.p_dot',
targets=['pressure_rate.p', "volume_flow_rate.p"], fix_initial=True, fix_final=False, lower=pamb)
phase.add_state('Vl', units='m**3', rate_source='volume_flow_rate.Vl_dot', targets=['pressure_rate.Vl'],
fix_initial=True, fix_final=False, lower=0)
phase.add_parameter('Vb', targets=['pressure_rate.Vb'], units='m**3')
phase.add_parameter('gamma', targets=['pressure_rate.gamma'])
phase.add_parameter('rhol', targets=['volume_flow_rate.rhol'], units='kg/m**3')
phase.add_parameter('Aout', targets=['volume_flow_rate.Aout'], units='m**2')
phase.add_parameter('pout', targets=['volume_flow_rate.pout'], units="Pa")
phase.add_parameter('deltap', targets=['volume_flow_rate.deltap'], units="Pa")
return phase
Then, I am defining a trajectory with this function:
# Builds a trajectory holding the single phase 'expulsion' (as posted in the
# question). Returns the trajectory and the value returned by add_phase().
def trajectory(pamb: float):
# Radau transcription with 50 segments and solve_segments='forward'.
transcript = dm.Radau(num_segments=50, solve_segments='forward')
traj = dm.Trajectory()
# Add phases to trajectory
expulsion_phase = traj.add_phase('expulsion',
expulsion_phase_fn(transcription=transcript, pamb=pamb))
return traj, expulsion_phase
And finally, I set up the OpenMDAO problem and provide the initial values with the following lines, which are based on the oscillator example:
# Driver routine (as posted in the question): builds the problem, sets the
# initial values and runs the optimizer.
def launch_compt():
# Set ambiant conditions
# NOTE(review): Tamb, Vb and Aout are not used below; Aout here (10*10**-4)
# also differs from bottle_params["Aout"] (3*10**-2).
Tamb = 20 + 273.15
pamb = 100*10**3
deltap = 0
Vb = 5*10**-3
Aout = 10*10**-4
# Set NOX bottle properties up
# NOTE(review): the opening quote of the "pinit" key is missing on the second
# line of this literal, and the dict has no "deltap" key although
# bottle_params["deltap"] is read further down.
bottle_params = {"Vb": 5*10**-3, "gamma": 1.4, "Aout": 3*10**-2, "rhol": 1000, "pout":
100*10**3, pinit": 300*10**3, "Vl": 1*10**-3}
# Instantiate an OpenMDAO Problem instance
prob = om.Problem(model=om.Group())
prob.driver = om.ScipyOptimizeDriver(optimizer='SLSQP')
# Instantiate a Dymos trjectory and add it to the Problem model
# NOTE(review): 100*10*3 evaluates to 3000, whereas pamb above is 100*10**3.
traj, phase = trajectory(pamb= 100*10*3)
phase.add_objective("time", loc="final")
# Setup the OpenMDAO problem
prob.model.add_subsystem("traj", traj)
prob.setup()
# Assign values to the times and states
# NOTE(review): the paths below spell the phase 'explusion', while the phase is
# registered as 'expulsion' in trajectory().
prob.set_val('traj.explusion.t_initial', 0.0)
prob.set_val('traj.explusion.t_duration', 200.0)
prob.set_val('traj.explusion.states:p', bottle_params["pinit"])
prob.set_val('traj.explusion.states:Vl', bottle_params["Vl"])
prob.set_val('traj.explusion.parameters:Vb', bottle_params["Vb"])
prob.set_val('traj.explusion.parameters:gamma', bottle_params["gamma"])
prob.set_val('traj.explusion.parameters:rhol', bottle_params["rhol"])
prob.set_val('traj.explusion.parameters:Aout', bottle_params["Aout"])
prob.set_val('traj.explusion.parameters:pout', bottle_params["pout"])
prob.set_val('traj.explusion.parameters:deltap', bottle_params["deltap"])
prob.run_driver()
Unfortunately, this does not work and I cannot understand why. The error says that the parameter Vb (bottle total volume) is not provided, but it is provided when I add the parameters to the phase, just as in the oscillator example.
I am therefore reaching out in the hope of finding some help.
Thanks in advance for any answer.
PS: Here is the error message that I get when I am trying to run the program:
raise ValueError(f'Invalid parameter in phase `{self.pathname}`.\n{str(e)}') from e
ValueError: Invalid parameter in phase `traj.phases.expulsion`.
Parameter `Vb` has invalid target(s).
No such ODE input: 'pressure_rate.Vb'.
The primary issue that you have asked about, the "No such ODE input" error, is caused by the way you coded your ODE, and more specifically by the way you promoted variables and then added the ODE to the phase.
For example, you promoted your input p and then set the state target to pressure_rate.p. This is incorrect. When you promoted p, that effectively moved its name up to the top level of the ODE, so the target name is now just p. You can read more about promotion vs connection in the docs. You have this problem in most of your script: you are not accounting for promotion when you set targets.
Unfortunately, this is not the only issue in your script. There are several more, and enough that I am not able to get things fully working.
Here are some other modest issues in rough order of importance:
The VolumeFlowRate component's inputs and outputs are scalar, but they seem to be intended to connect to the vector (of size num_nodes) variables of PressureRate. I suspect you meant to make them vectors as well, but I am not 100% sure.
You have an execution-order issue between PressureRate and VolumeFlowRate. PressureRate needs Vl_dot as an input, which comes from VolumeFlowRate, but you added PressureRate first, so it will run BEFORE the component providing its input value.
You had a typo in your set_val calls (explusion vs expulsion).
You did not have a deltap key in the parameter dictionary, but you did have a variable for it.
After fixing those, I could get the problem to start running, but it did not converge or give an answer. You had solve_segments set to forward and had set 50 segments. Both of those seemed like bad settings to me, so I changed to 3 segments and removed the solve_segments option.
Then I was able to get the optimizer to take a few steps, but it errored with
Current function value: [200.]
Iterations: 6
Function evaluations: 12
Gradient evaluations: 2
Optimization FAILED.
Positive directional derivative for linesearch
This indicated a problem with the derivatives, so I changed your setting for partial derivatives from fd to cs. That allowed it to iterate more, but it still didn't converge. Without diving more into the physics of your problem I can't easily diagnose this further. I suspect you have some bad boundary conditions and probably bad initial guesses, though.
Here is the modified script I came up with to at least get the optimizer iterating.
import numpy as np
import openmdao.api as om
import dymos as dm
class PressureRate(om.ExplicitComponent):
    """Rate of change of the pressure inside the bottle.

    Evaluates ``p_dot = gamma * p / (Vb - Vl) * Vl_dot`` element-wise on
    vectors of length ``num_nodes``.
    """

    def initialize(self):
        """Declare the ``num_nodes`` option (length of every variable)."""
        self.options.declare('num_nodes', types=int)

    def setup(self):
        """Declare the inputs, the output and complex-step partials."""
        num_nodes = self.options['num_nodes']
        vec = (num_nodes,)

        # Inputs
        self.add_input('p', shape=vec, desc='Pressure inside the nox bottle', units='Pa')
        self.add_input('Vb', shape=vec, desc='Bottle volume', units='m**3')
        self.add_input('Vl', shape=vec, desc='Liquid volume', units='m**3')
        self.add_input('Vl_dot', shape=vec, desc='Liquid volume flow rate', units='m**3/s')
        self.add_input('gamma', shape=vec, desc='Heat capacity ratio')

        # Outputs
        self.add_output('p_dot', val=np.ones(num_nodes), desc='Pressure change rate', units='Pa/s')

        # Every partial derivative is approximated with the complex-step method.
        self.declare_partials(of='*', wrt='*', method='cs')

    def compute(self, inputs, outputs):
        """Fill ``outputs['p_dot']`` from the current inputs."""
        # Part of the bottle volume that is not occupied by liquid.
        free_volume = inputs['Vb'] - inputs['Vl']
        outputs['p_dot'] = inputs['gamma'] * inputs['p'] / free_volume * inputs['Vl_dot']
class VolumeFlowRate(om.ExplicitComponent):
    """Rate of change of the liquid volume held in the bottle.

    Evaluates, element-wise on vectors of length ``num_nodes``::

        Vl_dot = -Aout * sqrt(2 / rhol * (p - pout - deltap))

    ``Vl_dot`` is negative: it is used as the time derivative of the liquid
    volume ``Vl`` (and enters the pressure-rate equation), and the liquid
    volume shrinks while liquid is expelled. This mirrors the sign convention
    of the Dymos water-rocket example.
    """

    def initialize(self):
        """Declare the ``num_nodes`` option (length of every variable)."""
        self.options.declare('num_nodes', types=int)

    def setup(self):
        """Declare the inputs, the output and complex-step partials."""
        nn = self.options['num_nodes']

        # Inputs
        self.add_input('p', shape=(nn,), desc='Pressure inside the nox_bottle', units='Pa')
        self.add_input('pout', shape=(nn,), desc='Pressure outside the nox_bottle', units='Pa')
        self.add_input('deltap', shape=(nn,), desc='Nox bottle pressure losses', units='Pa')
        self.add_input('rhol', shape=(nn,), desc='Liquid density', units='kg/m**3')
        self.add_input('Aout', shape=(nn,), desc='Output nox_bottle area', units='m**2')

        # Outputs
        self.add_output('Vl_dot', shape=(nn,), desc='Volume flow rate', units='m**3/s')

        # Every partial derivative is approximated with the complex-step method.
        self.declare_partials(of='*', wrt='*', method='cs')

    def compute(self, inputs, outputs):
        """Fill ``outputs['Vl_dot']`` from the current inputs."""
        p = inputs['p']
        pout = inputs['pout']
        deltap = inputs['deltap']
        rhol = inputs['rhol']
        Aout = inputs['Aout']

        # Outflow speed at the outlet. The square root is only real-valued
        # while p >= pout + deltap.
        v_out = np.sqrt(2/rhol*(p - pout - deltap))

        # Minus sign: liquid leaving the bottle decreases the liquid volume.
        outputs['Vl_dot'] = -Aout*v_out
class BottleModelODE(om.Group):
    """ODE group made of ``VolumeFlowRate`` followed by ``PressureRate``.

    All inputs and outputs of both components are promoted, so they are
    addressed by their bare names (``p``, ``Vl_dot``, ``p_dot``, ...) at the
    level of this group.
    """

    def initialize(self):
        """Declare the ``num_nodes`` option forwarded to both components."""
        self.options.declare('num_nodes', types=int)

    def setup(self):
        """Add the two components, flow rate first."""
        num_nodes = self.options['num_nodes']

        # Added first: it produces 'Vl_dot', which 'pressure_rate' consumes.
        flow = VolumeFlowRate(num_nodes=num_nodes)
        self.add_subsystem(
            'volume_flow_rate',
            subsys=flow,
            promotes_inputs=['p', "pout", 'deltap', 'rhol', "Aout"],
            promotes_outputs=['Vl_dot'],
        )

        pressure = PressureRate(num_nodes=num_nodes)
        self.add_subsystem(
            'pressure_rate',
            subsys=pressure,
            promotes_inputs=['p', "Vb", "Vl", "Vl_dot", "gamma"],
            promotes_outputs=['p_dot'],
        )
def expulsion_phase_fn(transcription: dm.transcriptions.pseudospectral.radau_pseudospectral.Radau, pamb: float):
    """Build the Dymos phase describing the expulsion of the liquid.

    Args:
        transcription: Dymos transcription used to discretize the phase.
        pamb: Ambient pressure in Pa; used as the lower bound of state ``p``.

    Returns:
        The configured ``dm.Phase`` (initial time and duration are fixed).
    """
    phase = dm.Phase(ode_class=BottleModelODE, transcription=transcription)
    phase.set_time_options(fix_initial=True, fix_duration=True)

    # States. Rate sources and targets are the promoted (top-level) ODE names.
    # 'p' is declared in Pa so that it shares one unit with the ODE input 'p',
    # the 'pamb' bound and the parameter 'pout'. Declaring it in 'bar' while
    # feeding Pa numbers makes the ODE see pressures 1e5 times too large.
    phase.add_state('p', units='Pa', rate_source='p_dot', targets=['p'],
                    fix_initial=True, fix_final=False, lower=pamb)
    phase.add_state('Vl', units='m**3', rate_source='Vl_dot', targets=['Vl'],
                    fix_initial=True, fix_final=False, lower=0)

    # Parameters, each one targeting the promoted ODE input of the same name.
    phase.add_parameter('Vb', targets=['Vb'], units='m**3')
    phase.add_parameter('gamma', targets=['gamma'])
    phase.add_parameter('rhol', targets=['rhol'], units='kg/m**3')
    phase.add_parameter('Aout', targets=['Aout'], units='m**2')
    phase.add_parameter('pout', targets=['pout'], units="Pa")
    phase.add_parameter('deltap', targets=['deltap'], units="Pa")

    return phase
def trajectory(pamb: float):
    """Create a trajectory holding the single phase ``'expulsion'``.

    Args:
        pamb: Forwarded to ``expulsion_phase_fn``.

    Returns:
        Tuple ``(traj, expulsion_phase)``: the trajectory and the phase object
        returned by ``traj.add_phase``.
    """
    # Previously: dm.Radau(num_segments=50, solve_segments='forward')
    radau = dm.Radau(num_segments=3)
    phase = expulsion_phase_fn(transcription=radau, pamb=pamb)

    traj = dm.Trajectory()
    # Add phases to trajectory
    expulsion_phase = traj.add_phase('expulsion', phase)
    return traj, expulsion_phase
if __name__ == "__main__":
    # Ambient pressure (same numeric value as bottle_params["pout"]) and
    # pressure losses.
    pamb = 100*10**3
    deltap = 0

    # Bottle properties, initial conditions and parameter values, keyed by the
    # name of the Dymos state/parameter they are assigned to below.
    bottle_params = {
        "Vb": 5*10**-3,
        "gamma": 1.4,
        "Aout": 3*10**-2,
        "rhol": 1000,
        "pout": 100*10**3,
        "deltap": deltap,
        "pinit": 300*10**3,
        "Vl": 1*10**-3,
    }

    # Instantiate an OpenMDAO Problem instance
    prob = om.Problem(model=om.Group())
    prob.driver = om.ScipyOptimizeDriver(optimizer='SLSQP')

    # Instantiate a Dymos trajectory and add it to the Problem model.
    # The bound is the ambient pressure defined above (the former literal
    # 100*10*3 evaluated to 3000 instead of 100*10**3).
    traj, phase = trajectory(pamb=pamb)
    phase.add_objective("time", loc="final")

    # Setup the OpenMDAO problem
    prob.model.add_subsystem("traj", traj)
    prob.setup()

    # Assign values to the times and states. No units are passed, so each
    # value is taken in the units declared for that variable on the phase.
    prob.set_val('traj.expulsion.t_initial', 0.0)
    prob.set_val('traj.expulsion.t_duration', 200.0)
    prob.set_val('traj.expulsion.states:p', bottle_params["pinit"])
    prob.set_val('traj.expulsion.states:Vl', bottle_params["Vl"])

    # Assign the parameter values.
    for name in ("Vb", "gamma", "rhol", "Aout", "pout", "deltap"):
        prob.set_val(f'traj.expulsion.parameters:{name}', bottle_params[name])

    prob.run_driver()
Related
I have a convolutional neural network, and I want to train it on images from the training set, but first they should be passed through my function change(tensor, float), which takes a tensor/image of the form [height, width, 3] and a float.
# Batch size
batch_size = 4

# Loading data: CIFAR10 training split (downloaded to ./data if missing),
# served in shuffled mini-batches by two worker processes.
trainset = torchvision.datasets.CIFAR10(root='./data', train=True,
                                        download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size,
                                          shuffle=True, num_workers=2)
CNN architecture (the definitions of net, criterion and optimizer are not shown)
for epoch in range(2):  # loop over the dataset multiple times
    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        # get the inputs; data is a list of [inputs, labels]
        inputs, labels = data
        # size of inputs [4,3,32,32]
        # size of labels [4]
        inputs = change(inputs, 0.1)  # <---- the call that raises the error

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = net(inputs)  # [4, 10]
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # print statistics
        running_loss += loss.item()
        if i % 2000 == 1999:  # print every 2000 mini-batches
            print(f'[{epoch + 1}, {i + 1:5d}] loss: {running_loss / 2000:.3f}')
            running_loss = 0.0

print('Finished Training')
I am trying to apply the function change to the images, but it raises an error about the object type.
Is there a quick way to fix it?
change is a Julia function, and it works completely fine with other objects. Error message:
JULIA: MethodError: no method matching copy(::PyObject)
Closest candidates are:
copy(!Matched::T) where T<:SHA.SHA3_CTX at /opt/julia-1.7.2/share/julia/stdlib/v1.7/SHA/src/types.jl:213
copy(!Matched::T) where T<:SHA.SHA2_CTX at /opt/julia-1.7.2/share/julia/stdlib/v1.7/SHA/src/types.jl:212
copy(!Matched::Number) at /opt/julia-1.7.2/share/julia/base/number.jl:113
I would recommend putting the change function into the list of transforms, so that the data is modified at the transformation stage.
partial from functools will let you fix the extra argument, like this:
from functools import partial
def change(input, float):
    """Placeholder for the user's function; does nothing and returns None."""


# Bind the second argument so the resulting callable takes only the input.
change_partial = partial(change, float=pass_float_value_here)

# Put change_partial in the list of transforms, before or after the conversion
# to tensors.
transforms = Compose(
    [
        RandomResizedCrop(img_size),  # example
        # Position for change_partial if it operates on a PIL Image
        change_partial,
        ToTensor(),  # convert to tensor
        # Position for change_partial if it operates on torch tensors
        change_partial,
    ]
)
I am training a VAE (using federated learning, but that is not so important) and wanted to keep the loss and train functions easy to exchange. The initial approach was to have one tf.function as the loss function and another tf.function as the train function, as follows:
@tf.function
def kl_reconstruction_loss(model, model_input, beta):
    """Compute the beta-weighted VAE loss for one batch.

    Args:
        model: Object providing ``encode(x, y)``, ``reparameterize(mean,
            logvar)`` and ``decode(z, y)``.
        model_input: Pair ``(x, y)``; ``x`` is both the encoder input and the
            reconstruction target, ``y`` is passed to encoder and decoder.
        beta: Weight applied to the KL term.

    Returns:
        Tuple ``(loss, kl_loss, reconstruction_loss)`` with
        ``loss = reconstruction_loss + beta * kl_loss``.
    """
    x, y = model_input
    mean, logvar = model.encode(x, y)
    z = model.reparameterize(mean, logvar)
    x_logit = model.decode(z, y)

    # Sigmoid cross-entropy summed over axes 1-3, then averaged over axis 0.
    cross_ent = tf.nn.sigmoid_cross_entropy_with_logits(logits=x_logit, labels=x)
    reconstruction_loss = tf.reduce_mean(tf.reduce_sum(cross_ent, axis=[1, 2, 3]), axis=0)

    # KL term summed over the last axis, then averaged over axis 0.
    kl_loss = tf.reduce_mean(0.5 * tf.reduce_sum(tf.exp(logvar) + tf.square(mean) - 1. - logvar, axis=-1), axis=0)

    loss = reconstruction_loss + beta * kl_loss
    return loss, kl_loss, reconstruction_loss
@tf.function
def train_fn(model: tf.keras.Model, batch, optimizer, kl_beta):
    """Trains the model on a single batch.

    Args:
        model: The VAE model.
        batch: A batch of inputs [images, labels] for the vae.
        optimizer: The optimizer to train the model.
        kl_beta: Weighting of KL loss.

    Returns:
        The loss.
    """
    # N.B. The complete forward pass must run inside the tape for gradient
    # tracing.
    with tf.GradientTape() as tape:
        loss, _, _ = kl_reconstruction_loss(model, batch, kl_beta)

    grads = tape.gradient(loss, model.trainable_variables)
    grads_and_vars = zip(grads, model.trainable_variables)
    optimizer.apply_gradients(grads_and_vars)
    return loss
For my dataset this results in an epoch duration of approx. 25 seconds. However, since I call those functions directly in my code, I would have to edit the call sites whenever I want to try out different loss/train functions.
So, alternatively, I followed https://github.com/google-research/federated/tree/master/gans and wrapped the loss function in a class and the train function in another function. Now I have:
class VaeKlReconstructionLossFns(AbstractVaeLossFns):
    """VAE loss: reconstruction term plus a round-dependent weighted KL term."""

    @tf.function
    def vae_loss(self, model, model_input, labels, global_round):
        """Compute the VAE loss for one batch.

        Args:
            model: Object providing ``encode``, ``reparameterize``, ``decode``
                and a ``losses`` attribute.
            model_input: Batch used as encoder input and reconstruction target.
            labels: Batch of labels passed to encoder and decoder.
            global_round: Passed to ``self._get_beta`` to obtain the KL weight.

        Returns:
            Tuple ``(loss, kl_loss, reconstruction_loss)``.
        """
        # KL Reconstruction loss
        mean, logvar = model.encode(model_input, labels)
        z = model.reparameterize(mean, logvar)
        x_logit = model.decode(z, labels)

        # Sigmoid cross-entropy summed over axes 1-3, then averaged over axis 0.
        cross_ent = tf.nn.sigmoid_cross_entropy_with_logits(logits=x_logit, labels=model_input)
        reconstruction_loss = tf.reduce_mean(tf.reduce_sum(cross_ent, axis=[1, 2, 3]), axis=0)

        # KL term summed over the last axis, then averaged over axis 0.
        kl_loss = tf.reduce_mean(0.5 * tf.reduce_sum(tf.exp(logvar) + tf.square(mean) - 1. - logvar, axis=-1), axis=0)

        loss = reconstruction_loss + self._get_beta(global_round) * kl_loss

        # Add any extra losses registered on the model.
        if model.losses:
            loss += tf.add_n(model.losses)
        return loss, kl_loss, reconstruction_loss
def create_train_vae_fn(
        vae_loss_fns: vae_losses.AbstractVaeLossFns,
        vae_optimizer: tf.keras.optimizers.Optimizer):
    """Create a function that trains VAE, binding loss and optimizer.

    Args:
        vae_loss_fns: Instance of vae_losses.AbstractVaeLossFns interface,
            specifying the VAE training loss.
        vae_optimizer: Optimizer for training the VAE.

    Returns:
        Function that executes one step of VAE training.

    Raises:
        ValueError: If `vae_optimizer` already holds variables.
    """
    # We check that the optimizer has not been used previously, which ensures
    # that when it is bound the train fn isn't holding onto a different copy of
    # the optimizer variables than the copy that is being exchanged b/w server
    # and clients.
    if vae_optimizer.variables():
        raise ValueError(
            'Expected vae_optimizer to not have been used previously, but '
            'variables were already initialized.')

    # NOTE(review): presumably, if `global_round` is passed as a plain Python
    # number, tf.function traces a new graph for every distinct value; passing
    # a tf.Tensor would avoid that -- confirm against the caller.
    @tf.function
    def train_vae_fn(model: tf.keras.Model,
                     model_inputs,
                     labels,
                     global_round,
                     new_optimizer_state=None):
        """Trains the model on a single batch.

        Args:
            model: The VAE model.
            model_inputs: A batch of inputs (usually images) for the VAE.
            labels: A batch of labels corresponding to the inputs.
            global_round: The current global FL round for beta calculation.
            new_optimizer_state: A possible optimizer state to overwrite the
                current one with.

        Returns:
            The number of examples trained on.
            The loss.
            The updated optimizer state.
        """

        def vae_loss():
            """Does the forward pass and computes losses for the VAE."""
            # N.B. The complete pass must be inside loss() for gradient tracing.
            return vae_loss_fns.vae_loss(model, model_inputs, labels, global_round)

        # Set optimizer vars
        optimizer_state = get_optimizer_state(vae_optimizer)
        if new_optimizer_state is not None:
            # if optimizer is uninitialised, initialise vars
            try:
                tf.nest.assert_same_structure(optimizer_state, new_optimizer_state)
            except ValueError:
                initialize_optimizer_vars(vae_optimizer, model)
                optimizer_state = get_optimizer_state(vae_optimizer)
                tf.nest.assert_same_structure(optimizer_state, new_optimizer_state)
            # Overwrite the current optimizer state with the provided one.
            tf.nest.map_structure(lambda a, b: a.assign(b), optimizer_state, new_optimizer_state)

        with tf.GradientTape() as tape:
            loss, _, _ = vae_loss()

        grads = tape.gradient(loss, model.trainable_variables)
        grads_and_vars = zip(grads, model.trainable_variables)
        vae_optimizer.apply_gradients(grads_and_vars)
        return tf.shape(model_inputs)[0], loss, optimizer_state

    return train_vae_fn
This new formulation takes about 86 seconds per epoch.
I am struggling to understand why the second version performs so much worse than the first one. Does anyone have a good explanation for this?
Thanks in advance!
EDIT: My Tensorflow version is 2.5.0
I am new to programming and used Google OR-Tools to create my VRP model. In my current model, I have included a general time window and a capacity constraint per vehicle, creating a capacitated vehicle routing problem with time windows. I followed the OR-Tools guides, which include a maximum travel duration for each vehicle.
However, I want to impose a maximum travel duration on the sum of all routes, whereas the maximum travel duration for each vehicle does not matter (so I set it to 100000). Accordingly, I want to add something to the model/solution printer that tells me how many addresses could not be visited because of the constraint on the maximum travel duration for the sum of all routes. From the examples I have seen, I think it should be fairly easy, but my programming knowledge is limited, so my attempts have had no success. Can anyone help me?
import pandas as pd
import openpyxl
import numpy as np
import math
from random import sample
from ortools.constraint_solver import routing_enums_pb2
from ortools.constraint_solver import pywrapcp
from scipy.spatial.distance import squareform, pdist
from haversine import haversine
#STEP - create data
# import/read excel file; its first row is the depot, the others are addresses
data = pd.read_excel(r'C:\Users\Jean-Paul\Documents\Thesis\OR TOOLS\Data.xlsx', engine = 'openpyxl')
df = pd.DataFrame(data, columns= ['number','lat','lng'])
# first row of the excel file (= coordinates of the depot)
df_depot = df.iloc[0:1]
# randomly sample X addresses with their number/latitude/longitude; the depot
# row is excluded so that it cannot be drawn a second time as a customer
df_sample = df.iloc[1:].sample(n=100)
# combine depot and sample into one dataframe (depot at position 0)
df_data = pd.concat([df_depot, df_sample], ignore_index=True, sort=False)

#STEP - create distance matrix data
# pairwise haversine distance between the (lat, lng) rows
df_data.set_index('number', inplace=True)
matrix_distance = pd.DataFrame(squareform(pdist(df_data, metric=haversine)), index=df_data.index, columns=df_data.index)
# same distances as a plain array, without the row/column headers
matrix_list = np.array(matrix_distance)

#STEP - create time matrix data
# distance divided by the travel speed, times 60 minutes per hour
SPEED_KMH = 15
travel_time = matrix_list / SPEED_KMH * 60

#STEP - create time window data
# every location (depot included) gets the same window of 0-240 minutes
window_range = [[0, 240] for _ in range(len(df_data))]

#STEP - create demand data
# all addresses demand 1 parcel except the depot (first entry), which demands 0
demand_range = [0] + [1] * (len(df_data) - 1)

#STEP - create fleet size data # amount of vehicles in the fleet
fleet_size = 6

#STEP - create capacity data for each vehicle
# capacity per vehicle
fleet_capacity = [20] * fleet_size
#STEP - create data model that stores all data for the problem
def create_data_model():
    """Collect the module-level problem data into a single dict.

    Returns:
        Dict with the keys 'time_matrix', 'time_windows', 'num_vehicles',
        'depot', 'demands' and 'vehicle_capacities'.
    """
    return {
        'time_matrix': travel_time,
        'time_windows': window_range,
        'num_vehicles': fleet_size,
        'depot': 0,  # index of the depot
        'demands': demand_range,
        'vehicle_capacities': fleet_capacity,
    }
#STEP - creating the solution printer
def print_solution(data, manager, routing, solution):
    """Print the objective, every vehicle route and the summed route time.

    Args:
        data: Problem data; only 'num_vehicles' is read.
        manager: Index manager used to convert routing indices to nodes.
        routing: Routing model holding the 'Time' dimension.
        solution: Assignment returned by the solver.
    """
    print(f'Objective: {solution.ObjectiveValue()}')
    time_dimension = routing.GetDimensionOrDie('Time')

    def describe(idx):
        """Format one stop as '<node> Time(<min>,<max>)'."""
        cumul = time_dimension.CumulVar(idx)
        return f'{manager.IndexToNode(idx)} Time({solution.Min(cumul)},{solution.Max(cumul)})'

    total_time = 0
    for vehicle_id in range(data['num_vehicles']):
        # Walk the route from the vehicle's start until its end index.
        stops = []
        index = routing.Start(vehicle_id)
        while not routing.IsEnd(index):
            stops.append(describe(index))
            index = solution.Value(routing.NextVar(index))
        stops.append(describe(index))

        # The route time is the smallest cumulative time at the end index.
        route_time = solution.Min(time_dimension.CumulVar(index))
        print(
            f'Route for vehicle {vehicle_id}:\n'
            + ' -> '.join(stops) + '\n'
            + f'Time of the route: {route_time}min\n'
        )
        total_time += route_time

    print(f'Total time of all routes: {total_time}min')
#STEP - create the VRP solver
def main():
    """Build the capacitated VRP with time windows, solve it and print it."""
    # instantiate the data problem
    data = create_data_model()
    # create the routing index manager
    manager = pywrapcp.RoutingIndexManager(len(data['time_matrix']),
                                           data['num_vehicles'], data['depot'])
    # create routing model
    routing = pywrapcp.RoutingModel(manager)

    #STEP - create demand callback and dimension for capacity
    # create and register a transit callback
    def demand_callback(from_index):
        """Returns the demand of the node."""
        # convert from routing variable Index to demands NodeIndex
        from_node = manager.IndexToNode(from_index)
        return data['demands'][from_node]

    demand_callback_index = routing.RegisterUnaryTransitCallback(
        demand_callback)
    routing.AddDimensionWithVehicleCapacity(
        demand_callback_index,
        0,  # null capacity slack
        data['vehicle_capacities'],  # vehicle maximum capacities
        True,  # start cumul to zero
        'Capacity')

    #STEP - create time callback
    # create and register a transit callback
    def time_callback(from_index, to_index):
        """Returns the travel time between the two nodes, in whole minutes."""
        # convert from routing variable Index to time matrix NodeIndex
        from_node = manager.IndexToNode(from_index)
        to_node = manager.IndexToNode(to_index)
        # The routing solver computes with integers, so the (possibly
        # fractional) matrix entry is rounded to a Python int.
        return int(round(data['time_matrix'][from_node][to_node]))

    transit_callback_index = routing.RegisterTransitCallback(time_callback)
    # define cost of each Arc (costs in terms of travel time)
    routing.SetArcCostEvaluatorOfAllVehicles(transit_callback_index)

    # STEP - create a dimension for the travel time (TIMEWINDOW) - dimension
    # keeps track of quantities that accumulate over a vehicles route
    # add time windows constraint
    time = 'Time'
    routing.AddDimension(
        transit_callback_index,
        2,  # allow waiting time
        100000,  # maximum total route length in minutes per vehicle
        False,  # do not force start cumul to zero
        time)
    time_dimension = routing.GetDimensionOrDie(time)

    # add time window constraints for each location except depot
    for location_idx, time_window in enumerate(data['time_windows']):
        if location_idx == data['depot']:
            continue
        index = manager.NodeToIndex(location_idx)
        time_dimension.CumulVar(index).SetRange(time_window[0], time_window[1])

    # add time window constraint for each vehicle start node
    depot_idx = data['depot']
    for vehicle_id in range(data['num_vehicles']):
        index = routing.Start(vehicle_id)
        time_dimension.CumulVar(index).SetRange(
            data['time_windows'][depot_idx][0],
            data['time_windows'][depot_idx][1])

    #STEP - instantiate route start and end times to produce feasible times
    for i in range(data['num_vehicles']):
        routing.AddVariableMinimizedByFinalizer(
            time_dimension.CumulVar(routing.Start(i)))
        routing.AddVariableMinimizedByFinalizer(
            time_dimension.CumulVar(routing.End(i)))

    #STEP - setting default search parameters and a heuristic method for
    # finding the first solution
    search_parameters = pywrapcp.DefaultRoutingSearchParameters()
    search_parameters.first_solution_strategy = (
        routing_enums_pb2.FirstSolutionStrategy.PATH_CHEAPEST_ARC)

    #STEP - solve the problem with the search parameters and print solution
    solution = routing.SolveWithParameters(search_parameters)
    if solution:
        print_solution(data, manager, routing, solution)


if __name__ == '__main__':
    main()
See @Mizux's answer, which goes under the hood of the solver to build a cost over the sum of all vehicle route lengths:
https://stackoverflow.com/a/68756570/13773745
Doing things on Google Colab.
transformers: 4.10.2
pytorch-lightning: 1.2.7
import torch
from torch.utils.data import DataLoader
from transformers import BertJapaneseTokenizer, BertForSequenceClassification
import pytorch_lightning as pl
# Four toy samples: sample i holds the data [2*i, 2*i + 1] and the label i.
dataset_for_loader = [
    {'data': torch.tensor([2 * i, 2 * i + 1]), 'labels': torch.tensor(i)}
    for i in range(4)
]

# Serve the samples two at a time and show each collated batch.
loader = DataLoader(dataset_for_loader, batch_size=2)
for idx, batch in enumerate(loader):
    print(f'# batch {idx}')
    print(batch)
category_list = [
    'dokujo-tsushin',
    'it-life-hack',
    'kaden-channel',
    'livedoor-homme',
    'movie-enter',
    'peachy',
    'smax',
    'sports-watch',
    'topic-news',
]

tokenizer = BertJapaneseTokenizer.from_pretrained(MODEL_NAME)
max_length = 128

dataset_for_loader = []
for label, category in enumerate(tqdm(category_list)):
    # ./text holds many plain-text articles, grouped by category;
    # the article body starts at the fourth line of each file
    for file in glob.glob(f'./text/{category}/{category}*'):
        with open(file, encoding='utf-8') as f:
            lines = f.read().splitlines()
        text = '\n'.join(lines[3:])
        encoding = tokenizer(
            text,
            max_length=max_length,
            padding='max_length',
            truncation=True,
        )
        encoding['labels'] = label
        encoding = {k: torch.tensor(v) for k, v in encoding.items()}
        dataset_for_loader.append(encoding)

# Reproducible shuffle. The former random.shuffle(data, lambda: 0.0) only
# rotated the list by one position, so the samples stayed grouped by label and
# the train/val/test splits below covered different categories; that second
# argument was also removed in Python 3.11.
SHUFFLE_SEED = 0
random.Random(SHUFFLE_SEED).shuffle(dataset_for_loader)

# 60% train / 20% validation / remaining samples for test
n = len(dataset_for_loader)
n_train = int(0.6*n)
n_val = int(0.2*n)
dataset_train = dataset_for_loader[:n_train]
dataset_val = dataset_for_loader[n_train:n_train+n_val]
dataset_test = dataset_for_loader[n_train+n_val:]

dataloader_train = DataLoader(
    dataset_train, batch_size=32, shuffle=True
)
dataloader_val = DataLoader(dataset_val, batch_size=256)
dataloader_test = DataLoader(dataset_test, batch_size=256)
class BertForSequenceClassification_pl(pl.LightningModule):
    """LightningModule wrapping a BertForSequenceClassification model.

    Logs 'train_loss', 'val_loss' and (during testing) 'accuracy'.
    """

    def __init__(self, model_name, num_labels, lr):
        """Load the pretrained classifier.

        Args:
            model_name: name/path passed to ``from_pretrained``.
            num_labels: number of target classes.
            lr: learning rate, read back via ``self.hparams.lr``.
        """
        super().__init__()
        # Records the constructor arguments so that self.hparams.lr is
        # available in configure_optimizers().
        self.save_hyperparameters()
        self.bert_sc = BertForSequenceClassification.from_pretrained(
            model_name,
            num_labels=num_labels,
        )

    def training_step(self, batch, batch_idx):
        """Run the model on ``batch``, log its loss and return it."""
        output = self.bert_sc(**batch)
        loss = output.loss
        self.log('train_loss', loss)
        return loss

    def validation_step(self, batch, batch_idx):
        """Run the model on ``batch`` and log its loss as 'val_loss'."""
        output = self.bert_sc(**batch)
        val_loss = output.loss
        self.log('val_loss', val_loss)

    def test_step(self, batch, batch_idx):
        """Log the fraction of samples in ``batch`` predicted correctly."""
        # Split the labels off without mutating the caller's batch dict
        # (the original used batch.pop('labels')).
        labels = batch['labels']
        inputs = {key: value for key, value in batch.items() if key != 'labels'}
        output = self.bert_sc(**inputs)
        labels_predicted = output.logits.argmax(-1)
        num_correct = (labels_predicted == labels).sum().item()
        accuracy = num_correct / labels.size(0)
        self.log('accuracy', accuracy)

    def configure_optimizers(self):
        """Return an Adam optimizer over all parameters with the stored lr."""
        return torch.optim.Adam(self.parameters(), lr=self.hparams.lr)
# Checkpointing: keep only the single best model (lowest 'val_loss'),
# weights only, under model/.
checkpoint = pl.callbacks.ModelCheckpoint(
    dirpath='model/',
    monitor='val_loss', mode='min',
    save_top_k=1, save_weights_only=True,
)

# One GPU, at most ten epochs, with the checkpoint callback attached.
trainer = pl.Trainer(gpus=1, max_epochs=10, callbacks=[checkpoint])

# Nine output classes, learning rate 1e-5.
model = BertForSequenceClassification_pl(MODEL_NAME, num_labels=9, lr=1e-5)

### (a) ###

# I think this is where I am doing fine-tuning
trainer.fit(model, dataloader_train, dataloader_val)

# this is to score after fine-tuning
test = trainer.test(test_dataloaders=dataloader_test)
print(f'Accuracy: {test[0]["accuracy"]:.2f}')
But I am not sure how to run a test before fine-tuning, so that I can compare the model before and after fine-tuning and show how effective fine-tuning is.
Inserting the following two lines to ### (a) ###:
test = trainer.test(test_dataloaders=dataloader_test)
print(f'Accuracy: {test[0]["accuracy"]:.2f}')
I got this result:
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
<ipython-input-13-c8b2c67f2d5c> in <module>()
9
10 # 6-19
---> 11 test = trainer.test(test_dataloaders=dataloader_test)
12 print(f'Accuracy: {test[0]["accuracy"]:.2f}')
13
/usr/local/lib/python3.7/dist-packages/pytorch_lightning/trainer/trainer.py in test(self, model, test_dataloaders, ckpt_path, verbose, datamodule)
896 self.verbose_test = verbose
897
--> 898 self._set_running_stage(RunningStage.TESTING, model or self.lightning_module)
899
900 # If you supply a datamodule you can't supply train_dataloader or val_dataloaders
/usr/local/lib/python3.7/dist-packages/pytorch_lightning/trainer/trainer.py in _set_running_stage(self, stage, model_ref)
563 the trainer and the model
564 """
--> 565 model_ref.running_stage = stage
566 self._running_stage = stage
567
AttributeError: 'NoneType' object has no attribute 'running_stage'
I noticed that Trainer.fit() can take None as arguments other than model, so I tried this:
trainer.fit(model)
test=trainer.test(test_dataloaders=dataloader_test)
print(f'Accuracy: {test[0]["accuracy"]:.2f}')
The result:
MisconfigurationException: No `train_dataloader()` method defined. Lightning `Trainer` expects as minimum a `training_step()`, `train_dataloader()` and `configure_optimizers()` to be defined.
Thanks.
The Trainer needs to call its .fit() in order to set up a lot of things, and only then can you call .test() or other methods.
You are right about putting a .fit() just before .test(), but the fit call needs to be a valid one: you have to feed a dataloader/datamodule to it. Since you don't want to do any training/validation in this fit call, just pass limit_[train/val]_batches=0 when constructing the Trainer.
trainer = Trainer(gpus=..., ..., limit_train_batches=0, limit_val_batches=0)
trainer.fit(model, dataloader_train, dataloader_val)
trainer.test(model, dataloader_test) # without fine-tuning
The fit call here will just set things up for you and skip training/validation, and then the testing follows. Next time, run the same code but without the limit_[train/val]_batches arguments; this will do the fine-tuning for you.
trainer = Trainer(gpus=..., ...)
trainer.fit(model, dataloader_train, dataloader_val)
trainer.test(model, dataloader_test) # with fine-tuning
Clarifying a bit about .fit() taking None for all but model: it's not quite true - you must provide either a DataLoader or a DataModule.
After some research, I was able to predict the future value using the LSTM code below. I have also attached the Dmd1ahr.csv file in the github link that I am using.
https://github.com/ukeshchawal/hello-world/blob/master/Dmd1ahr.csv
As you can see below, 90 data points form the training set and the 91st to 100th points are the predicted future values.
However some of the questions that I still have are:
In order to predict these values I originally had to take more than a hundred data points (here, I have taken 500), which is not exactly my primary goal. Is there a way, given 500 data points, to predict the next 10 or 20 out-of-sample data points? If yes, could you please write me some sample code that takes just 500 data points from the attached Dmd1ahr.csv file and predicts some future values (say 501 to 520) based on those 500 points?
The predictions are way off compared to the ones you have in your blog (which definitely points to parameter tuning - I tried changing the epochs, LSTM layers, activation and optimizer). What other parameter tuning can I do to make it more robust?
Thank you all in advance.
import numpy as np
import matplotlib.pyplot as plt
import pandas
# By tweaking the architecture it could be made more robust.
np.random.seed(7)  # seed NumPy's global random generator

numOfSamples = 500
lengthTrain, lengthValidation = 90, 100
# Window length fed to the network. Can be set higher; in my experiments it
# made performance worse though.
look_back = 1
# Number of steps used to "burn in" the time series before free-running.
transientTime = 90

series = pandas.read_csv('Dmd1ahr.csv')
def generateTrainData(series, i, look_back):
    """Return the window of ``look_back + 1`` consecutive items starting at ``i``.

    Args:
        series: any object supporting slice indexing.
        i: start index of the window.
        look_back: number of items after the first to include.

    Returns:
        ``series[i:i + look_back + 1]``; shorter (or empty) when the window
        runs past the end of ``series``, as usual for slicing.
    """
    return series[i:look_back + i + 1]
# Build one window of look_back + 1 consecutive rows per start index, then
# give every window a trailing feature axis of size 1.
# NOTE(review): the reshape assumes each row holds a single value (one CSV
# column) - confirm against Dmd1ahr.csv.
trainWindows = np.stack(
    [generateTrainData(series, start, look_back) for start in range(lengthTrain)]
).reshape((lengthTrain, look_back + 1, 1))
testWindows = np.stack(
    [generateTrainData(series, lengthTrain + start, look_back) for start in range(lengthValidation)]
).reshape((lengthValidation, look_back + 1, 1))

# Inputs are each window without its last step; targets are the same window
# without its first step, i.e. shifted one step ahead.
trainX, trainY = trainWindows[:, :-1, :], trainWindows[:, 1:, :]
testX, testY = testWindows[:, :-1, :], testWindows[:, 1:, :]
############### Build Model ###############
import keras
from keras.models import Model
from keras import layers
from keras import regularizers
# Two inputs with a fixed batch size of 1 (required by the stateful LSTMs).
inputs = layers.Input(batch_shape=(1, look_back, 1), name="main_input")
inputsAux = layers.Input(batch_shape=(1, look_back, 1), name="aux_input")

# Stacked stateful LSTMs make the actual prediction, i.e. decide if and how
# much the series goes up or down. Each layer consumes the previous layer's
# sequence output. Previously all four were applied to `inputs`, so only
# the last (50-unit) layer was connected to the output and the other three
# were dead.
x = layers.LSTM(300, return_sequences=True, stateful=True)(inputs)
x = layers.LSTM(200, return_sequences=True, stateful=True)(x)
x = layers.LSTM(100, return_sequences=True, stateful=True)(x)
x = layers.LSTM(50, return_sequences=True, stateful=True)(x)
x = layers.TimeDistributed(
    layers.Dense(
        1,
        activation="linear",
        kernel_regularizer=regularizers.l2(0.005),
        activity_regularizer=regularizers.l1(0.005),
    )
)(x)

# Auxiliary input: the current input is fed directly to the output through
# a frozen identity Dense layer (weight 1, bias 0). This way the value from
# the step before is used as a "base", and the network just has to learn
# whether the series goes a little up or down.
auxX = layers.TimeDistributed(
    layers.Dense(
        1,
        kernel_initializer=keras.initializers.Constant(value=1),
        bias_initializer='zeros',
        input_shape=(1, 1),
        activation="linear",
        trainable=False,
    )
)(inputsAux)

outputs = layers.add([x, auxX], name="main_output")
model = Model(inputs=[inputs, inputsAux], outputs=outputs)
model.compile(optimizer='adam',
              loss='mean_squared_error',
              metrics=['mean_squared_error'])
# model.summary()  # uncomment to inspect the architecture

# batch_size=1 and shuffle=False keep the samples in time order, which the
# stateful layers rely on. The aux input is the last step of each window.
model.fit(
    {"main_input": trainX,
     "aux_input": trainX[:, look_back - 1, :].reshape(lengthTrain, 1, 1)},
    {"main_output": trainY},
    epochs=100,
    batch_size=1,
    shuffle=False,
)
############### make predictions ###############
burnedInPredictions = np.zeros(transientTime)
testPredictions = np.zeros(len(testX))

# Burn the series in: feed the first transientTime samples of the test data
# through the model one at a time and record each prediction.
for i in range(transientTime):
    prediction = model.predict([
        np.array(testX[i, :, 0].reshape(1, look_back, 1)),
        np.array(testX[i, look_back - 1, 0].reshape(1, 1, 1)),
    ])
    testPredictions[i] = prediction[0, 0, 0]

burnedInPredictions[:] = testPredictions[:transientTime]

# Prediction: no real data is used any more; the network runs on its own
# previous output, passed as both the main and the auxiliary input.
for i in range(transientTime, len(testX)):
    prediction = model.predict([prediction, prediction])
    testPredictions[i] = prediction[0, 0, 0]

# For plotting reasons: blank out all but the last burn-in value so that
# the two plotted curves do not overlap.
testPredictions[:np.size(burnedInPredictions) - 1] = np.nan
############### plot results ###############
# First figure: the first step of every test window.
plt.plot(testX[:, 0, 0])
plt.show()

# Second figure: burn-in predictions and free-running predictions on the
# same axes, each with its own legend entry.
for curve, caption in (
    (burnedInPredictions, "training"),
    (testPredictions, "prediction"),
):
    plt.plot(curve, label=caption)
plt.legend()
plt.show()