My model learns a single set of eigenvalues and eigenvectors common to all input matrices

I am having an issue with my current model. Any help would be appreciated. The model is not working because it learns a single set of eigenvalues and eigenvectors common to all input matrices. To counter that I tried changing my architecture, but the outcome is the same. This is my current architecture:
import torch
import torch.nn as nn

class EigenNet(nn.Module):
    def __init__(self):
        super(EigenNet, self).__init__()
        # shared encoder: flattened 3x3 matrix (9 values) -> 512 features
        self.fc1 = nn.Linear(9, 128).double()
        self.fc_hidden_2 = nn.Linear(128, 256).double()
        self.fc_hidden_3 = nn.Linear(256, 512).double()
        # self.hidden_module_list = torch.nn.ModuleList([torch.nn.ModuleList([nn.Linear(256, 256).double(), nn.Linear(256, 256).double(),
        #     nn.Linear(256, 256).double()]) for i in range(20)])
        # separate decoder heads for eigenvectors and eigenvalues
        self.dec_net_eigvec = nn.ModuleList([nn.Linear(512, 256).double(), nn.Linear(256, 128).double(), nn.Linear(128, 64).double(), nn.Linear(64, 32).double()])
        self.dec_net_eigval = nn.ModuleList([nn.Linear(512, 256).double(), nn.Linear(256, 128).double(), nn.Linear(128, 64).double(), nn.Linear(64, 32).double()])
        self.fc_eigval = nn.Linear(32, 3).double()
        self.fc_eigvec = nn.Linear(32, 9).double()

    def forward(self, x):
        x = torch.nn.functional.relu(self.fc1(x))
        x = torch.nn.functional.relu(self.fc_hidden_2(x))
        x = torch.nn.functional.relu(self.fc_hidden_3(x))
        y = torch.clone(x)
        for l in self.dec_net_eigvec:
            y = torch.nn.functional.relu(l(y))
        z = torch.clone(x)
        for l in self.dec_net_eigval:  # eigenvalue branch uses its own decoder (the original looped dec_net_eigvec here too)
            z = torch.nn.functional.relu(l(z))
        val = self.fc_eigval(z)
        vec = self.fc_eigvec(y)
        return (val, vec)
Here, each given matrix is a real symmetric matrix.
The model should learn different eigenvalues and eigenvectors for each matrix, but instead it learns a single set of eigenvalues and eigenvectors common to all input matrices.
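For reference, a minimal sketch of how such a model might be trained, assuming the inputs are flattened 3x3 symmetric matrices and the targets come from torch.linalg.eigh (the batch, loss weighting, and optimizer settings here are illustrative assumptions, not part of the original post):

import torch

# hypothetical training step for the EigenNet above
model = EigenNet()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

# random batch of symmetric 3x3 matrices (illustrative data)
A = torch.randn(64, 3, 3, dtype=torch.float64)
A = 0.5 * (A + A.transpose(1, 2))               # symmetrize
target_val, target_vec = torch.linalg.eigh(A)   # reference eigendecomposition
x = A.reshape(64, 9)                            # flatten to match nn.Linear(9, ...)

pred_val, pred_vec = model(x)
loss = torch.nn.functional.mse_loss(pred_val, target_val) \
     + torch.nn.functional.mse_loss(pred_vec, target_vec.reshape(64, 9))
optimizer.zero_grad()
loss.backward()
optimizer.step()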

Related

Julia: isposdef() fails for large matrices?

I have a positive definite covariance matrix C of size 3n x 3n constructed from n^2 blocks of size 3x3.
Running MvNormal on this matrix (with e.g. a zero mean vector) to draw Gaussian random vectors, I get the error
PosDefException: matrix is not positive definite; Cholesky factorization failed.
and indeed checking isposdef(C) returns false when n becomes too large. However, my matrix should be positive definite for any n, so it seems that there is some kind of numerical instability (perhaps due to the determinant becoming too small or too large beyond machine precision).
The reproducible code I am using to generate C is below:
using LinearAlgebra   # for I, issymmetric, det, isposdef

#######################################################
# inputs
grid_size = 10
l_sq = 1
xmax = 2
#######################################################
# kernel function used to construct covariance matrix C
function corr(x, y, l_sq)
    v = x - y
    d_sq = sum(v.^2)
    n = d_sq / (2l_sq)
    return exp(-n) * Matrix{Float64}(I, length(x), length(x))
end

nb_grid_points = grid_size^3
gaussian_vector_dim = 3 * nb_grid_points
oneD_grid = LinRange(-xmax, xmax, grid_size)
# get input set X which indexes grid points
threeD_grid = collect.(Iterators.product(oneD_grid, oneD_grid, oneD_grid))
grid_points = vec(reshape(threeD_grid, :, 1))
########################################
# build C by blocks
C = Array{Float64}(undef, gaussian_vector_dim, gaussian_vector_dim)
for i in 1:nb_grid_points
    for j in 1:nb_grid_points
        # block covariance matrix C consists of 3x3 correlation-function matrices K_{i,j} for i,j = 1,...,nb_grid_points
        C[(3*(i-1)+1):(3*i), (3*(j-1)+1):(3*j)] = corr(grid_points[i], grid_points[j], l_sq)
    end
end
#########################################
# plot covariance matrix (plt assumed to be PyPlot)
plt.imshow(C, cmap="Blues", interpolation="none")
plt.colorbar()
plt.title("Covariance matrix")
#########################################
print("C is symmetric:", issymmetric(C))
print("\ndet C=", det(C))
print("\nC is positive definite=", isposdef(C))
Keeping l_sq = 1 and xmax = 2, the code above gives isposdef(C) = false when grid_size = 10, but isposdef(C) = true if grid_size is 9 or less.
Why is this failure occurring, and how can I fix it? Perhaps I can help Julia by indicating that the covariance matrix is sparse?
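For intuition (this sketch is not from the original post, and it uses Python/NumPy rather than Julia): because every 3x3 block is the same scalar kernel value times the 3x3 identity, C has the same eigenvalues as the nb_grid_points x nb_grid_points point-kernel matrix K, each repeated three times, and for a squared-exponential kernel on a dense grid the smallest eigenvalues of K sit near machine precision and can come out slightly negative in floating point, which is what makes the Cholesky factorization (and hence isposdef) fail.

import numpy as np

# illustrative reproduction of the point-kernel matrix K on the same grid
grid_size, l_sq, xmax = 10, 1.0, 2.0
g = np.linspace(-xmax, xmax, grid_size)
pts = np.array(np.meshgrid(g, g, g)).reshape(3, -1).T          # (1000, 3) grid points
d_sq = ((pts[:, None, :] - pts[None, :, :]) ** 2).sum(-1)      # pairwise squared distances
K = np.exp(-d_sq / (2 * l_sq))                                 # squared-exponential kernel

print(np.linalg.eigvalsh(K).min())                             # typically at the edge of machine precision, possibly slightly negative

jitter = 1e-10
np.linalg.cholesky(K + jitter * np.eye(K.shape[0]))            # succeeds once a small diagonal jitter is added

The usual remedy is exactly that small diagonal jitter, and the same idea carries over to the Julia code (e.g. using C + 1e-10*I before calling isposdef or MvNormal).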

state_cov is being ignored when using the Kalman filter in statsmodels

I am trying to create an affine term structure model derived from statsmodels.tsa.statespace.MLEModel (code below) which is initialized using least squares.
class affine_term_structure(sm.tsa.statespace.MLEModel):
    def __init__(self, yields, tau, k_states=3, **kwargs):
        # Initialize the statespace
        super(affine_term_structure, self).__init__(yields, k_states=k_states, **kwargs)
        self.tau = tau
        self.initialize_known(np.zeros(self.k_states), np.eye(self.k_states) * 10000)

    def update(self, params, **kwargs):
        params = super(affine_term_structure, self).update(params, **kwargs)
        # Extract the parameters
        Phi = np.reshape(params[:9], (3, 3))
        k = np.array(params[9:12])
        Sx = np.zeros((3, 3))
        Sx[np.tril_indices(3)] = params[12:18]
        lmbd = params[18]
        sy = params[-1]
        b = self.nss(self.tau, lmbd)  # factor loadings (nss defined elsewhere)
        self['transition'] = Phi                  # transition matrix
        self['state_intercept'] = k               # transition offset
        self['state_cov'] = Sx @ Sx.T             # transition covariance, 3x3 SPD matrix
        self['design'] = b                        # design matrix
        # self['obs_intercept'] = 0               # observation intercept
        self['obs_cov'] = sy * sy * np.eye(self.k_endog)  # observation covariance
However, I noticed that on running the filter/smoother the states were being excessively smoothed. Digging through the filtering results, it seems like state_cov is not being used in the prediction step.
For example,
self.predicted_state_cov[:,:,1]
matches
self.transition[:,:,0] @ self.filtered_state_cov[:,:,0] @ self.transition[:,:,0].T
though I would have expected it to be equal to
self.transition[:,:,0] @ self.filtered_state_cov[:,:,0] @ self.transition[:,:,0].T + self.state_cov[:,:,0]
For good order, please note that all parameter matrices are time invariant.
I'm not sure what I'm missing here, and any help would be much appreciated.
Thanks
In Statsmodels, the state equation is:
x(t+1) = T x(t) + R eta(t+1)
where eta(t+1) ~ N(0, Q)
When you set state_cov, you're setting Q, but you also need to set R, which is the selection matrix.
For example, if you want your state equation to be:
x(t+1) = T x(t) + eta(t+1)
Then you would do something like:
self['selection'] = np.eye(3)
R is not the identity in every state space model, and it cannot always be initialized to the identity matrix, since the dimension of x(t) and the dimension of eta(t) can differ. That's why statsmodels does not set the selection matrix to the identity automatically.
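For illustration, here is a minimal sketch of a one-state random-walk-plus-noise model (a hypothetical example, not the original affine model) where the selection matrix is set explicitly so that state_cov actually enters the prediction step:

import numpy as np
import statsmodels.api as sm

class LocalLevel(sm.tsa.statespace.MLEModel):
    def __init__(self, endog):
        super().__init__(endog, k_states=1, k_posdef=1)
        self.initialize_approximate_diffuse()
        self['design'] = np.array([[1.0]])
        self['transition'] = np.array([[1.0]])
        self['selection'] = np.array([[1.0]])   # R: without this, R = 0 and Q never reaches the prediction step

    @property
    def start_params(self):
        return np.array([1.0, 1.0])

    def update(self, params, **kwargs):
        params = super().update(params, **kwargs)
        self['state_cov', 0, 0] = params[0]   # Q
        self['obs_cov', 0, 0] = params[1]     # H

In the original model above, the analogous fix would be self['selection'] = np.eye(3), set either in __init__ or in update.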

Enumerate through variable (porting PyMC to PyMC3)

I'm starting out with PyMC3 by translating this code from PyMC to PyMC3.
I'm not sure how to translate this segment:
v = pymc.Beta('v', alpha=1, beta=alpha, size=N_dp)

@pymc.deterministic
def p(v=v):
    """ Calculate Dirichlet probabilities """
    # Probabilities from betas
    # this line creates the error:
    value = [u*np.prod(1-v[:i]) for i, u in enumerate(v)]
    # Enforce sum to unity constraint
    value[-1] = 1 - sum(value[:-1])
    return value

z = pymc.Categorical('z', p, size=len(set(counties)))
I assume I have to replace p in the last line with p(v) and remove the @pymc.deterministic decorator, but the problem seems to be that I cannot enumerate over v: ValueError: length not known: ViewOp [id A] 'v'.
Can someone show me how to do the translation or link me to the relevant bit in the documentation? Thanks.
The Dirichlet distribution is actually built into pymc3, so that whole code block can be replaced by:
with pm.Model():
    ...
    v = pm.Beta('v', alpha=1, beta=alpha, shape=N_dp)
    p = pm.Dirichlet('p', a=v, shape=N_dp)
    ...
    trace = pm.sample(20000)
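To mirror the rest of the original snippet, the categorical assignment can then be attached to p inside the same with pm.Model() block (a sketch; counties comes from the original PyMC code):

    z = pm.Categorical('z', p=p, shape=len(set(counties)))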

Dirichlet process in PyMC 3

I would like to implement the Dirichlet process example referenced in
Implementing Dirichlet processes for Bayesian semi-parametric models (source: here) in PyMC 3.
In the example, the stick-breaking probabilities are computed using the pymc.deterministic decorator:
v = pymc.Beta('v', alpha=1, beta=alpha, size=N_dp)

@pymc.deterministic
def p(v=v):
    """ Calculate Dirichlet probabilities """
    # Probabilities from betas
    value = [u*np.prod(1-v[:i]) for i, u in enumerate(v)]
    # Enforce sum to unity constraint
    value[-1] = 1 - sum(value[:-1])
    return value

z = pymc.Categorical('z', p, size=len(set(counties)))
How would you implement this in PyMC 3, which uses Theano for the gradient computation?
Edit:
I tried the following solution using the theano.scan method:
with pm.Model() as mod:
    conc = Uniform('concentration', lower=0.5, upper=10)
    v = Beta('v', alpha=1, beta=conc, shape=n_dp)
    p, updates = theano.scan(fn=lambda stick, idx: stick * t.prod(1 - v[:idx]),
                             outputs_info=None,
                             sequences=[v, t.arange(n_dp)])
    t.set_subtensor(p[-1], 1 - t.sum(p[:-1]))
    category = Categorical('category', p, shape=n_algs)
    sd = Uniform('precs', lower=0, upper=20, shape=n_dp)
    means = Normal('means', mu=0, sd=100, shape=n_dp)
    points = Normal('obs',
                    means[category],
                    sd=sd[category],
                    observed=data)
    step1 = pm.Slice([conc, v, sd, means])
    step3 = pm.ElemwiseCategoricalStep(var=category, values=range(n_dp))
    trace = pm.sample(2000, step=[step1, step3], progressbar=True)
Sadly, this is really slow and does not recover the original parameters of the synthetic data.
Is there a better solution, and is this even correct?
Not sure I have a good answer, but perhaps this could be sped up by instead using a Theano blackbox op, which allows you to write a distribution (or deterministic) in Python code. E.g.: https://github.com/pymc-devs/pymc3/blob/master/pymc3/examples/disaster_model_arbitrary_deterministic.py
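As an alternative sketch (not from the original thread), the stick-breaking weights can be expressed with plain Theano tensor ops instead of theano.scan, which avoids the per-element loop; the truncation level n_dp below is an illustrative value:

import theano.tensor as tt
import pymc3 as pm

n_dp = 20  # truncation level (illustrative value)

def stick_breaking(v):
    # p_k = v_k * prod_{j<k} (1 - v_j); the leftover stick mass is left implicit
    remaining = tt.concatenate([tt.ones(1), tt.extra_ops.cumprod(1 - v)[:-1]])
    return v * remaining

with pm.Model():
    conc = pm.Uniform('concentration', lower=0.5, upper=10)
    v = pm.Beta('v', alpha=1, beta=conc, shape=n_dp)
    p = pm.Deterministic('p', stick_breaking(v))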

PyMC for Model Averaging

I am interested in applying PyMC to model averaging. My goal is to estimate many linear models and average the estimates across them, weighting by their posterior model probabilities. I am currently using the Bayesian Information Criterion (BIC) to approximate the likelihood of my data (therefore, my analysis is not fully Bayesian). I have successfully simulated a Markov chain of models using one of my own scripts, but I want to use PyMC because it seems like a great tool.
In my attempts thus far, I have not been forming the Markov chain correctly: I am not visiting models with higher posterior weights more often than others. I will include the example code below. Please also see the IPython notebook here on GitHub for the math markup and code together.
import numpy as np
from pymc import stochastic, DiscreteMetropolis, MCMC
import statsmodels.api as sm
import pandas as pd
import random

def pack(alist, rank):
    binary = [str(1) if i in alist else str(0) for i in xrange(0, rank)]
    string = '0b1' + ''.join(binary)
    return int(string, 2)

def unpack(integer):
    string = bin(integer)[3:]
    return [int(i) for i in xrange(len(string)) if string[i] == '1']

def make_bma():
    # Simulating Data
    size = 100
    rank = 20
    X = 10*np.random.randn(size, rank)
    error = 30*np.random.randn(size, 1)
    coefficients = np.array([10, 2, 2, 2, 2, 2]).reshape((6, 1))
    y = np.dot(sm.add_constant(X[:, :5], prepend=True), coefficients) + error
    # Number of allowable regressors
    predictors = [3, 4, 5, 6, 7]

    @stochastic(dtype=int)
    def regression_model():
        def logp(value):
            columns = unpack(value)
            x = sm.add_constant(X[:, columns], prepend=True)
            corr = np.corrcoef(x[:, 1:], rowvar=0)
            prior = np.linalg.det(corr)
            ols = sm.OLS(y, x).fit()
            posterior = np.exp(-0.5*ols.bic)*prior
            return np.log(posterior)

        def random():
            k = np.random.choice(predictors)
            columns = sorted(np.random.choice(xrange(0, rank), size=k, replace=False))
            return pack(columns, rank)

    class ModelMetropolis(DiscreteMetropolis):
        def __init__(self, stochastic):
            DiscreteMetropolis.__init__(self, stochastic)

        def propose(self):
            '''considers a neighborhood around the previous model,
            defined as having one regressor removed or added, provided
            the total number of regressors coincides with predictors
            '''
            # Building set of neighboring models
            last = unpack(self.stochastic.value)
            last_indicator = np.zeros(rank)
            last_indicator[last] = 1
            last_indicator = last_indicator.reshape((-1, 1))
            neighbors = abs(np.diag(np.ones(rank)) - last_indicator)
            neighbors = neighbors[:, np.any([neighbors.sum(axis=0) == i for i in predictors], axis=0)]
            neighbors = pd.DataFrame(neighbors)
            # Drawing one model at random from the neighborhood
            draw = random.choice(xrange(neighbors.shape[1]))
            self.stochastic.value = pack(list(neighbors[draw][neighbors[draw] == 1].index), rank)

        # def step(self):
        #     logp_p = self.stochastic.logp
        #     self.propose()
        #     logp = self.stochastic.logp
        #     if np.log(random.random()) > logp_p - logp:
        #         self.reject()

    return locals()

if __name__ == '__main__':
    model = make_bma()
    M = MCMC(model)
    M.use_step_method(model['ModelMetropolis'], model['regression_model'])
    M.sample(iter=5000, burn=1000, thin=1)
    model_chain = M.trace("regression_model")[:]
    from collections import Counter
    counts = Counter(model_chain).items()
    counts.sort(reverse=True, key=lambda x: x[1])
    for f in counts[:10]:
        columns = unpack(f[0])
        print('Visits:', f[1])
        # pull the simulated data back out of the dict returned by make_bma()
        print(np.array([1. if i in columns else 0 for i in range(0, model['rank'])]))
        print(model['coefficients'].flatten())
        X = sm.add_constant(model['X'][:, columns], prepend=True)
        corr = np.corrcoef(X[:, 1:], rowvar=0)
        prior = np.linalg.det(corr)
        fit = sm.OLS(model['y'], X).fit()
        posterior = np.exp(-0.5*fit.bic)*prior
        print(fit.params)
        print('R-squared:', fit.rsquared)
        print('BIC', fit.bic)
        print('Prior', prior)
        print('Posterior', posterior)
        print(" ")
It sounds like you are trying to do something akin to reversible jump MCMC, where you are sampling from the model space in addition to the parameter space(s). PyMC does not currently do rjMCMC, though it probably ought to. The trick is to account for the change in dimension when moving among models. If you do have a modest number of models, you can use an indicator function to select from the models, all of which are fit simultaneously.
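For illustration only (this sketch is not from the answer, and it uses PyMC3 rather than PyMC2): with a small, fixed set of candidate models, a categorical indicator can switch the likelihood's mean between candidate specifications, and the posterior frequency of the indicator then approximates the model weights. All names and data below are made up:

import numpy as np
import pymc3 as pm
import theano.tensor as tt

# synthetic data (illustrative)
np.random.seed(0)
n = 100
X = np.random.randn(n, 2)
y = 1.0 + 2.0 * X[:, 0] + np.random.randn(n)

with pm.Model():
    # indicator: model 0 uses only x1, model 1 uses x1 and x2
    idx = pm.Categorical('model_index', p=np.array([0.5, 0.5]))
    intercept = pm.Normal('intercept', 0, 10)
    beta = pm.Normal('beta', 0, 10, shape=2)
    mu0 = intercept + beta[0] * X[:, 0]
    mu1 = intercept + tt.dot(X, beta)
    mu = tt.switch(tt.eq(idx, 0), mu0, mu1)
    sigma = pm.HalfNormal('sigma', 10)
    pm.Normal('y_obs', mu=mu, sd=sigma, observed=y)
    trace = pm.sample(2000)

This is only a rough sketch of the indicator idea from the answer; proper Bayesian model averaging would also need care with priors shared across models (pseudo-priors), which is not handled here.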
