Tensorflow tf.nn.in_top_k Error targets[0] is out of range - matrix

I have a tensorflow program with four output labels. I trained the model and am now evaluating separate data with it.
The issue is that after I use the code
import tensorflow as tf
import main
import Process
import Input
eval_dir = "/Users/Zanhuang/Desktop/NNP/model.ckpt-30"
checkpoint_dir = "/Users/Zanhuang/Desktop/NNP/checkpoint"
def evaluate():
with tf.Graph().as_default() as g:
images, labels = Process.eval_inputs()
forward_propgation_results = Process.forward_propagation(images)
init_op = tf.initialize_all_variables()
saver = tf.train.Saver()
top_k_op = tf.nn.in_top_k(forward_propgation_results, labels, 1)
with tf.Session(graph=g) as sess:
sess.run(init_op)
saver.restore(sess, eval_dir)
tf.train.start_queue_runners(sess=sess)
print(sess.run(top_k_op))
def main(argv=None):
evaluate()
if __name__ == '__main__':
tf.app.run()
In total, I only have one class.
My code for the error rate, where I introduce the labels in a one hot matrix is here:
def error(forward_propagation_results, labels):
labels = tf.one_hot(labels, 4)
tf.transpose(labels)
labels = tf.cast(labels, tf.float32)
mean_squared_error = tf.square(tf.sub(labels, forward_propagation_results))
cost = tf.reduce_mean(mean_squared_error)
train = tf.train.GradientDescentOptimizer(learning_rate = 0.05).minimize(cost)
tf.histogram_summary('accuracy', mean_squared_error)
tf.add_to_collection('losses', cost)
tf.scalar_summary('LOSS', cost)
return train, cost

The problem is invalid data in your labels tensor. From your comment, the labels tensor is a vector containing a single value: [40]. The value 40 is larger than the number of columns in the forward_propagation_result (which is 4).
The tf.nn.in_top_k(predictions, targets, k) op has the following behavior:
For each row predictions[i, :]:
result[i] is true if predictions[i, targets[i]] is one of the k largest elements in that row; otherwise it is false.
There is no value predictions[0, 40], because (as your comment shows) that argument is a 1 x 4 matrix. Therefore TensorFlow gives you an out of range error. This suggests that either your evaluation data are wrong, or you should be using a different evaluation function.

Related

Loop until matrix is full?

I have a conditional statement that adds row of binary values from matrix A to matrix B. I want to put this in a loop so that it continues to add rows from matrix A until matrix B is full. Currently matrix B is initialized as 10 by 10 matrix of zeros. Do I need to initialize matrix B differently in order to create this condition or is there a way of doing it as is?
Below is roughly how my code looks so far
from random import sample
import numpy as np
matrixA = np.random.randint(2, size=(10,10))
matrixB = np.zeros((10,10))
x, y = sample(range(1, 10), k=2)
if someCondition:
matrixB = np.append(matrixB, [matrixA[x]], axis=0)
else:
matrixB = np.append(matrixB, [matrixA[y]], axis=0)
you don't need a loop for it. It is really easy to just do it using smart indexing. For example:
import numpy as np
A = np.random.randint(0, 10, size=(20,10))
B = np.empty((10, 10))
print(A)
# Copy till the row that satisfies your conditions. Here I assume it to be 10
B = A[:10, :]
print(B)

tf.boolean_mask, mask_dimension must be specified?

When using tf.boolean_mask(), a Value Error is raised. It reads "Number of mask dimensions must be specified, even if some dimensions are None. E.g. shape=[None] is ok, but shape=None is not.
I suspect that something is going wrong when I create my boolean mask s, because when I just create a boolean mask by hand, all works fine. However, I've checked the shape and the dtype of s so far, and couldn't notice anything suspicious. Both seemed to be identical to the shape and type of the boolean mask I created by hand.
Please see a screenshot of the problem.
The following should allow you to reproduce the error on your machine. You need tensorflow, numpy and scipy.
with tf.Session() as sess:
# receive five embedded vectors
v0 = tf.constant([[3.0,1.0,2.,4.,2.]])
v1 = tf.constant([[4.0,0,1.0,4,1.]])
v2 = tf.constant([[1.0,1.0,0.0,4.,8.]])
v3 = tf.constant([[1.,4,2.,5.,2.]])
v4 = tf.constant([[3.,2.,3.,2.,5.]])
# concatenate the five embedded vectors into a matrix
VT = tf.concat([v0,v1,v2,v3,v4],axis=0)
# perform SVD on the concatenated matrix
s, u1, u2 = tf.svd(VT)
e = tf.square(s) # list of eigenvalues
v = u1 # eigenvectors as column vectors
# sample a set
s = tf.py_func(sample_dpp_bin,[e,v],tf.bool)
X = tf.boolean_mask(VT,s)
print(X.eval())
This is the code to generate s. s is a sample from a determinantal point process (for the mathematically interested).
Note that I'm using tf.py_func to wrap this python function:
import tensorflow as tf
import numpy as np
from scipy.linalg import orth
def sample_dpp_bin(e_val,e_vec):
# e_val = np.array of eigenvalues
# e_vec = array of eigenvectors (= column vectors)
eps = 0.01
# sample a set of eigenvectors
ind = (np.random.rand(len(e_val)) <= (e_val)/(1+e_val))
k = sum(ind)
if k == e_val.size:
return np.ones(e_val.size,dtype=bool) # check for full set
if k == 0:
return np.zeros(e_val.size,dtype=bool)
V = e_vec[:,np.array(ind)]
# sample a set of k items
sample = np.zeros(e_val.size,dtype=bool)
for l in range(k-1,-1,-1):
p = np.sum(V**2,axis=1)
p = np.cumsum(p / np.sum(p)) # item cumulative probabilities
i = int((np.random.rand() <= p).argmax()) # choose random item
sample[i] = True
j = (np.abs(V[i,:])>eps).argmax() # pick an eigenvector not orthogonal to e_i
Vj = V[:,j]
V = orth(V - (np.outer(Vj,(V[i,:]/Vj[i]))))
return sample
The output if I print s and tf.reshape(s) is
[False True True True True]
[5]
The output if I print VT and tf.reshape(VT) is
[[ 3. 1. 2. 4. 2.]
[ 4. 0. 1. 4. 1.]
[ 1. 1. 0. 4. 8.]
[ 1. 4. 2. 5. 2.]
[ 3. 2. 3. 2. 5.]]
[5 5]
Any help much appreciated.
Following example works for me.
import tensorflow as tf
import numpy as np
tensor = [[1, 2], [3, 4], [5, 6]]
mask = np.array([True, False, True])
t_m = tf.boolean_mask(tensor, mask)
sess = tf.Session()
print(sess.run(t_m))
Output:
[[1 2]
[5 6]]
Provide your runnable code snippet to reproduce the error. I think you might be doing something wrong in s.
Update:
s = tf.py_func(sample_dpp_bin,[e,v],tf.bool)
s_v = (s.eval())
X = tf.boolean_mask(VT,s_v)
print(X.eval())
mask should be a np array not TF tensor. You don't have to use tf.pyfunc.
The error message states that the shape of the mask is not defined. What do you get if you print tf.shape(s)? I'd bet the problem with your code is that the shape of s is completely unknown, and you could fix that with a simple call like s.set_shape((None)) (to simply specify that s is a 1-dimensional tensor). Consider this code snippet:
X = np.random.randint(0, 2, (100, 100, 3))
with tf.Session() as sess:
X_tf = tf.placeholder(tf.int8)
# X_tf.set_shape((None, None, None))
y = tf.greater(tf.reduce_max(X_tf, axis=(0, 1)), 0)
print(tf.shape(y))
z = tf.boolean_mask(X_tf, y, axis=2)
print(sess.run(z, feed_dict={X_tf: X}))
This prints a shape of Tensor("Shape_3:0", shape=(?,), dtype=int32) (i.e., even the dimensions of y are unknown) and returns the same error as you have. However, if you uncomment the set_shape line, then X_tf is known to be 3-dimensional and so s is 1-dimensional. The code then works. So, I think all you need to do is add a s.set_shape((None)) call after the py_func call.

PyMC3 Simple Topic Model

I'm trying to implement a very simple topic model in PyMC3 and I'm having a problem getting it to sample. I'm pretty sure the issue is in the 'p' function in how I'm trying to access the 'theta' variable. Theta isn't really a list, but I don't know how else to combine this information. Any help would be greatly appreciated.
import numpy as np
from pymc3 import *
import theano.tensor as t
K = 3 #NUMBER OF TOPICS
V = 20 #NUMBER OF WORDS
N = 15 #NUMBER OF DOCUMENTS
#GENERAETE RANDOM CATEGORICAL MIXTURES
data = np.ones([N,V])
#theano.compile.ops.as_op(itypes=[t.dmatrix, t.lscalar],otypes=[t.dvector])
def p(theta, z_i):
return theta[z_i]
model = Model()
with model:
alpha = np.ones(V)
beta = np.ones(K)
theta = Dirichlet('theta', alpha, shape=(K,V))
phi = Dirichlet('phi', beta, shape=K)
z = [Categorical('z_%i' % i, phi) for i in range(N)]
x = [Categorical('x_%i' % (i), p=p(theta,z[i]), observed=data[i]) for i in range(N)]
print "Created model. Now begin sampling"
step = Metropolis()
trace = sample(10000, step)
trace.get_values('phi')
The error message I'm getting in the above code is:
TypeError: expected type_num 9 (NPY_INT64) got 7 Apply node that
caused the error: ScalarFromTensor(InplaceDimShuffle{}.0) Toposort
index: 11 Inputs types: [TensorType(int64, scalar)] Inputs shapes:
[()] Inputs strides: [()] Inputs values: [array(1)] Outputs clients:
[[Subtensor{int64}(Elemwise{TrueDiv}[(0, 0)].0, ScalarFromTensor.0)]]

PyMC: sampling step by step?

I would like to know why the sampler is incredibly slow when sampling step by step.
For example, if I run:
mcmc = MCMC(model)
mcmc.sample(1000)
the sampling is fast. However, if I run:
mcmc = MCMC(model)
for i in arange(1000):
mcmc.sample(1)
the sampling is slower (and the more it samples, the slower it is).
If you are wondering why I am asking this.. well, I need a step by step sampling because I want to perform some operations on the values of the variables after each step of the sampler.
Is there a way to speed it up?
Thank you in advance!
------------------ EDIT -------------------------------------------------------------
Here I present the specific problem in more details:
I have two models in competition and they are part of a bigger model that has a categorical variable functioning as a 'switch' between the two.
In this toy example, I have the observed vector 'Y', that could be explained by a Poisson or a Geometric distribution. The Categorical variable 'switch_model' selects the Geometric model when = 0 and the Poisson model when =1.
After each sample, if switch_model selects the Geometric model, I want the variables of the Poisson model NOT to be updated, because they are not influencing the likelihood and therefore they are just drifting away. The opposite is true if the switch_model selects the Poisson model.
Basically what I do at each step is to 'change' the value of the non-selected model by bringing it manually one step back.
I hope that my explanation and the commented code will be clear enough. Let me know if you need further details.
import numpy as np
import pymc as pm
import pandas as pd
import matplotlib.pyplot as plt
# OBSERVED VALUES
Y = np.array([0, 1, 2, 3, 8])
# PRIOR ON THE MODELS
pi = (0.5, 0.5)
switch_model = pm.Categorical("switch_model", p = pi)
# switch_model = 0 for Geometric, switch_model = 1 for Poisson
p = pm.Uniform('p', lower = 0, upper = 1) # Prior of the parameter of the geometric distribution
mu = pm.Uniform('mu', lower = 0, upper = 10) # Prior of the parameter of the Poisson distribution
# LIKELIHOOD
#pm.observed
def Ylike(value = Y, mu = mu, p = p, M = switch_model):
if M == 0:
out = pm.geometric_like(value+1, p)
elif M == 1:
out = pm.poisson_like(value, mu)
return out
model = pm.Model([Ylike, p, mu, switch_model])
mcmc = pm.MCMC(model)
n_samples = 5000
traces = {}
for var in mcmc.stochastics:
traces[str(var)] = np.zeros(n_samples)
bar = pm.progressbar.progress_bar(n_samples)
bar.update(0)
mcmc.sample(1, progress_bar=False)
for var in mcmc.stochastics:
traces[str(var)][0] = mcmc.trace(var)[-1]
for i in np.arange(1,n_samples):
mcmc.sample(1, progress_bar=False)
bar.update(i)
for var in mcmc.stochastics:
traces[str(var)][i] = mcmc.trace(var)[-1]
if mcmc.trace('switch_model')[-1] == 0: # Gemetric wins
traces['mu'][i] = traces['mu'][i-1] # One step back for the sampler of the Poisson parameter
mu.value = traces['mu'][i-1]
elif mcmc.trace('switch_model')[-1] == 1: # Poisson wins
traces['p'][i] = traces['p'][i-1] # One step back for the sampler of the Geometric parameter
p.value = traces['p'][i-1]
print '\n\n'
traces=pd.DataFrame(traces)
traces['mu'][traces['switch_model'] == 0] = np.nan
traces['p'][traces['switch_model'] == 1] = np.nan
print traces.describe()
traces.plot()
plt.show()
The reason this is so slow is that Python's for loops are pretty slow, especially when they are compared to FORTRAN loops (Which is what PyMC is written in basically.) If you could show more detailed code, it might be easier to see what you are trying to do and to provide faster alternative algorithms.
Actually I found a 'crazy' solution, and I have the suspect to know why it works. I would still like to get an expert opinion on my trick.
Basically if I modify the for loop in the following way, adding a 'reset of the mcmc' every 1000 loops, the sampling fires up again:
for i in np.arange(1,n_samples):
mcmc.sample(1, progress_bar=False)
bar.update(i)
for var in mcmc.stochastics:
traces[str(var)][i] = mcmc.trace(var)[-1]
if mcmc.trace('switch_model')[-1] == 0: # Gemetric wins
traces['mu'][i] = traces['mu'][i-1] # One step back for the sampler of the Poisson parameter
mu.value = traces['mu'][i-1]
elif mcmc.trace('switch_model')[-1] == 1: # Poisson wins
traces['p'][i] = traces['p'][i-1] # One step back for the sampler of the Geometric parameter
p.value = traces['p'][i-1]
if i%1000 == 0:
mcmc = pm.MCMC(model)
In practice this trick erases the traces and the database of the sampler every 1000 steps. It looks like the sampler does not like having a long database, although I do not really understand why. (of course 1000 steps is arbitrary, too short it adds too much overhead, too long it will cause the traces and database to be too long).
I find this hack a bit crazy and definitely not elegant.. does any of the experts or developers have a comment on it? Thank you!

PyMC for Model Averaging

I am interested in applying PyMC to model averaging. My goal is to estimate many linear models and average estimates across them, weighting by their posterior model probabilities. I am currently using the Bayesian Information Criterion (BIC) to approximate the likelihood of my data (therefore, my analysis is not fully Bayesian). I have successfully simulated a Markov Chain of models using one of my own scripts but I want to use PyMC because it seems like a great tool.
In my attempts thus far, I have not been forming the Markov Chain correctly. I am not visiting models with higher posterior weights more often than others. I will include the example code below. Please also see the IPython notebook here! on github for the math markup and code together.
import numpy as np
from pymc import stochastic, DiscreteMetropolis, MCMC
import statsmodels.api as sm
import pandas as pd
import random
def pack(alist, rank):
binary = [str(1) if i in alist else str(0) for i in xrange(0,rank)]
string = '0b1'+''.join(binary)
return int(string, 2)
def unpack(integer):
string = bin(integer)[3:]
return [int(i) for i in xrange(len(string)) if string[i]=='1']
def make_bma():
# Simulating Data
size = 100
rank = 20
X = 10*np.random.randn(size, rank)
error = 30*np.random.randn(size,1)
coefficients = np.array([10, 2, 2, 2, 2, 2]).reshape((6,1))
y = np.dot(sm.add_constant(X[:,:5], prepend=True), coefficients) + error
# Number of allowable regressors
predictors = [3,4,5,6,7]
#stochastic(dtype=int)
def regression_model():
def logp(value):
columns = unpack(value)
x = sm.add_constant(X[:,columns], prepend=True)
corr = np.corrcoef(x[:,1:], rowvar=0)
prior = np.linalg.det(corr)
ols = sm.OLS(y,x).fit()
posterior = np.exp(-0.5*ols.bic)*prior
return np.log(posterior)
def random():
k = np.random.choice(predictors)
columns = sorted(np.random.choice(xrange(0,rank), size=k, replace=False))
return pack(columns, rank)
class ModelMetropolis(DiscreteMetropolis):
def __init__(self, stochastic):
DiscreteMetropolis.__init__(self, stochastic)
def propose(self):
'''considers a neighborhood around the previous model,
defined as having one regressor removed or added, provided
the total number of regressors coincides with predictors
'''
# Building set of neighboring models
last = unpack(self.stochastic.value)
last_indicator = np.zeros(rank)
last_indicator[last] = 1
last_indicator = last_indicator.reshape((-1,1))
neighbors = abs(np.diag(np.ones(rank)) - last_indicator)
neighbors = neighbors[:,np.any([neighbors.sum(axis=0) == i \
for i in predictors], axis=0)]
neighbors = pd.DataFrame(neighbors)
# Drawing one model at random from the neighborhood
draw = random.choice(xrange(neighbors.shape[1]))
self.stochastic.value = pack(list(neighbors[draw][neighbors[draw]==1].index), rank)
# def step(self):
#
# logp_p = self.stochastic.logp
#
# self.propose()
#
# logp = self.stochastic.logp
#
# if np.log(random.random()) > logp_p - logp:
#
# self.reject()
return locals()
if __name__ == '__main__':
model = make_bma()
M = MCMC(model)
M.use_step_method(model['ModelMetropolis'], model['regression_model'])
M.sample(iter=5000, burn=1000, thin=1)
model_chain = M.trace("regression_model")[:]
from collections import Counter
counts = Counter(model_chain).items()
counts.sort(reverse=True, key=lambda x: x[1])
for f in counts[:10]:
columns = unpack(f[0])
print('Visits:', f[1])
print(np.array([1. if i in columns else 0 for i in range(0,M.rank)]))
print(M.coefficients.flatten())
X = sm.add_constant(M.X[:, columns], prepend=True)
corr = np.corrcoef(X[:,1:], rowvar=0)
prior = np.linalg.det(corr)
fit = sm.OLS(model['y'],X).fit()
posterior = np.exp(-0.5*fit.bic)*prior
print(fit.params)
print('R-squared:', fit.rsquared)
print('BIC', fit.bic)
print('Prior', prior)
print('Posterior', posterior)
print(" ")
It sounds like you are trying to do something akin to reversible jump MCMC, where you are sampling from the model space in addition to the parameter space(s). PyMC does not currently do rjMCMC, though it probably ought to. The trick is to account for the change in dimension when moving among models. If you do have a modest number of models, you can use an indicator function to select from the models, all of which are fit simultaneously.

Resources