How to slice a tensor with dynamic shape in TensorFlow?

Say there is a tensor state containing a list of rnn states, and another tensor prob containing the probabilities of each state.
state = tf.placeholder(tf.float32, [None, 49, 32])
print state.get_shape() # (?, 49, 32) (batch_size, candidate_size, state_size)
prob = tf.placeholder(tf.float32, [None, 49])
print prob.get_shape() # (?, 49) (batch_size, candidate_size)
# Now I want to fetch 7 states with top probabilities
_, indices = tf.nn.top_k(prob, 7)
print indices.get_shape() # (?, 7)
How can I slice state with indices?
EDIT:
The issue with using tf.gather(state, indices) is that it will only slice state along the first dimension, which is the batch dimension. Here we want to slice it along the second dimension (of length 49).
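One way to do this (not from the original post; a sketch assuming a TensorFlow version that provides tf.stack and tf.gather_nd) is to pair each top-k index with its batch index and gather along both dimensions at once:
# Build (batch, candidate) index pairs so tf.gather_nd slices along axis 1.
batch_size = tf.shape(prob)[0]
rows = tf.tile(tf.expand_dims(tf.range(batch_size), 1), [1, 7])  # (?, 7) row ids
full_indices = tf.stack([rows, indices], axis=2)                 # (?, 7, 2) pairs
top_states = tf.gather_nd(state, full_indices)                   # (?, 7, 32)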

Related

LightGBM predict with pred_contrib=True for multiclass: order of SHAP values in the returned array

LightGBM's predict method with pred_contrib=True returns an array of shape (n_samples, (n_features + 1) * n_classes).
What is the order of data in the second dimension of this array?
In other words, there are two questions:
What is the correct way to reshape this array to use it: shape = (n_samples, n_features + 1, n_classes) or shape = (n_samples, n_classes, n_features + 1)?
In the feature dimension, there are n_features entries, one for each feature, and a (useless) entry for the contribution not related to any feature. What is the order of these entries: feature contributions in the entries 1,..., n_features in the same order they appear in the dataset, with the remaining (useless) entry at index 0, or some other way?
The answers are as follows:
The correct shape is (n_samples, n_classes, n_features + 1).
The feature contributions occupy the entries 0, ..., n_features - 1, in the same order the features appear in the dataset; the remaining entry, at the last index of each group, is the constant base (expected) value rather than a feature contribution.
The following code shows it convincingly:
import lightgbm, pandas, numpy
params = {'objective': 'multiclass', 'num_classes': 4, 'num_iterations': 10000,
          'metric': 'multiclass', 'early_stopping_rounds': 10}
train_df = pandas.DataFrame({'f0': [0, 1, 2, 3] * 50, 'f1': [0, 0, 1] * 66 + [1, 2]}, dtype=float)
val_df = train_df.copy()
train_target = pandas.Series([0, 1, 2, 3] * 50)
val_target = pandas.Series([0, 1, 2, 3] * 50)
train_set = lightgbm.Dataset(train_df, train_target)
val_set = lightgbm.Dataset(val_df, val_target)
model = lightgbm.train(params=params, train_set=train_set, valid_sets=[val_set, train_set])
feature_contribs = model.predict(val_df, pred_contrib=True)
print('Shape of SHAP:', feature_contribs.shape)
# Shape of SHAP: (200, 12)
print('Averages over samples:', numpy.mean(feature_contribs, axis=0))
# Averages over samples: [ 3.99942301e-13 -4.02281771e-13 -4.30029167e+00 -1.90606677e-05
# 1.90606677e-05 -4.04157656e+00 2.24205077e-05 -2.24205077e-05
# -4.04265615e+00 -3.70370401e-15 5.20335728e-18 -4.30029167e+00]
feature_contribs.shape = (200, 4, 3)
print('Mean feature contribs:', numpy.mean(feature_contribs, axis=(0, 1)))
# Mean feature contribs: [ 8.39960111e-07 -8.39960113e-07 -4.17120401e+00]
(Each output appears as a comment on the lines following the corresponding print statement.)
The explanation is as follows.
I have created a dataset with two features, with labels identical to the first of them (f0), so only f0 should receive large per-sample contributions.
Note that when SHAP values are averaged over all samples, the per-sample contributions of real features largely cancel out (they measure deviations from the expected prediction), while the base-value entry is constant and survives the averaging.
After averaging the SHAP output over the samples, we get an array of shape (12,) whose large entries sit at positions 2, 5, 8, 11 (zero-based), i.e. at the same offset within every group of three.
This shows that the correct grouping of this array is (4, 3), that is, (n_classes, n_features + 1).
After reshaping this way and averaging over the samples and the classes, we get an array of shape (3,) whose only large entry is the last one.
This identifies the last entry of each group as the constant base value, with the feature contributions in the preceding entries, in the order the features appear in the dataset.
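To make the layout concrete, here is a short sketch (the variable names are mine, not from the original answer) that reshapes the output and splits it into per-feature contributions and base values:
contribs = feature_contribs.reshape(-1, 4, 3)  # (n_samples, n_classes, n_features + 1)
per_feature = contribs[:, :, :-1]              # per-class feature contributions, dataset order
base_values = contribs[:, :, -1]               # constant expected raw score for each class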

Tensorflow tf.nn.in_top_k Error targets[0] is out of range

I have a tensorflow program with four output labels. I trained the model and am now evaluating separate data with it.
The problem appears after I run the following code:
import tensorflow as tf
import main
import Process
import Input

eval_dir = "/Users/Zanhuang/Desktop/NNP/model.ckpt-30"
checkpoint_dir = "/Users/Zanhuang/Desktop/NNP/checkpoint"

def evaluate():
    with tf.Graph().as_default() as g:
        images, labels = Process.eval_inputs()
        forward_propgation_results = Process.forward_propagation(images)
        init_op = tf.initialize_all_variables()
        saver = tf.train.Saver()
        top_k_op = tf.nn.in_top_k(forward_propgation_results, labels, 1)
        with tf.Session(graph=g) as sess:
            sess.run(init_op)
            saver.restore(sess, eval_dir)
            tf.train.start_queue_runners(sess=sess)
            print(sess.run(top_k_op))

def main(argv=None):
    evaluate()

if __name__ == '__main__':
    tf.app.run()
In total, I only have one class.
My code for the error rate, where I convert the labels into a one-hot matrix, is here:
def error(forward_propagation_results, labels):
    labels = tf.one_hot(labels, 4)
    tf.transpose(labels)
    labels = tf.cast(labels, tf.float32)
    mean_squared_error = tf.square(tf.sub(labels, forward_propagation_results))
    cost = tf.reduce_mean(mean_squared_error)
    train = tf.train.GradientDescentOptimizer(learning_rate = 0.05).minimize(cost)
    tf.histogram_summary('accuracy', mean_squared_error)
    tf.add_to_collection('losses', cost)
    tf.scalar_summary('LOSS', cost)
    return train, cost
The problem is invalid data in your labels tensor. From your comment, the labels tensor is a vector containing a single value: [40]. The value 40 is larger than the number of columns in the forward_propagation_result (which is 4).
The tf.nn.in_top_k(predictions, targets, k) op has the following behavior:
For each row predictions[i, :]:
result[i] is true if predictions[i, targets[i]] is one of the k largest elements in that row; otherwise it is false.
There is no value predictions[0, 40], because (as your comment shows) that argument is a 1 x 4 matrix. Therefore TensorFlow gives you an out of range error. This suggests that either your evaluation data are wrong, or you should be using a different evaluation function.
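As an illustration, here is a small made-up example of in_top_k's behavior, using the TF1-era API from the question:
import tensorflow as tf

predictions = tf.constant([[0.1, 0.3, 0.2, 0.4]])            # 1 x 4: one example, 4 classes
valid = tf.nn.in_top_k(predictions, tf.constant([3]), 1)     # class 3 holds the row maximum
invalid = tf.nn.in_top_k(predictions, tf.constant([40]), 1)  # there is no column 40
with tf.Session() as sess:
    print(sess.run(valid))  # [ True]
    # sess.run(invalid) fails with InvalidArgumentError: targets[0] is out of range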

Determining the sum of top-left to bottom-right diagonal values in a matrix with Ruby?

I have a square matrix of indeterminate row & column length (assume rows and columns are equal as befits a square).
I've plotted out an example matrix as follows:
matrix = [
[1, 2, 3],
[4, 5, 6],
[7, 8, 9]
]
My goal is to get a sum from top-left to bottom-right of the diagonal values.
Obviously in this example, this is all I'll need:
diagsum = matrix[0][0]+matrix[1][1]+matrix[2][2]
#=> 15
I see the pattern: the row and column indices increment together by 1 for each step along the diagonal. Since my matrix is of indeterminate length (it is supplied as the argument to my method diagsum), the code would preferably need some sort of row_count method on the matrix argument.
If
arr = [[1,2,3],
[4,5,6],
[7,8,9]]
then:
require 'matrix'
Matrix[*arr].trace
#=> 15
This will sum the diagonal values.
matrix = []
matrix[0] = [1,2,3]
matrix[1] = [4,5,6]
matrix[2] = [7,8,9]
def diagsum(mat)
  sum = 0
  mat.each_with_index { |row, i| sum += row[i] }
  sum
end

puts (diagsum matrix) # 15
Not clear what x is.
But assuming that it is the number of columns/rows, you have 0..x, while the index only goes up to x - 1. You should change it to the exclusive range 0...x.
You are assigning to the variable i, whose scope is limited to the block.
You use i only once, though it presumably needs to index both the row and the column.
You are adding the indices instead of the corresponding elements.
each returns its receiver regardless of what the block yields.
puts returns nil regardless of its argument.

Find the smallest sum of the squares of two measurements taken at least 5 min apart

I'm trying to solve this problem in Python 3. I know how to find min1 and min2, but I cannot work out how to enforce the five-minute separation in a single pass.
Problem Statement
The input to the program is a series of measurements performed by a device at intervals of 1 minute. All data are natural numbers not exceeding 1000. The problem is to find the smallest sum of the squares of two measurements taken not less than 5 minutes apart. The first line contains one natural number -- the number of measurements N. It is guaranteed that 5 < N <= 10000. Each of the following N lines contains one natural number -- the result of the next measurement.
Your program should output a single number: the lowest sum of the squares of two measurements taken not less than 5 minutes apart.
Sample input:
9
12
45
5
4
21
20
10
12
26
Expected output: 169
I like this question. Fun brain-teaser. :)
I noticed your sample input was all integers in range(1, 100) with some repetition, so I generated sample lists like so:
>>> import random
>>> sample_list = [random.choice(range(1, 100)) for i in range(10)]
>>> sample_list
[74, 68, 57, 18, 36, 8, 89, 73, 77, 80]
According to the problem statement, these numbers represent data measured at one-minute intervals, and one of our constraints is that our result must represent data gathered at least five minutes apart. Ultimately, that means the indices of the data in the original list must differ by at least five. In other words, for any two inputs v1 and v2:
abs(sample_list.index(v1) - sample_list.index(v2)) >= 5
must be true. We also know that we're searching for the smallest sum, so it will be helpful to look at the smallest numbers first.
Thus, I started by mapping the values in the sample_list to the indices where they occur, then sorting them:
>>> occurrences = {}
>>> for index, value in enumerate(sample_list):
...     try:
...         occurrences[value].append(index)
...     except KeyError:
...         occurrences[value] = [index]
...
>>> occurrences
{80: [9], 18: [3], 68: [1], 73: [7], 89: [6], 8: [5], 57: [2], 74: [0], 77: [8], 36: [4]}
>>> sorted_occurrences = sorted(occurrences)
>>> sorted_occurrences
[8, 18, 36, 57, 68, 73, 74, 77, 80, 89]
After a whole lot of trial and error, here's what I finally came up with in function form (including some of the earlier-discussed pieces):
def smallest_sum_of_squares_five_apart(sample):
    occurrences = {}
    for index, value in enumerate(sample):
        try:
            occurrences[value].append(index)
        except KeyError:
            occurrences[value] = [index]
    sorted_occurrences = sorted(occurrences)
    least_sum = 0
    for index, v1 in enumerate(sorted_occurrences):
        if least_sum and v1**2 > least_sum:
            return least_sum
        for v2 in sorted_occurrences[:index+1]:
            if (abs(max(occurrences[v1]) - min(occurrences[v2])) >= 5 or
                    abs(max(occurrences[v2]) - min(occurrences[v1])) >= 5):
                print('Found candidates:', str((v1, v2)))
                sum_of_squares = v1**2 + v2**2
                if not least_sum or sum_of_squares < least_sum:
                    least_sum = sum_of_squares
    return least_sum
The idea here is to:
Start by looking at the smallest values first.
Compare each one with all values up to and including itself.
Check each pair against our criterion. Notice we do this by checking the extremes: the positions where the two values occur farthest from one another in the original sample.
Break out when checking becomes pointless.
Unfortunately, it is not sufficient to stop at the first candidate pair found. Depending on how the list is constructed, the smallest pair will not always be found first this way; in fact, it is not for your own sample input. However, once v1**2 (the square of the larger value) exceeds the best sum found so far, we know it is pointless to continue looking, since all numbers are natural numbers.
I have included a full runnable implementation of this below. It takes a command line argument (default 10) indicating the number of items you want in the randomly generated sample. It will print the randomly generated sample as well as all candidate pairs it checked, and finally the sum itself. I have checked this on 10-sized inputs several times and it seems to be working in general. However, feedback is welcome if it is not correct. Note also you can uncomment your sample list from the question to see how it works (and that it gets the right answer) for it.
import random
import sys

def smallest_sum_of_squares_five_apart(sample):
    occurrences = {}
    for index, value in enumerate(sample):
        try:
            occurrences[value].append(index)
        except KeyError:
            occurrences[value] = [index]
    sorted_occurrences = sorted(occurrences)
    least_sum = 0
    for index, v1 in enumerate(sorted_occurrences):
        if least_sum and v1**2 > least_sum:
            return least_sum
        for v2 in sorted_occurrences[:index+1]:
            if (abs(max(occurrences[v1]) - min(occurrences[v2])) >= 5 or
                    abs(max(occurrences[v2]) - min(occurrences[v1])) >= 5):
                print('Found candidates:', str((v1, v2)))
                sum_of_squares = v1**2 + v2**2
                if not least_sum or sum_of_squares < least_sum:
                    least_sum = sum_of_squares
    return least_sum

if __name__ == '__main__':
    try:
        r = int(sys.argv[1])
    except IndexError:
        r = 10
    sample_list = [random.choice(range(1, 100)) for i in range(r)]
    #sample_list = [9, 12, 45, 5, 4, 21, 20, 10, 12, 26]
    print(sample_list)
    print(smallest_sum_of_squares_five_apart(sample_list))
Try this:
#!/usr/bin/env python3
import queue

inp = [9,12,45,5,4,21,20,10,12,26]

q = queue.Queue()  # Make a new queue
smallest = False   # No smallest number, yet
best = False       # No best sum of squares, yet

for x in inp:
    q.put(x)  # Place current element on queue
    # If there's an item from five or more minutes ago, consider it
    if q.qsize() > 5:
        temp = q.get()  # Pop oldest item from queue into a temporary variable
        if not smallest:  # If this is the first item five or more minutes old
            smallest = temp  # it is the smallest such item by default
        else:  # otherwise...
            smallest = min(temp, smallest)  # only keep it if it is the smallest yet
        # If we have no best sum of squares yet, or the current item produces a
        # better one, save it as the best
        if (not best) or (x*x + smallest*smallest < best):
            best = x*x + smallest*smallest

print(best)
The idea is to walk through the list keeping track of the smallest element seen so far that is at least five minutes old, comparing it against the newest element, and keeping track of the smallest sum of squares as we go.
I think you'll find the answer to be pretty intuitive if you think about it.
The algorithm operates in O(N) time.

Finding all sets of possibly overlapping fragments that make up the whole?

Let us say the space = [0, 100] and there are a number of intervals given.
These intervals are fragments of the space, and possibly overlap.
[0, 30], [0, 20], [10, 40], [30, 50], [50, 90], [70, 100]
is a set of intervals.
An example of a set of intervals that span the entire space chosen from the above set is:
[0, 30], [10, 40], [30, 50], [50, 90], [70, 100]
Another example is
[0, 30], [30, 50], [50, 90], [70, 100]
which is the set in the previous example without [10, 40].
I want to find all such sets of intervals, so that I can calculate a cost for each set and find the one with the lowest cost.
from operator import itemgetter
import collections

tmp = [(0, 30), (0, 20), (10, 40), (30, 50), (50, 90), (70, 100), ]
aa = sorted(tmp, key=itemgetter(1))  # sort by second element (right endpoint)
a = set(aa)

space = 100
d_conn = 15
RTT = d_conn*2
bandwidth = 10

def get_marginal_cost(fragment):
    return RTT + (fragment[1] - fragment[0])/bandwidth

def dfs(a, start, path=None):
    if path is None:
        path = [start, ]
    if start[1] == space:
        yield path
    for frgmt in a - set(path):
        l = frgmt[0]
        r = frgmt[1]
        if start[0] < l <= start[1] <= r:
            # if l <= start[1] <= r:
            yield dfs(a, frgmt, path + [frgmt, ])

for z in a:
    if z[0] == 0:
        for output in list(dfs(a, z)):
            for outpu in list(output):
                for outp in list(outpu):
                    for out in list(outp):
                        for ou in list(out):
                            print list(ou)
This is my attempt so far, but I could not finish.
In particular, I would like to finish this without using Python's yield functionality, because I am not familiar with it and I will probably want to implement this in C++.
Can anyone help me write a working program that solves this problem?
Thank you.
Is it really necessary to build a tree just to find the minimum cost?
Probably not (assuming that your currently unspecified cost function displays optimal substructure).
For a linear cost function, the following classic algorithm runs in time O(n log n), where n is the number of intervals. Initialize a sorted map from mid to the cost of covering [0, mid]. Initially, this map has one entry, 0 -> 0. Sort the intervals by right endpoint and process them in order as follows. To process [a, b], find the map entry mid -> cost such that mid >= a is as small as possible. (If no such entry exists, then just continue.) Let cost' = cost + Cost([a, b]), where Cost([a, b]) is unspecified but always positive. While the last entry in the map has cost greater than or equal to cost', delete it. Insert b -> cost'. To finish, look up the successor of end, where [0, end] is the space to be covered.
Even if your cost function is not linear, since it's a (presumably monotone) function of the total interval length and the number of intervals, we can get an O(n^2)-time algorithm that, instead of remembering just one cost for each midpoint, remembers for each integer between 0 and n the cost for a solution that uses the specified number of intervals.
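For concreteness, here is a minimal sketch of the sorted-map algorithm described above (not code from the original answer; two parallel sorted lists stand in for the map, and cost may be any positive function of an interval):
import bisect

def min_cover_cost(intervals, end, cost):
    # mids/costs are parallel arrays, sorted by mid with strictly increasing
    # cost; costs[i] is the cheapest known way to cover [0, mids[i]].
    mids, costs = [0], [0]
    for a, b in sorted(intervals, key=lambda iv: iv[1]):  # by right endpoint
        i = bisect.bisect_left(mids, a)  # cheapest entry with mid >= a
        if i == len(mids):
            continue  # no current covering reaches a
        c = costs[i] + cost((a, b))
        while mids and costs[-1] >= c:  # drop entries dominated by b -> c
            mids.pop()
            costs.pop()
        if not mids or mids[-1] < b:
            mids.append(b)
            costs.append(c)
    i = bisect.bisect_left(mids, end)  # successor of end
    return costs[i] if i < len(mids) else None

# With the question's intervals and a unit cost per interval this returns 4,
# matching the cheapest cover [0, 30], [30, 50], [50, 90], [70, 100]:
# min_cover_cost([(0, 30), (0, 20), (10, 40), (30, 50), (50, 90), (70, 100)],
#                100, lambda iv: 1)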
You don't have to explicitly build the tree - you could use a recursive depth first search to achieve the same effect.
At each point in the recursive depth first search you will have built a set of intervals covering [0,x] and you will want to extend this. To do this you need to find all of the intervals which intersect with x and end after x. As you recurse down the tree you will want to do the same search for y > x and so on.
One way to speed this up would be to put the start and end points of the intervals into an array and sort them. You keep a pointer into the array which marks the position x and a set of intervals that cover x, perhaps stored as a hash set. When you advance the position x you move the pointer along the list, deleting intervals from the set when you see their right hand points, and adding intervals to the set when you see their left hand points. You can back up in a very similar way.
This should allow you to keep track of what intervals you can use to extend the covering [0,x] at each point without searching through every possible interval.
This should allow you to enumerate the list of all possible coverings reasonably efficiently. To find the cheapest covering without enumerating all possible coverings we would need to know more about the cost function than you have put in the question.
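As a rough sketch of this enumeration (the names are mine, and it uses a simple candidate scan at each step rather than the sorted-endpoint sweep described above):
import bisect

def enumerate_covers(intervals, space):
    # Enumerate covers in which every chosen interval extends the covered
    # prefix [lo, x], as in the question's examples.
    lo, hi = space
    ivs = sorted(set(intervals))  # sorted by left endpoint
    lefts = [l for l, r in ivs]
    covers = []

    def extend(cover, x):
        if x >= hi:  # the whole space is covered
            covers.append(cover)
            return
        # Candidates: intervals starting at or before x that reach past x.
        for i in range(bisect.bisect_right(lefts, x)):
            l, r = ivs[i]
            if r > x:
                extend(cover + [ivs[i]], r)

    for iv in ivs:
        if iv[0] == lo:
            extend([iv], iv[1])
    return covers

# enumerate_covers([(0, 30), (0, 20), (10, 40), (30, 50), (50, 90), (70, 100)],
#                  (0, 100)) yields, among others, both example sets from the
# question.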
I am pretty sure this can be optimised, but below is a working version. Will try to optimize it and update again:
from operator import itemgetter
import collections
import random

def generate_sample_input(space):
    # This method generates a set of tuples, each tuple consisting of 2 nos
    low, high = space
    init = (low, random.randint(low + 1, (low + high)/2))
    last = (random.randint((low + high)/2 + 1, high), high)
    mid = random.randint(init[1] + 1, last[0] - 1)
    ranges = [init, (init[1] - 1, mid + 1), (mid - 1, last[0] + 1), last]
    nums = {i for tup in ranges for i in tup}
    for _ in range(random.randint(0, 20)):
        low = random.randint(space[0], space[1] - 1)
        high = random.randint(low, space[1])
        if all(i not in nums for i in (low, high)):
            nums |= {low, high}
            ranges.append((low, high))
    return sorted(set(ranges), key = lambda x: x[0])

class Node(object):
    def __init__(self, tup):
        assert len(tup) == 2 and all(type(x) == int for x in tup)
        self.low, self.high = tup
        self.visitable_nodes = []
        self.piggybacker_nodes = []

    def __repr__(self):
        return "(%s, %s)" % (self.low, self.high)

    def set_visitable(self, node):
        assert type(node) == Node
        if self.low < node.low and node.high < self.high:
            self.piggybacker_nodes.append(node)
        elif self.low < node.low < self.high:
            self.visitable_nodes.append(node)

class Graph(object):
    def __init__(self):
        self.sources, self.sinks, self.nodes = [], [], []

    def add(self, node, is_sink=False, is_source=False):
        assert type(node) == Node and not (is_sink and is_source)
        for old_node in self.nodes:
            old_node.set_visitable(node)
            node.set_visitable(old_node)
        self.nodes.append(node)
        if is_sink:
            self.sinks.append(node)
        elif is_source:
            self.sources.append(node)

    def create_graph(self, ranges=[], space=[]):
        for tup in ranges:
            self.add(Node(tup), is_source=tup[0]==space[0],
                     is_sink=tup[1]==space[1])

def dfs(stack=[], nodes=[], sinks=[], level=0):
    for node in nodes:
        if node in sinks:
            print stack + [node]
        dfs(stack + [node], node.visitable_nodes, sinks, level + 1)

def main():
    space = (0, 100)
    ranges = generate_sample_input(space)
    graph = Graph()
    graph.create_graph(space=space, ranges=ranges)
    print ranges
    dfs([], nodes=graph.sources, sinks=graph.sinks)

if __name__ == "__main__":
    main()
