I am trying to use PyMC3 to implement an example where the data comes from a mixture of multinomials. The goal is to infer the underlying state_prob vector (see below). The code runs, but the Metropolis sampler gets stuck at the initial state_prior vector. Also, for some reason I have not been able to get NUTS to work.
import numpy as np
import pandas as pd
from pymc3 import Model, Multinomial, Dirichlet
import pymc3
import theano.tensor as tt
from theano import function, printing
N = 10
state_prior = np.array([.3, .3, .3])
state_prob = np.array([0.3, 0.1, 0.6]) #need to infer this
state_emission_tran = np.array([[0.3, 0.2, 0.5],
                                [0.1, 0.5, 0.4],
                                [0.0, 0.05, 0.95]])
state_data = np.random.multinomial(1, state_prob, size=N)
emission_prob_given_state = np.matmul(state_data, state_emission_tran)
def rand_mult(row_p):
    return np.random.multinomial(1, row_p)
emission_data = np.apply_along_axis(rand_mult, 1, emission_prob_given_state)
# done with creating data
with Model() as simple_dag:
    state = Dirichlet('state', state_prior*100, shape=3)
    emission_dist = [pymc3.Multinomial.dist(p=state_emission_tran[i], n=1, shape=3) for i in range(3)]
    emission_mix = pymc3.Mixture('emission_mix', w=state, comp_dists=emission_dist, observed=emission_data)
with simple_dag:
    step = pymc3.Metropolis(vars=[state])
    trace = pymc3.sample(10000, cores=2, chains=2, tune=500, step=step, progressbar=True)
Try this one:
import numpy as np
import pandas as pd
from pymc3 import Model, Multinomial, Dirichlet
import pymc3
import theano.tensor as tt
from theano import function, printing
N = 10
state_prior = np.array([.3, .3, .3])
state_prob = np.array([0.3, 0.1, 0.6]) #need to infer this
state_emission_tran = np.array([[0.3, 0.2, 0.5],
                                [0.1, 0.5, 0.4],
                                [0.0, 0.05, 0.95]])
state_data = np.random.multinomial(1, state_prob, size=N)
emission_prob_given_state = np.matmul(state_data, state_emission_tran)
def rand_mult(row_p):
    return np.random.multinomial(1, row_p)
emission_data = np.apply_along_axis(rand_mult, 1, emission_prob_given_state)
# done with creating data
with Model() as simple_dag:
    state = Dirichlet('state', state_prior*100, shape=3)
    emission_dist = [pymc3.Multinomial.dist(p=state_emission_tran[i], n=1, shape=3) for i in range(3)]
    emission_mix = pymc3.Mixture('emission_mix', w=state, comp_dists=emission_dist, observed=emission_data)
with simple_dag:
    trace = pymc3.sample(3000, tune=1000)
I am using PyMC3 version 3.5 on Linux and it works fine.
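As a sanity check after sampling, the trace can be inspected; a minimal sketch, assuming PyMC3 3.5 as mentioned above (the posterior mean of state should land near the true state_prob of [0.3, 0.1, 0.6]):

# posterior summary of the inferred mixture weights
print(pymc3.summary(trace, varnames=['state']))
# visual check that the chains mix rather than getting stuck
pymc3.traceplot(trace, varnames=['state'])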
I have a Seaborn displot with a hue variable:
For each hue level, I want to extract the mode of the density estimate and then plot each hue level versus its mode, like so:
How do I do this?
You can use scipy.stats.gaussian_kde to create the density estimation function, then evaluate that function on an array of x-values and take the location of its maximum as the mode.
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np
df = pd.DataFrame({'x': np.random.normal(0.001, 1, 1300).cumsum() + 30,
                   'hue': np.repeat(np.arange(0.08, 0.20001, 0.01), 100).round(2)})
g = sns.displot(df, x='x', hue='hue', palette='turbo', kind='kde', fill=True, height=6, aspect=1.5)
plt.show()
from scipy.stats import gaussian_kde
from matplotlib.cm import ScalarMappable
fig, ax = plt.subplots(figsize=(10, 6))
hues = df['hue'].unique()
num_hues = len(hues)
colors = sns.color_palette('turbo', num_hues)
xmin, xmax = df['x'].min(), df['x'].max()
xs = np.linspace(xmin, xmax, 500)
for hue, color in zip(hues, colors):
    data = df[df['hue'] == hue]['x'].values
    kde = gaussian_kde(data)
    mode_index = np.argmax(kde(xs))
    mode_x = xs[mode_index]
    sns.scatterplot(x=[hue], y=[mode_x], color=color, s=50, ax=ax)
cmap = sns.color_palette('turbo', as_cmap=True)
norm = plt.Normalize(hues.min(), hues.max())
plt.colorbar(ScalarMappable(cmap=cmap, norm=norm), ax=ax, ticks=hues)
plt.show()
Here is another approach, extracting the kde curves themselves. It uses the legend of the kde plot to get the correspondence between the curves and the hue values. sns.kdeplot is the axes-level function used by sns.displot(kind='kde'). fill=False creates lines instead of filled polygons, from which the values are easier to extract (ax1.fill_between can then fill under the curves in a second pass). The x and y axes of the second plot are swapped (mode on x, hue on y) so that the x-axes of both plots can be aligned.
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np
df = pd.DataFrame({'x': np.random.normal(0.007, 0.1, 1300).cumsum() + 30,
                   'hue': np.repeat(np.arange(0.08, 0.20001, 0.01), 100).round(2)})
fig, (ax1, ax2) = plt.subplots(nrows=2, figsize=(12, 10), sharex=True)
sns.kdeplot(data=df, x='x', hue='hue', palette='turbo', fill=False, ax=ax1)
hues = [float(txt.get_text()) for txt in ax1.legend_.get_texts()]
ax2.set_yticks(hues)
ax2.set_ylabel('hue')
for hue, line in zip(hues, ax1.lines[::-1]):
    color = line.get_color()
    x = line.get_xdata()
    y = line.get_ydata()
    ax1.fill_between(x, y, color=color, alpha=0.3)
    mode_ind = np.argmax(y)
    mode_x = x[mode_ind]
    sns.scatterplot(x=[mode_x], y=[hue], color=color, s=50, ax=ax2)
sns.despine()
plt.tight_layout()
plt.show()
On Windows 10, I have installed Anaconda and launched Spyder. I have also successfully installed Theano, Tensorflow and Keras, since when I execute
import keras
the console outputs
Using TensorFlow backend.
When I compile and fit the neural network, it runs fine. But when I try to run k-fold cross-validation, combining scikit-learn and Keras via a wrapper and using the parameter n_jobs = -1 (or indeed any n_jobs setting that enables multiprocessing), the console just freezes forever until I restart the kernel manually or terminate Spyder.
Another problem: when I try to run some parameter tuning using GridSearchCV with, for example, 100 epochs, it doesn't freeze, but it outputs Epoch 1/1 instead of 1/100 and generally gives bad, illogical results (it runs for only a couple of minutes, while normally it would take hours!).
My code is:
# Part 1 - Data Preprocessing
# Importing the libraries
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
# Importing the dataset
dataset = pd.read_csv('Churn_Modelling.csv')
X = dataset.iloc[:, 3:13].values
y = dataset.iloc[:, 13].values
# Encoding categorical data
# Encoding the Independent Variable
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
labelencoder_X_1 = LabelEncoder()
X[:, 1] = labelencoder_X_1.fit_transform(X[:, 1])
labelencoder_X_2 = LabelEncoder()
X[:, 2] = labelencoder_X_2.fit_transform(X[:, 2])
onehotencoder = OneHotEncoder(categorical_features = [1])
X = onehotencoder.fit_transform(X).toarray()
# Avoiding the dummy variable trap
X = X[:, 1:]
# Splitting the dataset into the Training set and Test set
from sklearn.cross_validation import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 0)
# Feature Scaling
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)
# Part 2 - Now let's make the ANN!
# Importing the Keras libraries and packages
import keras
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Dropout
# Initialising the ANN
classifier = Sequential()
# Adding the input layer and the first hidden layer with dropout
classifier.add(Dense(units = 6, kernel_initializer = 'uniform', activation = 'relu', input_dim = 11))
classifier.add(Dropout(rate = 0.1)) # p should vary from 0.1 to 0.4, NOT HIGHER, because then we will have under-fitting.
# Adding the second hidden layer with dropout
classifier.add(Dense(units = 6, kernel_initializer = 'uniform', activation = 'relu'))
classifier.add(Dropout(rate = 0.1))
# Adding the output layer
classifier.add(Dense(units = 1, kernel_initializer = 'uniform', activation = 'sigmoid'))
# Compiling the ANN
classifier.compile(optimizer = 'adam', loss = 'binary_crossentropy', metrics = ['accuracy'])
# Fitting the ANN to the Training set
classifier.fit(X_train, y_train, batch_size = 10, epochs = 100)
# Part 3 - Making predictions and evaluating the model
# Predicting the Test set results
y_pred = classifier.predict(X_test)
y_pred = (y_pred > 0.5)
# Making the Confusion Matrix
from sklearn.metrics import confusion_matrix
cm = confusion_matrix(y_test, y_pred)
new_prediction = classifier.predict(sc.transform(np.array([[0, 0, 600, 1, 40, 3, 60000, 2, 1, 1, 50000]])))
new_prediction = (new_prediction > 0.5)
#Part 4 = Evaluating, Improving and Tuning the ANN
# Evaluating the ANN
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import cross_val_score
from keras.models import Sequential
from keras.layers import Dense
def build_classifier():
    classifier = Sequential()
    classifier.add(Dense(units = 6, kernel_initializer = 'uniform', activation = 'relu', input_dim = 11))
    classifier.add(Dense(units = 6, kernel_initializer = 'uniform', activation = 'relu'))
    classifier.add(Dense(units = 1, kernel_initializer = 'uniform', activation = 'sigmoid'))
    classifier.compile(optimizer = 'adam', loss = 'binary_crossentropy', metrics = ['accuracy'])
    return classifier
classifier = KerasClassifier(build_fn = build_classifier, batch_size = 10, nb_epoch = 100)
accuracies = cross_val_score(estimator = classifier, X = X_train, y = y_train, cv = 10, n_jobs = -1)
mean = accuracies.mean()
variance = accuracies.std()
# Improving the ANN
# Tuning the ANN
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import GridSearchCV
from keras.models import Sequential
from keras.layers import Dense
def build_classifier(optimizer):
    classifier = Sequential()
    classifier.add(Dense(units = 6, kernel_initializer = 'uniform', activation = 'relu', input_dim = 11))
    classifier.add(Dense(units = 6, kernel_initializer = 'uniform', activation = 'relu'))
    classifier.add(Dense(units = 1, kernel_initializer = 'uniform', activation = 'sigmoid'))
    classifier.compile(optimizer = optimizer, loss = 'binary_crossentropy', metrics = ['accuracy'])
    return classifier
classifier = KerasClassifier(build_fn = build_classifier)
parameters = {"batch_size": [25, 32],
"nb_epoch": [100, 500],
"optimizer": ["adam", "rmsprop"]}
grid_search = GridSearchCV(estimator = classifier,
                           param_grid = parameters,
                           scoring = "accuracy",
                           cv = 10)
grid_search = grid_search.fit(X_train, y_train)
best_parameters = grid_search.best_params_
best_accuracy = grid_search.best_score_
Also, for n_jobs = 1, it runs but says Epoch 1/1 and runs 10 times, which is the k-fold value. That means it recognizes nb_epoch = 1 and not 100 for some reason.
Finally, I tried enclosing the cross_val_score() call in a class:
class run():
    def __init__(self):
        cross_val_score(estimator = classifier, X = X_train, y = y_train, cv = 10, n_jobs = -1)

if __name__ == '__main__':
    run()
or keeping it only inside the if condition:
if __name__ == '__main__':
    cross_val_score(estimator = classifier, X = X_train, y = y_train, cv = 10, n_jobs = -1)
but it doesn't work either; it freezes again.
Can anyone help me solve these issues? What is going on, and what can I do so that everything runs properly?
Thank you in advance.
It seems Windows has an issue with n_jobs: remove it from your accuracies = cross_val_score(...) line and it will work. The downside is that it may take a while, but at least it will run.
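For reference, a minimal sketch of that evaluation step without n_jobs (not tested on the asker's exact setup). Note also that the Keras 2 scikit-learn wrapper expects epochs rather than the old Keras 1 name nb_epoch; an unrecognized nb_epoch is silently dropped, so training falls back to the default of one epoch, which would explain the Epoch 1/1 output:

from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import cross_val_score

# 'epochs' (Keras 2 name) instead of 'nb_epoch' (Keras 1 name)
classifier = KerasClassifier(build_fn=build_classifier, batch_size=10, epochs=100)

# no n_jobs: the 10 folds run serially, avoiding the multiprocessing freeze on Windows
accuracies = cross_val_score(estimator=classifier, X=X_train, y=y_train, cv=10)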
I'm trying to create a plot in Python where the data that is being plotted gets updated as my simulation progresses. In MATLAB, I could do this with the following code:
t = linspace(0, 1, 100);
figure
for i = 1:100
    x = cos(2*pi*i*t);
    plot(x)
    drawnow
end
I'm trying to use matplotlib's FuncAnimation function in the animation module to do this inside a class. It calls a function plot_voltage which recalculates voltage after each timestep in my simulation. I have it set up as follows:
import matplotlib.pyplot as plt
import matplotlib.animation as animation
def __init__(self):
    ani = animation.FuncAnimation(plt.figure(2), self.plot_voltage)
    plt.draw()

def plot_voltage(self, *args):
    voltages = np.zeros(100)
    voltages[:] = np.nan
    # some code to calculate voltage
    ax1 = plt.figure(2).gca()
    ax1.clear()
    ax1.plot(np.arange(0, len(voltages), 1), voltages, 'ko-')
When my simulation runs, the figures show up but just freeze. The code runs without error, however. Could someone please let me know what I am missing?
Here is a translation of the MATLAB code into matplotlib using FuncAnimation:
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.animation as animation
t = np.linspace(0, 1, 100)
fig = plt.figure()
line, = plt.plot([],[])
def update(i):
    x = np.cos(2*np.pi*i*t)
    line.set_data(t, x)

ani = animation.FuncAnimation(fig, update,
                              frames=np.linspace(1, 100, 100), interval=100)
plt.xlim(0,1)
plt.ylim(-1,1)
plt.show()
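As for why the figures in the question freeze: the object returned by FuncAnimation must be kept alive (if it is only a local variable in __init__, it is garbage-collected and the animation stops), and plt.draw() alone does not start the GUI event loop. A minimal sketch of the class-based setup, with a hypothetical Simulation class and a placeholder voltage calculation standing in for the questioner's code:

import numpy as np
import matplotlib.pyplot as plt
import matplotlib.animation as animation

class Simulation:
    def __init__(self):
        self.fig, self.ax = plt.subplots()
        # keep a reference on self so the animation is not garbage-collected
        self.ani = animation.FuncAnimation(self.fig, self.plot_voltage, interval=100)

    def plot_voltage(self, frame):
        # placeholder for the real voltage calculation
        voltages = np.sin(np.linspace(0, 2*np.pi, 100) + 0.1*frame)
        self.ax.clear()
        self.ax.plot(voltages, 'ko-')

sim = Simulation()
plt.show()  # plt.show(), not plt.draw(), starts the event loop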
I would like a figure (and some text) to appear as if printed on the page of an open book. Is it possible to transform a jpg image programmatically, or in matplotlib, to achieve such an effect?
You can use a background axis along with an open source book image to do something like this,
import numpy as np
import matplotlib.pyplot as plt
fig = plt.figure()
ax1 = fig.add_axes([0.1, 0.1, 0.8, 0.8])
ax2 = fig.add_axes([0.2, 0.3, 0.25, 0.3])
#Plot page from a book
im = plt.imread("./book_page.jpg")
implot = ax1.imshow(im, origin='lower')
# Plot a graph and set background to transparent
x = np.linspace(0,4.*np.pi,40)
y = np.sin(x)
ax2.plot(x,y,'-ro',alpha=0.5)
ax2.set_ylim([-1.1,1.1])
ax2.patch.set_alpha(0.0)
from matplotlib import rc
rc('text', usetex=True)
margin = im.shape[0]*0.075
ytext = im.shape[1]/2.+10
ax1.text(margin, ytext, "The following text is an example")
ax1.text(margin, 90, "Figure 1. Showing a sine function")
plt.show()
Which looks like this,
where I used the following book image.
UPDATE: Added a non-affine transformation based on the scikit-image warp example, but with a Maxwell distribution. The solution saves the matplotlib line as an image in order to apply a pointwise transform. Mapping the vector graphics directly may be possible, but I think this would be more complicated...
import numpy as np
import matplotlib.pyplot as plt
def maxwellian_transform_image(image):
    from skimage.transform import PiecewiseAffineTransform, warp
    rows, cols = image.shape[0], image.shape[1]
    src_cols = np.linspace(0, cols, 20)
    src_rows = np.linspace(0, rows, 10)
    src_rows, src_cols = np.meshgrid(src_rows, src_cols)
    src = np.dstack([src_cols.flat, src_rows.flat])[0]

    # add maxwellian to row coordinates
    x = np.linspace(0, 3., src.shape[0])
    dst_rows = src[:, 1] + (np.sqrt(2/np.pi)*x**2 * np.exp(-x**2/2)) * 50
    dst_cols = src[:, 0]
    dst_rows *= 1.5
    dst_rows -= 1.0 * 50
    dst = np.vstack([dst_cols, dst_rows]).T

    tform = PiecewiseAffineTransform()
    tform.estimate(src, dst)
    out_rows = image.shape[0] - 1.5 * 50
    out_cols = cols
    out = warp(image, tform, output_shape=(out_rows, out_cols))
    return out
#Create the new figure
fig = plt.figure()
ax = fig.add_axes([0.1, 0.1, 0.8, 0.8])
#Plot page from a book
im = plt.imread("./book_page.jpg")
implot = ax.imshow(im, origin='lower')
# Plot and save graph as image, will need some manipulation of location
temp, at = plt.subplots()
margin = im.shape[0]*0.1
x = np.linspace(margin,im.shape[0]/2.,40)
y = im.shape[1]/3. + 0.1*im.shape[1]*np.sin(12.*np.pi*x/im.shape[0])
at.plot(x,y,'-ro',alpha=0.5)
temp.savefig("lineplot.png",transparent=True)
#Read in plot as an image and apply transform
plot = plt.imread("./lineplot.png")
out = maxwellian_transform_image(plot)
ax.imshow(out, extent=[0,im.shape[1],0,im.shape[0]])
plt.show()
The figure now looks like,
Hello all, and Merry Christmas!
Could someone please explain to me how the following sample of code works (http://matplotlib.sourceforge.net/examples/animation/random_data.html)?
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.animation as animation
timeline = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
metric = [10, 20, 30, 40, 50, 60, 70, 80, 90, 100]
fig = plt.figure()
window = fig.add_subplot(111)
line, = window.plot(np.random.rand(10))
def update(data):
    line.set_ydata(data)
    return line,

def data_gen():
    while True:
        yield np.random.rand(10)
ani = animation.FuncAnimation(fig, update, data_gen, interval=5*1000)
plt.show()
In particular, I would like to use lists ("metric") to update the plot.
The problem is that FuncAnimation uses generators, if I am not mistaken, but how can I make it work with lists?
Thank you.
You can feed FuncAnimation with any iterable, not just a generator.
From the docs:
class matplotlib.animation.FuncAnimation(fig, func, frames=None, init_func=None, fargs=None, save_count=None, **kwargs)

Makes an animation by repeatedly calling a function func, passing in (optional) arguments in fargs. frames can be a generator, an iterable, or a number of frames. init_func is a function used to draw a clear frame. If not given, the results of drawing from the first item in the frames sequence will be used.
Thus the equivalent code with lists could be:
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.animation as animation
start = [1, 0.18, 0.63, 0.29, 0.03, 0.24, 0.86, 0.07, 0.58, 0]
metric = [[0.03, 0.86, 0.65, 0.34, 0.34, 0.02, 0.22, 0.74, 0.66, 0.65],
          [0.43, 0.18, 0.63, 0.29, 0.03, 0.24, 0.86, 0.07, 0.58, 0.55],
          [0.66, 0.75, 0.01, 0.94, 0.72, 0.77, 0.20, 0.66, 0.81, 0.52]]
fig = plt.figure()
window = fig.add_subplot(111)
line, = window.plot(start)
def update(data):
    line.set_ydata(data)
    return line,
ani = animation.FuncAnimation(fig, update, metric, interval=2*1000)
plt.show()
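For comparison, a generator yielding the same rows works identically; a minimal sketch reusing the metric list and update function from above:

def data_gen():
    # yield each row of metric in turn, just like iterating over the list
    for row in metric:
        yield row

ani = animation.FuncAnimation(fig, update, data_gen, interval=2*1000)
plt.show()

With a plain list, FuncAnimation knows the number of frames in advance; with a generator it does not, which matters mainly when saving the animation (see the save_count argument in the signature quoted above).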