I tried to train a LightGBM binary classifier using the Python API to learn the relation:
if feature > 5, then 1 else 0
import pandas as pd
import numpy as np
import lightgbm as lgb
x_train = pd.DataFrame([4, 7, 2, 6, 3, 1, 9])
y_train = pd.DataFrame([0, 1, 0, 1, 0, 0, 1])
x_test = pd.DataFrame([8, 2])
y_test = pd.DataFrame([1, 0])
lgb_train = lgb.Dataset(x_train, y_train)
lgb_eval = lgb.Dataset(x_test, y_test, reference=lgb_train)
params = { 'objective': 'binary', 'metric': {'binary_logloss', 'auc'}}
gbm = lgb.train(params, lgb_train, valid_sets=lgb_eval)
y_pred = gbm.predict(x_test, num_iteration=gbm.best_iteration)
y_pred
array([0.42857143, 0.42857143])
np.where((y_pred > 0.5), 1, 0)
array([0, 0])
Clearly it failed to predict the first test sample (8). Can anyone see what went wrong?
LightGBM's parameter defaults are set with the expectation of moderate-sized training data, and might not work well on extremely small datasets like the one in this question.
There are two in particular that are impacting your result:
min_data_in_leaf: the minimum number of samples that must fall into a leaf node
min_sum_hessian_in_leaf: the minimum sum of hessians (second derivatives of the loss) over the samples in a leaf node; loosely, the minimum contribution a leaf must make to the loss
Setting these to their lowest possible values lets LightGBM split even on such a tiny dataset (effectively allowing it to overfit it).
import pandas as pd
import numpy as np
import lightgbm as lgb
x_train = pd.DataFrame([4, 7, 2, 6, 3, 1, 9])
y_train = pd.DataFrame([0, 1, 0, 1, 0, 0, 1])
x_test = pd.DataFrame([8, 2])
y_test = pd.DataFrame([1, 0])
lgb_train = lgb.Dataset(x_train, y_train)
lgb_eval = lgb.Dataset(x_test, y_test, reference=lgb_train)
params = {
    'objective': 'binary',
    'metric': {'binary_logloss', 'auc'},
    'min_data_in_leaf': 1,
    'min_sum_hessian_in_leaf': 0
}
gbm = lgb.train(params, lgb_train, valid_sets=lgb_eval)
y_pred = gbm.predict(x_test, num_iteration=gbm.best_iteration)
y_pred
# array([6.66660313e-01, 1.89048958e-05])
np.where((y_pred > 0.5), 1, 0)
# array([1, 0])
For details on all the parameters and their defaults, see https://lightgbm.readthedocs.io/en/latest/Parameters.html.
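For reference, the same fix can be expressed through LightGBM's scikit-learn wrapper. This is a minimal sketch, not part of the original answer; min_child_samples and min_child_weight are the sklearn-API aliases of min_data_in_leaf and min_sum_hessian_in_leaf:

import pandas as pd
import lightgbm as lgb

x_train = pd.DataFrame([4, 7, 2, 6, 3, 1, 9])
y_train = [0, 1, 0, 1, 0, 0, 1]
x_test = pd.DataFrame([8, 2])

# min_child_samples ~ min_data_in_leaf, min_child_weight ~ min_sum_hessian_in_leaf
clf = lgb.LGBMClassifier(objective='binary', min_child_samples=1, min_child_weight=0)
clf.fit(x_train, y_train)
clf.predict(x_test)
# expected to match the Booster result above: array([1, 0])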
I want to have multiple types of seaborn plots using the same y-axis but with different x coordinates (see image below).
I've tried several ways of specifying the x-axis coordinates, but I can't get it to work.
Here is an example of almost-working code:
from matplotlib import pyplot as plt
import seaborn as sns
import pandas as pd

x = [1, 2, 3, 3, 3, 4, 4, 5, 5, 6]  # first violin
y = [4, 4, 5, 5, 5, 5, 6]  # second violin
z = [5, 5, 6]  # swarmplot over second violin

c2v = {'value': [], 'key': []}  # collect values and their group labels
for data, label in [(x, 'x'), (y, 'y'), (z, 'z')]:
    for i in data:
        c2v['value'].append(i)
        c2v['key'].append(label)

data = pd.DataFrame(c2v)
data.head()
print(data.loc[data.key == 'z'])

fig, ax = plt.subplots(1, figsize=(5, 5), dpi=200)
ax = sns.violinplot(data=data.loc[data.key.isin(['x', 'y'])], x='key', y='value', palette=['honeydew', 'lightgreen'])
sns.swarmplot(x=['swarmplot'] * len(data), y=data['value'], order=ax.get_xticklabels() + ['swarmplot'], ax=ax)  # .loc[data.key=='z',:]
ax.set_xlabel('')
It produces the following image:
However, it is plotting all values associated with x/y/z instead of just z. When I slice the dataframe to only 'z' in the swarmplot as below, I get an error:
sns.swarmplot(x=['swarmplot']*len(data), y=data.loc[data.key=='z',:]['value'], order=ax.get_xticklabels() + ['swarmplot'], ax=ax)
KeyError: 'swarmplot'
Any suggestions?
To draw a second plot onto the same x-axis, you can pass order= a list of the existing tick labels with the new label appended.
Here is an example:
import seaborn as sns
tips = sns.load_dataset('tips')
ax = sns.swarmplot(data=tips, x='day', y='total_bill')
sns.violinplot(x=['violin']*len(tips), y=tips['total_bill'], order=ax.get_xticklabels() + ['violin'], ax=ax)
ax.set_xlabel('')
The problem with the code in the new question is that the swarmplot's x= and y= need the same number of elements. The swarmplot also seems to reset the y limits, so I added some code to readjust them:
from matplotlib import pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np
x = [1, 2, 3, 3, 3, 4, 4, 5, 5, 6] # first violin
y = [4, 4, 5, 5, 5, 5, 6] # second violin
z = [5, 5, 6] # swarmplot over second violin
data = pd.DataFrame({'value': np.concatenate([x, y, z]),
'key': ['x'] * len(x) + ['y'] * len(y) + ['z'] * len(z)})
fig, ax = plt.subplots(1, figsize=(5, 5))
ax = sns.violinplot(data=data.loc[data.key.isin(['x', 'y'])], x='key', y='value', palette=['honeydew', 'lightgreen'])
ymin1, ymax1 = ax.get_ylim()
swarm_data = data.loc[data.key == 'z', :]['value']
sns.swarmplot(x=['swarmplot'] * len(swarm_data), y=swarm_data, order=ax.get_xticklabels() + ['swarmplot'], ax=ax)
ymin2, ymax2 = ax.get_ylim()
ax.set_ylim(min(ymin1, ymin2), max(ymax1, ymax2))
ax.set_xlabel('')
ax.set_xticks(np.arange(3))
ax.set_xticklabels(['x', 'y', 'swarmplot'])
plt.show()
You can simplify things by directly using the data without creating a dataframe:
x = [1, 2, 3, 3, 3, 4, 4, 5, 5, 6] # first violin
y = [4, 4, 5, 5, 5, 5, 6] # second violin
z = [5, 5, 6] # swarmplot over second violin
fig, ax = plt.subplots(1, figsize=(5, 5))
ax = sns.violinplot(x=['x']*len(x) + ['y']*len(y), y=x + y, palette=['honeydew', 'lightgreen'])
ymin1, ymax1 = ax.get_ylim()
sns.swarmplot(x=['swarmplot'] * len(z), y=z, order=ax.get_xticklabels() + ['swarmplot'], ax=ax)
ymin2, ymax2 = ax.get_ylim()
ax.set_ylim(min(ymin1, ymin2), max(ymax1, ymax2))
ax.set_xticks(np.arange(3))
ax.set_xticklabels(['x', 'y', 'swarmplot'])
plt.show()
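If, as the comment "# swarmplot over second violin" suggests, you want the z points drawn on top of the second violin rather than in their own slot, here is a minimal sketch of that variant (my own adaptation, not from the original answer): reuse the existing 'y' category for the swarmplot so the dots land at that x position.

from matplotlib import pyplot as plt
import seaborn as sns

x = [1, 2, 3, 3, 3, 4, 4, 5, 5, 6]  # first violin
y = [4, 4, 5, 5, 5, 5, 6]  # second violin
z = [5, 5, 6]  # points to overlay on the second violin

fig, ax = plt.subplots(1, figsize=(5, 5))
sns.violinplot(x=['x'] * len(x) + ['y'] * len(y), y=x + y,
               palette=['honeydew', 'lightgreen'], ax=ax)
# reuse the 'y' category so the dots are drawn over the second violin
sns.swarmplot(x=['y'] * len(z), y=z, order=['x', 'y'], color='black', ax=ax)
ax.set_xlabel('')
plt.show()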
It seems that when lightgbm.train is used with an initial score (init_score), it cannot improve on this score.
Here is a simple example:
params = {"learning_rate": 0.1,"metric": "binary_logloss","objective": "binary",
"boosting_type": "gbdt","num_iterations": 5, "num_leaves": 2 ** 2,
"max_depth": 2, "num_threads": 1, "verbose": 0, "min_data_in_leaf": 1}
x = pd.DataFrame([[1, 0.1, 0.3], [1, 0.1, 0.3], [1, 0.1, 0.3],
[0, 0.9, 0.3], [0, 0.9, 0.3], [0, 0.9, 0.3]], columns=["a", "b", "prob"])
y = pd.Series([0, 1, 0, 0, 1, 0])
d_train = lgb.Dataset(x, label=y)
model = lgb.train(params, d_train)
y_pred_default = model.predict(x, raw_score=False)
In the case above, no init_score is used. The predictions are correct:
y_pred_default = [0.33333333, ... ,0.33333333]
d_train = lgb.Dataset(x, label=y, init_score=scipy.special.logit(x["prob"]))
model = lgb.train(params, d_train)
y_pred_raw = model.predict(x, raw_score=True)
In this part, we take column "prob" of x as our initial guess (perhaps produced by some other model). We apply the logit and use it as the initial score. However, the model cannot improve, and the boosting always returns 0: y_pred_raw = [0, 0, 0, 0, 0, 0]
y_pred_raw_with_init = scipy.special.logit(x["prob"]) + y_pred_raw
y_pred = scipy.special.expit(y_pred_raw_with_init)
The part above shows what I believe is the correct way to translate the initial scores together with the boosting back into probabilities. Since the boosting is zero, y_pred yields [0.3, ..., 0.3], which is just our initial probability.
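For clarity, the round trip described above can be bundled into a small helper (my own naming, not part of LightGBM; it assumes, as in the snippets above, that predict(raw_score=True) returns only the boosting learned on top of the supplied init_score):

import numpy as np
from scipy.special import expit, logit

def predict_proba_with_init(model, x, init_prob):
    # raw boosted score learned on top of the supplied init_score
    raw_boost = model.predict(x, raw_score=True)
    # add the init_score back and map through the sigmoid to get probabilities
    return expit(logit(np.asarray(init_prob)) + raw_boost)

# usage with the example above:
# y_pred = predict_proba_with_init(model, x, x["prob"])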
I want to transform LINGO code into Python GEKKO code. Here is the LINGO code, the LINGO results, and the GEKKO code. I can't write the second and third constraints; it returns an indexing error, but I don't understand why. Can someone help? (It's a graph coloring problem.)
from gekko import GEKKO
import numpy as np
m = GEKKO(remote=False)
# x = m.Array(m.Var,(7,5),lb=0,ub=1,integer=True)
x = m.Array(m.Var,(6,6),lb=0,ub=1,integer=True)
y= np.array([1, 2, 3, 4, 5, 6])
country=6
arcs=np.array([[1,3],
[5,4],
[3,6],
[2,4],
[2,5],
[2,6],
[4,5],
[4,6]])
for i in range(6):
    m.Minimize(y)

for i in range(6):
    # for j in range(2):
    #     m.Equation(m.sum(x[i,j])==1)
    m.Equation(m.sum(x[i,:])==1)

for k in range(6):
    for i in range(8):
        m.Equation(x[arcs[i,1],k]+x[arcs[i,2],k]<=1)
        # m.Equation(x[arcs[i,1],k]+x[arcs[i,2],k])<=1)
        # m.Equation(m.sum(x[arcs[i,1],k],x[arcs[i,2],k]))<=1)
The revised version is:
from gekko import GEKKO
import numpy as np
m = GEKKO(remote=False)
x = m.Array(m.Var,(6,6),lb=0,ub=1,integer=True)
y = m.Array(m.Var,6,lb=0,ub=1,integer=True)
y= np.array([1, 2, 3, 4, 5, 6])
country=6
arcs=np.array([[1,3],
[1,4],
[3,4],
[3,4],
[4,5],
[2,6],
[4,5],
[4,6]])
for i in range(6):
    m.Minimize(y[i])

for i in range(6):
    m.Equation(m.sum(x[i,:])==1)

for k in range(6):
    for i in range(8):
        m.Equation(x[arcs[i,0]-1,k-1]+x[arcs[i,1]-1,k-1]<=1)

for i in range(6):
    m.Equation(m.sum(x[i,:]<=y[i])

m.options.solver = 1
m.solve()
print('Objective Function: ' + str(m.options.objfcnval))
print(x)
print(y)
Now it gives an invalid syntax error for m.solve and m.options. Why?
A couple things that you need to consider for Python:
Lists and arrays are zero-indexed, so you need to shift indices by -1 relative to the LINDO / LINGO language.
The objective function y is a list of constants. Gekko generates warnings that there are no variables in that expression.
Here is a corrected version of your Python script that you probably need to supplement with a correct objective statement and any additional equations that are needed.
from gekko import GEKKO
import numpy as np
m = GEKKO(remote=False)
x = m.Array(m.Var,(6,6),lb=0,ub=1,integer=True)
y= np.array([1, 2, 3, 4, 5, 6])
country=6
arcs=np.array([[1,3],
[1,4],
[3,4],
[2,4],
[2,5],
[2,6],
[4,5],
[4,6]])
for i in range(6):
    m.Minimize(y[i])

for i in range(6):
    m.Equation(m.sum(x[i,:])==1)

for k in range(6):
    for i in range(8):
        m.Equation(x[arcs[i,0]-1,k]+x[arcs[i,1]-1,k]<=1)
m.solve()
Response to Edit
The revised version is missing a closing parenthesis on the m.sum(). Here is a corrected version.
from gekko import GEKKO
import numpy as np
m = GEKKO(remote=False)
x = m.Array(m.Var,(6,6),lb=0,ub=1,integer=True)
y = m.Array(m.Var,6,lb=0,ub=1,integer=True)
y= np.array([1, 2, 3, 4, 5, 6])
country=6
arcs=np.array([[1,3],[1,4],[3,4],[3,4],[4,5],[2,6],[4,5],[4,6]])
for i in range(6):
    m.Minimize(y[i])

for i in range(6):
    m.Equation(m.sum(x[i,:])==1)

for k in range(6):
    for i in range(8):
        m.Equation(x[arcs[i,0]-1,k-1]+x[arcs[i,1]-1,k-1]<=1)

for i in range(6):
    m.Equation(m.sum(x[i,:])<=y[i])

m.options.solver = 1
m.solve()
print('Objective Function: ' + str(m.options.objfcnval))
print(x)
print(y)
You can find additional tips on troubleshooting gekko applications with tutorial 18.
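As a supplement to the "correct objective statement" mentioned above, here is one common graph-coloring formulation, sketched under my own assumptions (not from the original post): a binary variable y[k] marks whether color k is used at all, so minimizing sum(y) minimizes the number of colors.

from gekko import GEKKO
import numpy as np

m = GEKKO(remote=False)
n = 6  # number of nodes and maximum number of colors
x = m.Array(m.Var, (n, n), lb=0, ub=1, integer=True)  # x[i,k] = 1 if node i gets color k
y = m.Array(m.Var, n, lb=0, ub=1, integer=True)       # y[k] = 1 if color k is used
# edges of the graph, 1-based as in the original question
arcs = np.array([[1,3],[5,4],[3,6],[2,4],[2,5],[2,6],[4,5],[4,6]])

# each node gets exactly one color
for i in range(n):
    m.Equation(m.sum(x[i, :]) == 1)
# adjacent nodes cannot share a color
for k in range(n):
    for a, b in arcs:
        m.Equation(x[a - 1, k] + x[b - 1, k] <= 1)
# a color may only be assigned if it is marked as used
for i in range(n):
    for k in range(n):
        m.Equation(x[i, k] <= y[k])
# minimize the number of colors used
m.Minimize(m.sum(y))

m.options.SOLVER = 1  # APOPT for integer solutions
m.solve(disp=False)
print('colors used:', sum(int(v.value[0]) for v in y))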
How do I speed up the rank calculation of a sparse matrix in pure Ruby?
I'm currently calculating the rank of a matrix (from the standard library) to determine the rigidity of a graph.
That means I have sparse matrices ranging from about 2 rows × 9 columns up to about 300 rows × 300 columns.
That translates to several seconds to determine the rank of a matrix, which is far too slow for a GUI application.
Because I use SketchUp, I am bound to Ruby 2.0.0.
I'd like to avoid the hassle of setting up gcc on Windows, so nmatrix is (I think) not a good option.
Edit:
Example matrix:
[[12, -21, 0, -12, 21, 0, 0, 0, 0],
[12, -7, -20, 0, 0, 0, -12, 7, 20],
[0, 0, 0, 0, 14, -20, 0, -14, 20]]
Edit2:
I am now using integers instead of floats, which speeds it up considerably.
I have also added a fail-fast mechanism earlier in the code so that the slow rank calculation is not called at all when it can be avoided.
Edit3:
Part of the code
def rigid?(proto_matrix, nodes)
  matrix_base = Array.new(proto_matrix.size) { |index|
    # initialize the row with 0
    arr = Array.new(nodes.size * 3, 0.to_int)
    proto_row = proto_matrix[index]
    # ids of the nodes in the graph
    node_ids = proto_row.map { |hash| hash[:id] }
    # set the values of both of the nodes' positions
    [0, 1].each { |i|
      vertex_index = vertices.find_index(node_ids[i])
      # predetermined vector associated to the node
      vec = proto_row[i][:vec]
      arr[vertex_index * 3] = vec.x.to_int
      arr[vertex_index * 3 + 1] = vec.y.to_int
      arr[vertex_index * 3 + 2] = vec.z.to_int
    }
    arr
  }
  matrix = Matrix::rows(matrix_base, false)
  rank = matrix.rank
  # graph is rigid if the rank of the matrix is bigger or equal
  # to the amount of node coordinates minus the degrees of freedom
  # of the whole graph
  rank >= nodes.size * 3 - 6
end
Similarly to the Caffe framework, where it is possible to watch the learned filters during CNN training and their resulting convolutions with input images, I wonder whether it is possible to do the same with TensorFlow?
A Caffe example can be viewed in this link:
http://nbviewer.jupyter.org/github/BVLC/caffe/blob/master/examples/00-classification.ipynb
Grateful for your help!
To see just a few conv1 filters in TensorBoard, you can use this code (it works for the cifar10 example):
# this should be a part of the inference(images) function in the cifar10.py file
# conv1
with tf.variable_scope('conv1') as scope:
    kernel = _variable_with_weight_decay('weights', shape=[5, 5, 3, 64],
                                         stddev=1e-4, wd=0.0)
    conv = tf.nn.conv2d(images, kernel, [1, 1, 1, 1], padding='SAME')
    biases = _variable_on_cpu('biases', [64], tf.constant_initializer(0.0))
    bias = tf.nn.bias_add(conv, biases)
    conv1 = tf.nn.relu(bias, name=scope.name)
    _activation_summary(conv1)

with tf.variable_scope('visualization'):
    # scale weights to [0 1], type is still float
    x_min = tf.reduce_min(kernel)
    x_max = tf.reduce_max(kernel)
    kernel_0_to_1 = (kernel - x_min) / (x_max - x_min)
    # to tf.image_summary format [batch_size, height, width, channels]
    kernel_transposed = tf.transpose(kernel_0_to_1, [3, 0, 1, 2])
    # this will display random 3 filters from the 64 in conv1
    tf.image_summary('conv1/filters', kernel_transposed, max_images=3)
I also wrote a simple gist to display all 64 conv1 filters in a grid.
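For reference, a rough sketch of that grid idea (not the actual gist; the 8 x 8 layout and names are my own, assuming the [5, 5, 3, 64] kernel_0_to_1 tensor from the snippet above and the same old-style tf.image_summary API):

with tf.variable_scope('visualization_grid'):
    # kernel_0_to_1 has shape [5, 5, 3, 64]; bring the filter axis to the front
    k = tf.transpose(kernel_0_to_1, [3, 0, 1, 2])    # [64, 5, 5, 3]
    # pad each filter with a 1-pixel border so the tiles stay visually separated
    k = tf.pad(k, [[0, 0], [1, 1], [1, 1], [0, 0]])  # [64, 7, 7, 3]
    # arrange the 64 filters into an 8 x 8 grid and flatten it into one image
    k = tf.reshape(k, [8, 8, 7, 7, 3])               # [rows, cols, h, w, c]
    k = tf.transpose(k, [0, 2, 1, 3, 4])             # [rows, h, cols, w, c]
    grid = tf.reshape(k, [1, 8 * 7, 8 * 7, 3])       # [1, 56, 56, 3]
    tf.image_summary('conv1/filter_grid', grid, max_images=1)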