data generation cluster analysis - cluster-computing

I don't know which R commands can generate datasets for a combination of parameters (dimension, cluster proportions, model). I use these commands:
#' Build the parameters of a three-component Gaussian mixture
#'
#' The component means are fixed at (0, 8), (8, 0) and (-8, -8) in the first
#' two coordinates and are zero in the remaining `p - 2` coordinates. Every
#' covariance matrix is the identity outside its leading 2 x 2 block.
#'
#' @param a Variance of the second coordinate of component 1.
#' @param b,c Variances of the first and second coordinates of component 2.
#' @param d,f Variances of the first and second coordinates of component 3.
#' @param e Covariance between the first two coordinates of component 3.
#' @param p Dimension of the data; a single whole number >= 2.
#' @return A list with mean vectors `m1`, `m2`, `m3` (length `p`) and
#'   covariance matrices `sig1`, `sig2`, `sig3` (`p` x `p`).
formmatrix <- function(a, b, c, d, e, f, p) {
  # Fail early with a clear message: rep(0, p - 2) errors obscurely for p < 2.
  if (!is.numeric(p) || length(p) != 1L || is.na(p) || p < 2 ||
      p != round(p)) {
    stop("`p` must be a single whole number >= 2", call. = FALSE)
  }

  extra <- p - 2
  zeros <- rep(0, extra)
  ones <- rep(1, extra)

  m1 <- c(0, 8, zeros)
  m2 <- c(8, 0, zeros)
  m3 <- c(-8, -8, zeros)

  sig1 <- diag(c(1, a, ones))
  sig2 <- diag(c(b, c, ones))

  # Identity everywhere except the correlated leading 2 x 2 block.
  sig3 <- diag(p)
  sig3[1:2, 1:2] <- matrix(c(d, e, e, f), 2, 2, byrow = TRUE)

  list(m1 = m1, m2 = m2, m3 = m3, sig1 = sig1, sig2 = sig2, sig3 = sig3)
}
# Dimension shared by every simulated setting.
p <- 2

# Five covariance settings for the three-component mixture.
sim1 <- formmatrix(1, 1, 1, 1, 0, 1, p)
sim2 <- formmatrix(5, 1, 5, 1, 0, 5, p)
sim3 <- formmatrix(5, 5, 1, 3, -2, 3, p)
sim4 <- formmatrix(1, 20, 5, 15, -10, 15, p)
sim5 <- formmatrix(1, 45, 30, 15, -10, 15, p)

# Stack 200 / 300 / 500 draws from the three components of setting 1.
# NOTE(review): rmvnorm is not defined or loaded in this snippet; presumably it
# comes from the mvtnorm package -- confirm it is attached before running.
mixt1 <- rbind(
  rmvnorm(200, mean = sim1$m1, sigma = sim1$sig1),
  rmvnorm(300, mean = sim1$m2, sigma = sim1$sig2),
  rmvnorm(500, mean = sim1$m3, sigma = sim1$sig3)
)

Related

Optimization of a function calculation time

```
# @jit(nopython=True,parallel=False,fastmath=True)  # numba decorator, left disabled as in the original
def func(q, H, N):
    """Fill a (q_H, q_H, q_H) array that is symmetric in its last two axes.

    With q_H = q*(2*H+1) and q_N = q*N, entry [:, l, m] is the matrix-vector
    product of ``invM`` with the element-wise product of columns l and m of a
    random (q_N, q_H) matrix and a random length-q_N vector. Only the pairs
    with m >= l are computed; the mirrored entry [:, m, l] is copied.

    Parameters
    ----------
    q, H, N : int
        Size parameters; see the formulas for q_H and q_N above.

    Returns
    -------
    numpy.ndarray
        float64 array of shape (q_H, q_H, q_H).
    """
    q_H = q * (2 * H + 1)
    q_N = q * N
    array_3d = np.zeros((q_H, q_H, q_H), dtype=np.float64)
    vect = np.random.random((q_N))
    M = np.random.random((q_N, q_H))
    # NOTE(review): G1 is not defined anywhere in this snippet. For the product
    # below to have length q_H it must have shape (q_N, q_H) -- confirm what it
    # is supposed to be (possibly M).
    invM = G1.T
    for l in range(q_H):
        yl = M[:, l]
        for m in range(l, q_H):
            ym = M[:, m]
            calc_vec = yl * ym * vect
            # Matrix-vector product: the '@' operator had been garbled into
            # '#', which silently commented the multiplication out.
            array_3d[:, l, m] = invM @ calc_vec
            array_3d[:, m, l] = array_3d[:, l, m]
    return array_3d
```
q : int (1,2,...,9)
H : int (3,5,10,15,20,25,30)
N : int (512, 1024, 2048, 4096)
I am trying to optimize the computation time of this function. Thanks for your help.

H2O Automl in R and Python

I have a very simple question. I recently started working in Python.
Here is the R code for H2O AutoML:
# Launch an H2O AutoML run: train on `train`, validate and rank on `test`,
# with cross-validation disabled (nfolds = 0) and the listed algorithms
# excluded.
aml <- h2o.automl(
  x = x,
  y = y,
  training_frame = train,
  validation_frame = test,
  leaderboard_frame = test,
  nfolds = 0,
  max_runtime_secs = 99,
  max_runtime_secs_per_model = 3600,
  max_models = 1000,
  exclude_algos = c("GLM", "DeepLearning", "GBM", "DRF", "StackedEnsemble"),
  seed = 22,
  project_name = gtp
)
How can I write these in Python?
# Configure the AutoML run. exclude_algos must be a single list argument:
# bare comma-separated strings after a keyword argument are a SyntaxError.
# NOTE(review): gtp is used as a variable here; it must already hold the
# project name string -- confirm, or quote it.
aml = H2OAutoML(
    max_runtime_secs=600,
    exclude_algos=["GLM", "DeepLearning", "GBM", "DRF", "StackedEnsemble"],
    seed=42,
    project_name=gtp,
)
# Train; every keyword argument must be separated by a comma
# (one was missing after validation_frame).
aml.train(
    x=X,
    y=y,
    validation_frame=hf_v,
    training_frame=hf_train,
    leaderboard_frame=hf_test,
)
# Configure the AutoML run with a time budget, a list of excluded algorithms,
# a fixed seed and a project name.
aml = H2OAutoML(
    max_runtime_secs=600,
    exclude_algos=["GLM", "DeepLearning", "GBM", "DRF", "StackedEnsemble"],
    seed=42,
    project_name='gtp',
)
# Train on hf_train, validate on hf_v and rank models on hf_test.
# The comma after validation_frame was missing, which is a SyntaxError.
aml.train(
    x=X,
    y=y,
    validation_frame=hf_v,
    training_frame=hf_train,
    leaderboard_frame=hf_test,
)

Creating a requirement prioritization function in Python 3 program for a Salp Swarm Optimization

I want to be able to use a text file of requirements to be prioritized.
I want to make swarm_size, min_values and max_values inputs that are read from a text file.
SSA Function
def salp_swarm_algorithm(swarm_size = 5, min_values = [-5,-5], max_values = [5,5], iterations = 50):
    """Run the salp swarm loop and return the food row with the lowest fitness.

    The swarm and the food source are created by ``initial_position`` and
    ``food_position``. The loop then runs for ``count`` = 0 .. ``iterations``
    inclusive, printing the last column of the current best food row and
    refreshing food and positions through ``update_food`` and
    ``update_position``. The coefficient ``c1`` equals 2 at ``count`` = 0 and
    shrinks as ``count`` approaches ``iterations``.

    Note: ``iterations`` = 0 raises ZeroDivisionError on the first pass.

    Returns a deep copy of the row of ``food`` whose 'Fitness' is minimal;
    the same row is printed before returning.
    """
    position = initial_position(swarm_size = swarm_size, min_values = min_values, max_values = max_values)
    food = food_position(dimension = len(min_values))
    count = 0
    while count <= iterations:
        best_value = food.iloc[food['Fitness'].idxmin(), -1]
        print("Iteration = ", count, " Requirement = ", best_value)
        progress = count / iterations
        c1 = 2 * math.exp(-(4 * progress) ** 2)
        food = update_food(position, food)
        position = update_position(position, food, c1 = c1, min_values = min_values, max_values = max_values)
        count += 1
    best_row = food.iloc[food['Fitness'].idxmin(), :]
    print(best_row.copy(deep = True))
    return best_row.copy(deep = True)

why I get max_weights=1 after gradient check?

I set up a 3-layer neural network with 2 hidden layers, but when I run a gradient check I get max_weights = 1, which means there is an error in my backprop. Here is my backprop function; I really need some help.
Is there something wrong with my code?
Thanks!
def loss(self, X, y, reg=0.0):
    """Softmax loss and gradients for a 3-layer ReLU network.

    Architecture: affine -> ReLU -> affine -> ReLU -> affine -> softmax,
    with parameters read from ``self.params`` ('W1', 'b1', 'W2', 'b2',
    'W3', 'b3').

    Parameters
    ----------
    X : array of shape (N, D)
        Input batch.
    y : sequence of N integer class labels
        Used to index the columns of the (N, C) score matrix.
    reg : float
        L2 regularisation strength; the penalty is
        ``reg * (sum(W1**2) + sum(W2**2) + sum(W3**2))``.

    Returns
    -------
    (loss, grad) : (float, dict)
        Mean cross-entropy plus the L2 penalty, and a dict with the gradient
        of that loss with respect to every entry of ``self.params``.
    """
    W1, b1 = self.params['W1'], self.params['b1']
    W2, b2 = self.params['W2'], self.params['b2']
    W3, b3 = self.params['W3'], self.params['b3']
    N, D = X.shape

    # ---- forward pass ----
    H1out = np.maximum(0, X.dot(W1) + b1)       # (N, H1)
    H2out = np.maximum(0, H1out.dot(W2) + b2)   # (N, H2)
    scores = H2out.dot(W3) + b3                 # (N, C)

    # Subtract the row-wise max before exponentiating for numerical stability.
    scores_shift = scores - np.max(scores, axis=1).reshape(-1, 1)
    exp_scores = np.exp(scores_shift)
    softmaxout = exp_scores / np.sum(exp_scores, axis=1).reshape(-1, 1)

    loss_main = -np.sum(np.log(softmaxout[range(N), list(y)]))
    # The penalty is reg times the SUM of the squared weight norms. The
    # previous expression multiplied the W1 and W2 terms together and added
    # the W3 term without scaling it by reg.
    reg_loss = reg * (np.sum(W1 * W1) + np.sum(W2 * W2) + np.sum(W3 * W3))
    total_loss = loss_main / N + reg_loss

    # ---- backward pass ----
    dscores = softmaxout.copy()                 # (N, C)
    dscores[range(N), list(y)] -= 1
    dscores /= N

    # Each weight gradient includes the derivative of the L2 penalty
    # (2 * reg * W) so that it matches the loss returned above.
    dW3 = H2out.T.dot(dscores) + 2 * reg * W3
    db3 = np.sum(dscores, axis=0)

    dh2 = dscores.dot(W3.T)                     # (N, H2)
    dh_Relu2 = (H2out > 0) * dh2                # gate through the second ReLU
    dW2 = H1out.T.dot(dh_Relu2) + 2 * reg * W2
    db2 = np.sum(dh_Relu2, axis=0)

    dh1 = dh_Relu2.dot(W2.T)                    # (N, H1)
    dh_Relu1 = (H1out > 0) * dh1                # gate through the first ReLU
    dW1 = X.T.dot(dh_Relu1) + 2 * reg * W1
    db1 = np.sum(dh_Relu1, axis=0)

    grad = {
        'W1': dW1, 'b1': db1,
        'W2': dW2, 'b2': db2,
        'W3': dW3, 'b3': db3,
    }
    return total_loss, grad

value in range for big datasets

I have a problem that I can't seem to solve. I want a query to determine whether a given value lies within a predefined range, but my loop is very slow for big datasets. Is there a more efficient way?
% Flag each row of FF_Value whose second column lies between the regression
% line shifted by a positive residual and the same line shifted by a negative
% residual. Every (positive, negative) residual pair is tested in a nested loop.
clear all
close all
% Regression(1,1) multiplies column 1 (slope); Regression(1,2) is added (intercept).
Regression(1,1) = 1.001415645694801;
Regression(1,2) = 0.043822386790753;
FF_Value(:,1) = [24.24 30.77 31.37 29.05 29.20 29.53 29.67 27.78];
FF_Value(:,2) = [24.16 30.54 31.15 29.53 29.39 29.34 29.53 28.17];
% Residual of column 2 from the regression line evaluated at column 1.
FF_Distance = FF_Value(:,2)-(Regression(1,2)+Regression(1,1)*FF_Value(:,1));
% Positive residuals, ascending. The "== 0" removal is a no-op because the
% strict "> 0" filter has already excluded zeros.
FF_Distance_Positiv = sort(FF_Distance(FF_Distance > 0));
FF_Distance_Positiv(FF_Distance_Positiv == 0) = [];
% Negative residuals, descending (closest to zero first); same no-op removal.
FF_Distance_Negativ = sort(FF_Distance(FF_Distance < 0),'descend');
FF_Distance_Negativ(FF_Distance_Negativ == 0) = [];
% Enumerate every (positive, negative) pair: A cycles through the positive
% residuals, C repeats each negative residual once per positive residual.
A = repmat(FF_Distance_Positiv,length(FF_Distance_Negativ),1);
B = repmat(FF_Distance_Negativ',length(FF_Distance_Positiv),1);
C = reshape(B,[length(FF_Distance_Positiv)*length(FF_Distance_Negativ),1]);
Recognition(:,1) = A;
Recognition(:,2) = C;
FF_Recognition = zeros(length(FF_Value),1);
% NOTE(review): length() returns the largest dimension, so these loop bounds
% equal the row counts only while each matrix has at least as many rows as
% columns -- size(X,1) would state the intent directly.
for i = 1:length(Recognition)
for j = 1:length(FF_Value)
% Row j is flagged when the upper shifted line is at or above the point and
% the lower shifted line is at or below it.
if (Regression(1,2)+Recognition(i,1))+Regression(1,1)*FF_Value(j,1) >= FF_Value(j,2) &&...
(Regression(1,2)+Recognition(i,2))+Regression(1,1)*FF_Value(j,1) <= FF_Value(j,2)
FF_Recognition(j,1) = 1;
end
end
end
Welcome to the world of bsxfun's replacing your world of repmats -
% Vectorised version of the range test. Expects FF_Value (rows x 2) and
% Regression (1 x 2: slope, intercept) to exist in the workspace already.
%------------ Original code -----------------------------------------
FF_Distance = FF_Value(:,2)-(Regression(1,2)+Regression(1,1)*FF_Value(:,1));
FF_Distance_Positiv = sort(FF_Distance(FF_Distance > 0));
FF_Distance_Positiv(FF_Distance_Positiv == 0) = [];
%// Note for Performance: If number of elements satisfying `FF_Distance_Positiv == 0`
%// is a lot, consider doing this instead -
%// `FF_Distance_Positiv = FF_Distance_Positiv(FF_Distance_Positiv~=0)`.
%// Follow this strategy for `FF_Distance_Negativ` too.
FF_Distance_Negativ = sort(FF_Distance(FF_Distance < 0),'descend');
FF_Distance_Negativ(FF_Distance_Negativ == 0) = [];
%------- Added vectorization replacing `repmats` and nested loops ------------
%// bsxfun takes a function handle (@plus, @ge, @le); the '@' had been
%// garbled into '#', which is not valid MATLAB syntax.
mult = Regression(1,1)*FF_Value(:,1);
%// Upper bounds: one column per positive residual, one row per data point.
y1 = bsxfun(@plus,Regression(1,2),FF_Distance_Positiv);
y2 = bsxfun(@plus,y1.',mult); %//'
mc1 = bsxfun(@ge,y2,FF_Value(:,2));
%// Lower bounds: one column per negative residual, one row per data point.
z1 = bsxfun(@plus,Regression(1,2),FF_Distance_Negativ);
z2 = bsxfun(@plus,z1.',mult); %//'
mc2 = bsxfun(@le,z2,FF_Value(:,2));
%// A point is flagged when at least one upper bound and at least one lower
%// bound enclose it.
FF_Recognition = all([any(mc1,2) any(mc2,2)],2);

Resources