Parallel Computing with Julia #parallel and SharedArray - parallel-processing

I have been trying to implement some parallel programming in Julia using #parallel and SharedArrays.
Xi = Array{Float64}([0.0, 450.0, 450.0, 0.0, 0.0, 450.0, 450.0, 0.0])
Yi = Array{Float64}([0.0, 0.0, 600.0, 600.0, 0.0, 0.0, 600.0, 600.0])
Zi = Array{Float64}([0.0, 0.0, 0.0, 0.0, 400.0, 400.0, 400.0, 400.0])
Xj = Array{Float64}([0.0, 450.0, 450.0, 0.0, 0.0, 450.0, 450.0, 0.0])
Yj = Array{Float64}([0.0, 0.0, 600.0, 600.0, 0.0, 0.0, 600.0, 600.0])
Zj = Array{Float64}([0.0, 0.0, 0.0, 0.0, 400.0, 400.0, 400.0, 400.0])
L = Array{Float64}([400.0, 400.0, 400.0, 400.0, 450.0, 600.0, 450.0, 600.0])
Rot = Array{Float64}([90.0, 90.0, 90.0, 90.0, 0.0, 0.0, 0.0, 0.0])
Obviously these vectors will be huge, but for simplicity I just put this limited size.
This is the operation without parallel computing:
function jt_transcoord(Xi, Yi, Zi, Xj, Yj, Zj, Rot, L)
r = Vector(length(Xi))
for i in 1:length(Xi)
rxX = (Xj[i] - Xi[i]) / L[i]
rxY = (Yj[i] - Yi[i]) / L[i]
rxZ = (Zj[i] - Zi[i]) / L[i]
if rxX == 0 && rxY == 0
r[i] = [0 0 rxZ; cosd(Rot[i]) -rxZ*sind(Rot[i]) 0; sind(Rot[i]) rxZ*cosd(Rot[i]) 0]
else
R=sqrt(rxX^2+rxY^2)
r21=(-rxX*rxZ*cosd(Rot[i])+rxY*sind(Rot[i]))/R
r22=(-rxY*rxZ*cosd(Rot[i])-rxX*sind(Rot[i]))/R
r23=R*cosd(Rot[i])
r31=(rxX*rxZ*sind(Rot[i])+rxY*cosd(Rot[i]))/R
r32=(rxY*rxZ*sind(Rot[i])-rxX*cosd(Rot[i]))/R
r33=-R*sind(Rot[i])
r[i] = [rxX rxY rxZ;r21 r22 r23;r31 r32 r33]
end
end
return r
end
The returned value is basically an array that contains a matrix in each vector row. That looks something like this:
r =
[[0.0 0.0 1.0; 0.0 -1.0 0.0; 1.0 0.0 0.0],
[0.0 0.0 1.0; 0.0 -1.0 0.0; 1.0 0.0 0.0],
[0.0 0.0 1.0; 0.0 -1.0 0.0; 1.0 0.0 0.0],
[0.0 0.0 1.0; 0.0 -1.0 0.0; 1.0 0.0 0.0],
[1.0 0.0 0.0; 0.0 -0.0 1.0; 0.0 -1.0 -0.0],
[0.0 1.0 0.0; 0.0 -0.0 1.0; 1.0 0.0 -0.0],
[-1.0 0.0 0.0; 0.0 0.0 1.0; 0.0 1.0 -0.0],
[0.0 -1.0 0.0; -0.0 0.0 1.0; -1.0 -0.0 -0.0]]
This is my function using #parallel. First of all I need to convert the vectors to SharedArrays:
Xi = convert(SharedArray, Xi)
Yi = convert(SharedArray, Yi)
Zi = convert(SharedArray, Zi)
Xj = convert(SharedArray, Xj)
Yj = convert(SharedArray, Yj)
Zj = convert(SharedArray, Zj)
L = convert(SharedArray, L)
Rot = convert(SharedArray, Rot)
This the same code but using #parallel
function jt_transcoord_parallel(Xi, Yi, Zi, Xj, Yj, Zj, Rot, L)
r = SharedArray{Float64}(zeros((length(Xi),1)))
#parallel for i in 1:length(Xi)
rxX = (Xj[i] - Xi[i]) / L[i]
rxY = (Yj[i] - Yi[i]) / L[i]
rxZ = (Zj[i] - Zi[i]) / L[i]
if rxX == 0 && rxY == 0
r[i] = [0 0 rxZ; cosd(Rot[i]) -rxZ*sind(Rot[i]) 0; sind(Rot[i]) rxZ*cosd(Rot[i]) 0]
else
R=sqrt(rxX^2+rxY^2)
r21=(-rxX*rxZ*cosd(Rot[i])+rxY*sind(Rot[i]))/R
r22=(-rxY*rxZ*cosd(Rot[i])-rxX*sind(Rot[i]))/R
r23=R*cosd(Rot[i])
r31=(rxX*rxZ*sind(Rot[i])+rxY*cosd(Rot[i]))/R
r32=(rxY*rxZ*sind(Rot[i])-rxX*cosd(Rot[i]))/R
r33=-R*sind(Rot[i])
r[i] = [rxX rxY rxZ;r21 r22 r23;r31 r32 r33]
end
end
return r
end
I just got a vector of zeros. My question is: Is there a way to implement this function using #parallel in Julia and get the same results that I got in my original function?

The functions jt_transcoord and jt_transcoord_parallel have major coding flaws.
In jt_transcoord, you are assigning an array to a vector element position. For example, you write r = Vector(length(Xi)) and then assign r[i] = [rxX rxY rxZ;r21 r22 r23;r31 r32 r33]. But r[i] should be a number, and you instead assign it a 3x3 matrix. I suspect that Julia is quietly changing types for you.
SharedArray objects will not admit this lax type conversion behavior. The components of a SharedArray must be of a single primitive type such as Float64, and Vector{Matrix} is not a primitive type. Open a Julia v0.6 REPL and copy/paste the following code:
r = SharedArray{Float64}(length(Xi))
for i in 1:length(Xi)
rxX = (Xj[i] - Xi[i]) / L[i]
rxY = (Yj[i] - Yi[i]) / L[i]
rxZ = (Zj[i] - Zi[i]) / L[i]
if rxX == 0 && rxY == 0
r[i] = [0 0 rxZ; cosd(Rot[i]) -rxZ*sind(Rot[i]) 0; sind(Rot[i]) rxZ*cosd(Rot[i]) 0]
else
R = sqrt(rxX^2+rxY^2)
r21 = (-rxX*rxZ*cosd(Rot[i])+rxY*sind(Rot[i]))/R
r22 = (-rxY*rxZ*cosd(Rot[i])-rxX*sind(Rot[i]))/R
r23 = R*cosd(Rot[i])
r31 = (rxX*rxZ*sind(Rot[i])+rxY*cosd(Rot[i]))/R
r32 = (rxY*rxZ*sind(Rot[i])-rxX*cosd(Rot[i]))/R
r33 = -R*sind(Rot[i])
r[i] = [rxX rxY rxZ;r21 r22 r23;r31 r32 r33]
end
end
On my end, I get:
ERROR: MethodError: Cannot `convert` an object of type Array{Float64,2} to an object of type Float64
This may have arisen from a call to the constructor Float64(...),
since type constructors fall back to convert methods.
Stacktrace:
[1] setindex!(::SharedArray{Float64,2}, ::Array{Float64,2}, ::Int64) at ./sharedarray.jl:483
[2] macro expansion at ./REPL[26]:6 [inlined]
[3] anonymous at ./<missing>:?
Essentially, Julia is telling you that it cannot assign a matrix to a SharedArray vector.
What are your options?
If you insist on having a Vector{Matrix} return type, then use r = Vector{Matrix{Float64}}(length(Xi)) in jt_transcoord. But you cannot use SharedArrays for this since Vector{Matrix} is not an admissible primitive type.
Alternatively, if you are willing to operate with tensors (i.e. 3-way arrays) then you can use pseudocode A below. But SharedArray computing will only help you if you carefully account for which process owns which portion of the tensor. Otherwise, the processes will need to communicate with each other, and your parallelized function could execute very slowly.
If you are willing to lay your 3x3 matrices in a 3n x 3 columnwise fashion, then you can use pseudocode B below.
Pseudocode A
function jt_transcoord_tensor(Xi, Yi, Zi, Xj, Yj, Zj, Rot, L)
# initialize array
r = Array{Float64}(3,3,length(Xi))
# r = SharedArray{Float64,3}((3,3,length(Xi))) # for SharedArrays
for i in 1:length(Xi)
# #parallel for i in 1:length(Xi) # for SharedArrays
# other code...
r[:,:,i] = [0 0 rxZ; cosd(Rot[i]) -rxZ*sind(Rot[i]) 0; sind(Rot[i]) rxZ*cosd(Rot[i]) 0]
# other code...
r[:,:,i] = [rxX rxY rxZ;r21 r22 r23;r31 r32 r33]
end
end
return r
end
Pseudocode B
function jt_transcoord_parallel(Xi, Yi, Zi, Xj, Yj, Zj, Rot, L)
n = length(Xi)
r = SharedArray{Float64}((3*n,3))
#parallel for i in 1:length(Xi)
# other code...
r[(3*(i-1)+1):3*(i),:] = [0 0 rxZ; cosd(Rot[i]) -rxZ*sind(Rot[i]) 0; sind(Rot[i]) rxZ*cosd(Rot[i]) 0]
# other code...
r[(3*(i-1)+1):3*(i),:] = [rxX rxY rxZ;r21 r22 r23;r31 r32 r33]
end
end
return r
end

Related

How to solve a set of ODEs in Scilab?

I have following set of ODEs which I would like to numerically solve in Scilab
I have successfully written the function for evaluating the right hand side of the first equation in other words I am able to solve the first equation
function ut = u(t)
ut = [Vm*cos(2*%pi*fs*t); Vm*sin(2*%pi*fs*t)];
endfunction
function dxdt = SystemModel(t, x, u)
A = [(-RS*(LL + LM)^2 - RR*LM^2)/(LL*LM*(LL + LM)), 0.0, RR/(LL*(LL + LM)), pp*wm/LL;
0.0, (-RS*(LL + LM)^2 - RR*LM^2)/(LL*LM*(LL + LM)), -pp*wm/LL, RR/(LL*(LL + LM));
(LM*RR)/(LL + LM), 0.0, -RR/(LL + LM), -pp*wm;
0.0, (LM*RR)/(LL + LM), pp*wm, -RR/(LL + LM)];
B = [(LL + LM)/(LL*LM), 0.0; 0.0, (LL + LM)/(LL*LM); 0.0, 0.0; 0.0, 0.0];
dxdt = A*x + B*u(t);
endfunction
My problem is that I don't know how to write similar function for evaluation of the right hand side of the second equation because it depends on solution of the first equation. Can anybody give me an advice how to do that?
Possible solution:
x0 = zeros(4, 1);
xtilde0 = zeros(4, 1);
X0 = [x0; xtilde0];
t0 = 0;
dt = 0.001;
t = 0:dt:1;
function ut = u(t)
ut = [Vm*cos(2*%pi*fs*t); Vm*sin(2*%pi*fs*t)];
endfunction
function dXdt = RightHandSide(t, X, u)
x = X(1:4);
xtilde = X(5:8);
// dx/dt = A*x + B*u
A = [(-RS*(LL + LM)^2 - RR*LM^2)/(LL*LM*(LL + LM)), 0.0, RR/(LL*(LL + LM)), pp*wm/LL;
0.0, (-RS*(LL + LM)^2 - RR*LM^2)/(LL*LM*(LL + LM)), -pp*wm/LL, RR/(LL*(LL + LM));
(LM*RR)/(LL + LM), 0.0, -RR/(LL + LM), -pp*wm;
0.0, (LM*RR)/(LL + LM), pp*wm, -RR/(LL + LM)];
B = [(LL + LM)/(LL*LM), 0.0; 0.0, (LL + LM)/(LL*LM); 0.0, 0.0; 0.0, 0.0];
// dxtilde/dt = (An - L*Cn)*xtilde + (dA - L*dC)*x + dB*u
An = [(-RSn*(LLn + LMn)^2 - RRn*LMn^2)/(LLn*LMn*(LLn + LMn)), 0.0, RRn/(LLn*(LLn + LMn)), pp*wm/LLn;
0.0, (-RSn*(LLn + LMn)^2 - RRn*LMn^2)/(LLn*LMn*(LLn + LMn)), -pp*wm/LLn, RRn/(LLn*(LLn + LMn));
(LMn*RRn)/(LLn + LMn), 0.0, -RRn/(LLn + LMn), -pp*wm;
0.0, (LMn*RRn)/(LLn + LMn), pp*wm, -RRn/(LLn + LMn)];
K = 1.5;
l1 = (K - 1.0)*((RSn*(LLn + LMn)^2 + RRn*LMn^2)/(LLn*LMn*(LLn + LMn)) + RRn/(LLn + LMn));
l2 = (K - 1.0)*pp*wm;
l3 = (K^2 - 1.0)*((RSn*(LLn + LMn)^2 + RRn*LMn^2)/(LMn*(LLn + LMn)) - (LMn*RRn)/(LLn + LMn)) - (K - 1)*((RSn*(LLn + LMn)^2 + RRn*LMn^2)/(LMn*(LLn + LMn)) + (LLn*RRn)/(LLn + LMn));
l4 = -(K - 1.0)*LLn*wm*pp;
L = [l1, l2;
-l2, l1;
l3, l4;
-l4, l3];
Bn = [(LLn + LMn)/(LLn*LMn), 0.0; 0.0, (LLn + LMn)/(LLn*LMn); 0.0, 0.0; 0.0, 0.0];
Cn = [1.0, 0.0, 0.0, 0.0; 0.0, 1.0, 0.0, 0.0];
A = [(-RS*(LL + LM)^2 - RR*LM^2)/(LL*LM*(LL + LM)), 0.0, RR/(LL*(LL + LM)), pp*wm/LL;
0.0, (-RS*(LL + LM)^2 - RR*LM^2)/(LL*LM*(LL + LM)), -pp*wm/LL, RR/(LL*(LL + LM));
(LM*RR)/(LL + LM), 0.0, -RR/(LL + LM), -pp*wm;
0.0, (LM*RR)/(LL + LM), pp*wm, -RR/(LL + LM)];
B = [(LL + LM)/(LL*LM), 0.0; 0.0, (LL + LM)/(LL*LM); 0.0, 0.0; 0.0, 0.0];
C = [1.0, 0.0, 0.0, 0.0; 0.0, 1.0, 0.0, 0.0];
dA = An - A;
dB = Bn - B;
dC = Cn - C;
dxdt = A*x + B*u(t);
dxtildedt = (An - L*Cn)*xtilde + (dA - L*dC)*x + dB*u(t);
dXdt = [dxdt; dxtildedt];
endfunction
X = ode(X0, t0, t, list(RightHandSide, u));
Let y = x_tilde. Let assume that it is a 3x1 vector (we can't guess its size with your current presentation).
Build the column X = [x1 x2 x3 x4 y1 y2 y3].' (big X)
Express the column (dX/dt) according to X coordinates and t
Convert the system built in 2) into a Scilab function X_dot = Xder(t, X)
Build the initial state vector Xinit = [x1(t_init); x2(t_init); .. y3(t_init)]
Define the vector t of times to which you want the values of X. They all likely have to be ≥ t_init, and must be strictly increasing.
call X = ode(Xinit, t_init, t, Xder)
X(:,i) should then be the values of X components for each t(i) date.
You can "back-split" big X into x = X(1:4,:) and x_tilde = X(5:$,:).

Convert conditional logic for edge wrapping to math expression

I have a simple function that takes a float parameter x in the range [-1.0, 2.0] and maps it to range [0.0, 1.0] such that values below 0.0 are mapped to 1.0 and values above 1.0 are mapped to 0.0:
float wrap_ternary(x) {
// result is between [0.0, 1.0]
return x < 0.0 ? x + 1.0 : x > 1.0 ? x - 1.0 : x;
}
I want to convert that function to use math expressions instead of conditionals. I've come up with the following algorithm:
float wrap_mod(x) {
return (((x + 1.0) % 2.0 + 1.0) % 1.0);
}
However the edge wrapping of this algorithm is inclusive i.e. 1.0/2.0 are mapped to 0.0 whereas the ternary version would map these values to 1.0:
X
wrap_ternary
wrap_mod
wrong
-1.0
0.0
0.0
-0.5
0.5
0.5
0.0
0.0
0.0
0.5
0.5
0.5
1.0
1.0
0.0
x
1.5
0.5
0.5
2.0
1.0
0.0
x
How would I modify my algorithm so that it produces the same results as the ternary version? I've tried to subtract EPSILON from my modulo but that didn't really work... I just can't wrap my head around this. No pun intended.
I am not familiar with GLSL, but it seems to be quite similar to C. So below is a C solution wrap_floor() that uses the floor() function to implement the conditional addition or subtraction, together with a test framework that tests it exhaustively.
The only issue is getting the switchover points correct, which would naturally fall on integers. The question already mentions "epsilon techniques" in which one increases or diminishes floating-point numbers by one ulp (unit in the last place) and which can be used to shift the bounds slightly. To get the correct shift for this case, we need to multiply the input by the "magic" multiplier (1 - 1 ulp). Assuming that float maps to the IEEE-754 binary32 format, the desired number is the literal float constant 0.99999994.
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
// result is between [0.0, 1.0]
float wrap_ternary (float x) {
return x < 0.0 ? x + 1.0 : x > 1.0 ? x - 1.0 : x;
}
float wrap_floor (float x) { return x - floorf (x * 0.99999994f); }
int main (void)
{
/* exhaustive test of wrapping functions over [-1,2] */
float x = -0.0f;
while (x >= -1.0f) {
if (wrap_ternary (x) != wrap_floor (x)) {
printf ("x=% .9e wrap_ternary = % .8e wrap_floor=% .8e\n",
x, wrap_ternary (x), wrap_floor (x));
}
x = nextafterf (x, -INFINITY);
}
x = 0.0f;
while (x <= 2.0f) {
if (wrap_ternary (x) != wrap_floor (x)) {
printf ("x=% .8ef wrap_ternary = % .8e wrap_floor=% .8e\n",
x, wrap_ternary (x), wrap_floor (x));
}
x = nextafterf (x, INFINITY);
}
return EXIT_SUCCESS;
}
This might work as expected, though...
float wrap_somehow(x) {
return 0.5 - (frac(1.5 - abs(x - 0.5)) - 0.5) * sign(x - 0.5);
}
The frac function, which returns the fractional part of a float, seems to have the behaviour you want for all inputs other than 1.0 and 2.0. So this would work:
float wrap_ternary(x) {
return (x == 1.0 || x == 2.0) ? 1.0 : frac(x);
}

How do you rectify a 3D planar polygon?

I have a 3D planar (all vertices lie in some plane) polygon with vertices: [(x1, y1, z1) ... (x1, y1, z1)].
I would like to transform this polygon so that I'm viewing it orthographically (as if I'm looking at it straight on).
How can this be done in Python?
I assume you have no information except for vertex coordinates.
Take three non-collinear (perhaps consequent) vertices C, A, B. Calculate normalized vector (divide by vector length)
b = (B - A) / |B - A|
then normal vector (using vector/cross multiplication)
N = b.cross.(A-C) and normalize it
un = N / |N|
and another unit vector in polygon plane
v = b.cross.n
Now we want find such matrix of affine transformations, that transforms vertex A into point (0,0,0), edge AB will be collinear with OX axis, normal will be collinear with OZ axis, vector q will be collinear with OY axis. This all means that rotated polygon will lie in OXY plane.
Mathematically: points A, u=A+b, v=A+q, n=A+un should be transformed in quadruplet (0,0,0), (1,0,0), (0,1,0), (0,0,1). In matrix form
[Ax ux vx nx] [0 1 0 0]
M * [Ay uy vy ny] = [0 0 1 0]
[Az uz vz nz] [0 0 0 1]
[1 1 1 1 ] [1 1 1 1]
or
M * S = D
Using matrix inverse
M * S * Sinv = D * Sinv
and finally
M = D * Sinv
So calculate matrix M and multiply it with every vertex coordinates. New coordinates should have zero Z-component (or very small due to numerical errors).
You can perform all described operations with numpy library with a little code
Example with specific data
Quick-made implementation in plain Python for reference
import math
def calcMatrix(ax, bx, cx, ay, by, cy, az, bz, cz):
ux, uy, uz = bx - ax, by - ay, bz - az
mag = math.sqrt(ux*ux+uy*uy +uz*uz)
ux, uy, uz = ux / mag, uy / mag, uz / mag
Cx, Cy, Cz = ax - cx, ay - cy, az - cz
nx, ny, nz = uy * Cz - uz * Cy, uz * Cx - ux * Cz, ux * Cy - uy * Cx
mag = math.sqrt(nx*nx+ny*ny+nz*nz)
nx, ny, nz = nx / mag, ny / mag, nz / mag
vx, vy, vz = uy * nz - uz * ny, uz * nx - ux * nz, ux * ny - uy * nx
denom = 1.0 / (ux*ux+uy*uy + uz*uz)
M = [[0.0]*4 for _ in range(4)]
M[3][3] = 1.0
M[0][0] = denom*(ux)
M[0][1] = denom*(uy)
M[0][2] = denom*(uz)
M[0][3] = denom*(-ax*ux-ay*uy+az*uz)
M[1][0] = denom*(vx)
M[1][1] = denom*(vy)
M[1][2] = denom*(vz)
M[1][3] = -denom*(ax*vx-ay*vy+az*vz)
M[2][0] = denom*(nx)
M[2][1] = denom*(ny)
M[2][2] = denom*(nz)
M[2][3] = denom*(-ax*nx-ay*ny+az*nz)
return M
def mult(M, vec):
res = [0]*4
for k in range(4):
for i in range(4):
res[k] += M[k][i] * vec[i]
return res
#test corners and middle point
M = calcMatrix(1, 0, 0, 0, 1, 0, 0, 0, 1)
#print(M)
p = [1, 0, 0, 1]
print(mult(M, p))
p = [0, 1, 0, 1]
print(mult(M, p))
p = [0, 0, 1, 1]
print(mult(M, p))
p = [1/3, 1/3, 1/3, 1]
print(mult(M, p))
test results:
[0.0, 0.0, 0.0, 1.0]
[1.4142135623730951, 0.0, 0.0, 1.0]
[0.7071067811865476, 1.2247448713915892, 0.0, 1.0]
[0.7071067811865476, 0.4082482904638631, 1.1102230246251565e-16, 1.0]
Find a normal n to the polygon, by means of a cross-product between two non-parallel sides. Take the cross-product of n with a vertical vector, to get an horizontal vector u. Then take the cross product of n and u to get v, and normalize the vectors. u and v are parallel to the plane of the polygon and orthogonal to each other.
Finally, for every vertex p compute the 2D coordinates (p.u, p.v) which show you the polygon in its plane.
numpy supplies the cross and dot vector functions. Also linalg.norm (or sqrt(dot(v, v))).
Here's a robust approach using NumPy (project(); the rest is test code).
import numpy
import scipy.spatial
def project(x):
# Center the plane on the origin
x = x - numpy.mean(x, axis=0)
# Compute the Singular Value Decomposition
u, s, v = numpy.linalg.svd(x)
# Return the top two principal components
return u[:, :2] # numpy.diag(s[:2])
def test():
n = 10
x = (numpy.random.rand(n, 2) # numpy.random.rand(2, 3)) + numpy.random.rand(3)
y = project(x)
print(x.shape, y.shape)
print(
numpy.max(
numpy.abs(
scipy.spatial.distance_matrix(x, x)
- scipy.spatial.distance_matrix(y, y)
)
)
)
if __name__ == "__main__":
test()
Sample output:
(10, 3) (10, 2)
5.551115123125783e-16

Julia: How to execute functions in parallel?

I want to run functions in parallel. These functions are executed many times in a loop.
coordSys = SharedArray{Bool}([true,false,true,true]);
dir = SharedArray{Int8}([1,2,3,2]);
load = SharedArray{Float64}([8,-7.5,7,-8.5]);
L = SharedArray{Float64}([400,450,600,500]);
r = SharedArray{Float64}([0.0 0.0 1.0; 0.0 -1.0 0.0; 1.0 0.0 0.0
0.0 0.0 1.0; 0.0 -1.0 0.0; 1.0 0.0 0.0
0.0 0.0 1.0; 0.0 -1.0 0.0; 1.0 0.0 0.0
0.0 0.0 1.0; 0.0 -1.0 0.0; 1.0 0.0 0.0]);
Obviously these vectors will be huge, but for simplicity I just put this limited size.
Operation without parallel computing:
function unifLoad(coordSys,dir,load,L,ri)
if coordSys == true
if dir == 1
Q = [load;0;0];
elseif dir == 2
Q = [0;load;0];
elseif dir == 3
Q = [0;0;load];
end
q = ri*Q; #matrix multiplication
P = q[1]*L/2;
V = q[2]*L/2;
M = -q[3]*L*L/12;
f = [P;V;M];
else
f = [1.0;1.0;1.0];
end
return f
end
running the loop:
var = zeros(12)
for i = 1:length(L)
var[3*(i-1)+1:3*i] = unifLoad(coordSys[i],dir[i],load[i],L[i],r[3*(i-1)+1:3*i,:]);
end
The returned value is:
var
12-element Array{Float64,1}:
0.0
0.0
-1.06667e5
1.0
1.0
1.0
2100.0
0.0
-0.0
0.0
2125.0
-0.0
Operation with parallel computing
I've been trying to implement the same function in parallel, but without getting the same results.
# addprocs(3)
#everywhere function unifLoad_Parallel(coordSys,dir,load,L,ri)
if coordSys == true
if dir == 1
Q = [load;0;0];
elseif dir == 2
Q = [0;load;0];
elseif dir == 3
Q = [0;0;load];
end
q = ri*Q; # Matrix multiplication (ri -> Array 3x3)
P = q[1]*L/2;
V = q[2]*L/2;
M = -q[3]*L*L/12;
f = [P;V;M];
else
f = [1.0;1.0;1.0];
end
return f
end
running the parallel loop:
var_parallel = SharedArray{Float64}(12);
#parallel for i = 1:length(L)
var_parallel[3*(i-1)+1:3*i] = unifLoad_Parallel(coordSys[i],dir[i],load[i],L[i],r[3*(i-1)+1:3*i,:]);
end
The returned value is:
var_parallel
12-element SharedArray{Float64,1}:
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
On my Julia 0.6.3 the parallel code returns the same result so I am unable to reproduce the problem (also I do not encounter the issue #SalchiPapa reports).
However, I would like to note that this code actually should work faster with threads (I assume that the real problem is much larger). Here is the code you could use (I used an equivalent implementation to your which is a bit shorter - but the only significantly relevant change is that I wrap it in a function which provides dramatic performance gains). The crucial issue that all arrays except var are shared but only read. And var is written but only once at each entry and not read from. This is the case where it is safe to use threading which has a lower overhead.
Here is an example code (you have to define JULIA_NUM_TREADS environment variable before starting Julia and set it to number of threads you want - most probably 4 is what you want):
using Base.Threads
function experiment()
coordSys = [true,false,true,true];
dir = [1,2,3,2];
load = [8,-7.5,7,-8.5];
L = [400,450,600,500];
r = [0.0 0.0 1.0; 0.0 -1.0 0.0; 1.0 0.0 0.0
0.0 0.0 1.0; 0.0 -1.0 0.0; 1.0 0.0 0.0
0.0 0.0 1.0; 0.0 -1.0 0.0; 1.0 0.0 0.0
0.0 0.0 1.0; 0.0 -1.0 0.0; 1.0 0.0 0.0];
unifLoad(coordSys,dir,load,L,r, i) =
coordSys ? load * L * r[3*(i-1)+1:3*i, dir] .* [0.5, 0.5, -L/12] : [1.0, 1.0, 1.0]
var = zeros(12)
#threads for i = 1:length(L)
var[3*(i-1)+1:3*i] = unifLoad(coordSys[i],dir[i],load[i],L[i],r,i);
end
var
end
Also here is a bit simplified code for parallel processing using similar ideas:
coordSys = SharedArray{Bool}([true,false,true,true]);
dir = SharedArray{Int8}([1,2,3,2]);
load = SharedArray{Float64}([8,-7.5,7,-8.5]);
L = SharedArray{Float64}([400,450,600,500]);
r = SharedArray{Float64}([0.0 0.0 1.0; 0.0 -1.0 0.0; 1.0 0.0 0.0
0.0 0.0 1.0; 0.0 -1.0 0.0; 1.0 0.0 0.0
0.0 0.0 1.0; 0.0 -1.0 0.0; 1.0 0.0 0.0
0.0 0.0 1.0; 0.0 -1.0 0.0; 1.0 0.0 0.0]);
#everywhere unifLoad(coordSys,dir,load,L,r,i) =
coordSys ? load * L * r[3*(i-1)+1:3*i, dir] .* [0.5, 0.5, -L/12] : [1.0, 1.0, 1.0]
vcat(pmap(i -> unifLoad(coordSys[i],dir[i],load[i],L[i],r,i), 1:length(L))...)
Here pmap is mostly used to simplify the code so that you do not need #sync.

f# break function evaluation

I have to minimize a quite complicate function. For the minimization I use the NonLinearProgram from the Extreme Optimization Library. Since there´s no way to find a global minimum, I use different startpoints and choose, then the "best minimum". My problem is there can be some startpoints, which evaluatin can take a very long time. Is there some general way in F# or some special method in Extreme Optimization, to stop the evaluation let´s say after 10 min and just give a list with [nan; nan; nan; nan; nan; nan] back?
let funcFindPara (startpoint:float list) func =
let nlp = new NonlinearProgram(6)
// add the function
nlp.ObjectiveFunction <- (fun x -> func x.[0] x.[1] x.[2] x.[3] x.[4] x.[5])
// add lineare constraints
nlp.AddLinearConstraint("a + d > 0", Vector.Create(1.0, 0.0, 0.0, 1.0, 0.0, 0.0), 1.0e-5, infinity) |> ignore
nlp.AddLinearConstraint("c > 0", Vector.Create(0.0, 0.0, 1.0, 0.0, 0.0, 0.0), 1.0e-5, infinity) |> ignore
nlp.AddLinearConstraint("d > 0", Vector.Create(0.0, 0.0, 0.0, 1.0, 0.0, 0.0), 1.0e-5, infinity) |> ignore
nlp.AddLinearConstraint("gamma > 0", Vector.Create(0.0, 0.0, 0.0, 0.0, 1.0, 0.0), 1.0e-5, infinity) |> ignore
nlp.AddLinearConstraint("0 < rho_infty <= 1", Vector.Create(0.0, 0.0, 0.0, 0.0, 0.0, 1.0), 1.0e-5, 1.0) |> ignore
// add nonlinear constrains
// gamma <= -ln(rho_infty)
nlp.AddNonlinearConstraint((fun (x : Vector) -> x.[4] + log(x.[5])), ConstraintType.LessThanOrEqual, 0.0, (fun (x : Vector) -> fun (y : Vector) ->
y.[0] <- 0.0
y.[1] <- 0.0
y.[2] <- 0.0
y.[3] <- 0.0
y.[4] <- 1.0
y.[5] <- 1.0 / x.[5]
y
)
) |> ignore
// add starting point
nlp.InitialGuess <- Vector.Create(startpoint.[0], startpoint.[1], startpoint.[2], startpoint.[3], startpoint.[4], startpoint.[5])
// solve
let solution = nlp.Solve()
// return list with parameters
List.init 6 (fun index -> solution.[index])
You could wrap the function with async { } and pass that to RunSynchronously along with a timeout:
let withTimeout f timeout defaultValue =
try Async.RunSynchronously((async { return f() }), timeout)
with :? System.TimeoutException -> defaultValue
let longFn() =
System.Threading.Thread.Sleep(5000)
[1.0; 2.0; 3.0]
//Usage
withTimeout longFn 2000 [nan; nan; nan]

Resources