Related
For example, I have kmeans code
program read_from_file
use functions_module
!$ use omp_lib
character(len=2) :: c2
integer :: i, j, k, l, c, d
real, dimension(:,:), allocatable :: r, centroid, new_centro, converge
real, dimension(:), allocatable :: cost
integer,dimension(:),allocatable :: indices,distancereg,cluster
integer :: ios_read = 0
integer :: n = 0
integer :: omega, tid, n_threads
real, dimension(:,:), allocatable :: comparison_value
print *, 'which data index?'
read*, idx
write(c2, '(i2.2)') idx
open(unit=99, file='datatest1.dat', iostat=ios_read)
if (ios_read /= 0) then
print *, "kmeans_data_distrib_"//c2//"_small.dat could not be opened"
! print
end if
!find the maximum lines
do
read(99, *, iostat=ios_read) i, x, y
if (ios_read > 0) then
print *, "something is wrong"
stop
else if (ios_read < 0) then
print *, "end of file reached"
exit
else
n = n+1
end if
end do
rewind(99)
!do i=1,n
open(unit=98, file='rawdata.dat')
allocate(r(2, n))
do i = 1,n
read(99, *, iostat=ios_read) j, x, y
r(1, j) = x
r(2, j) = y
write(98, *) x, y
end do
close(99) ! close kmeans
close(98) ! close rawdatai
print*, 'put k'
read*, k
allocate (comparison_value(2,k))
comparison_value = 0.02
** do l=1,10
call centroid_inits(r, n, k, centroid)
call min_distance(r, n, k, centroid, distance,indices,distancereg)
call new_centroid(r,n,k,centroid,indices,new_centro,omega)
call costfunction(r,n,k,distancereg,indices,new_centro,cluster,cost)
end do
open(unit=99,file="kmeans3_test.dat")
do i = 1, n
write(99,"(2es14.5,i4)") r(:,i),indices(i)
enddo
close(99)
Contains
subroutine centroid_inits(r,n,k,centroid)
real,dimension (:,:),intent(in),allocatable :: r
real,dimension (:,:),intent(out),allocatable:: centroid
real,dimension(k),allocatable::xc(:) ,yc(:)
integer,intent(in) :: n,k
integer :: i
real :: maks_x,maks_y,min_x,min_y
allocate(centroid(2, k))
allocate(xc(k))
allocate(yc(k))
maks_x = maxval(r(1,:))
maks_y = maxval(r(2,:))
min_x = minval(r(1,:))
min_y = minval(r(2,:))
! print *, min_x, maks_x, min_y, maks_y
do i = 1,k
xc (i) = min_x + (maks_x - min_x) * fib_rnd()
yc (i) = min_y + (maks_y - min_y) * fib_rnd()
centroid (1,i) = xc(i)
centroid (2,i) = yc(i)
end do
do i = 1,k
print *, centroid(:,i)
end do
end subroutine centroid_inits
subroutine min_distance(r,n,k,centroid,distance,indices,distancereg)
integer, intent(out):: n,k
real,dimension(:,:),intent(in),allocatable::centroid
real,dimension(:,:),intent(in),allocatable::r
integer,dimension(:),intent(out),allocatable::indices,distancereg
real ::d_min
integer::y,i_min,j,i
integer,parameter :: data_dim=2
allocate (indices(n))
allocate (distancereg(k))
!cost=0.d0
do j=1,n
i_min = -1
d_min=1.d6
! !$ OMP DO
do i=1,k
distance=0.d0
distancereg(i)=0.d0
do y=1,data_dim
distance = distance+abs(r(y,j)-centroid(y,i))
distancereg(i)=distancereg(i)+abs(r(y,j)-centroid(y,i))
end do
if (distance<d_min) then
d_min=distance
i_min=i
end if
end do
!!$OMP END DO
if( i_min < 0 ) print*," found error by assigning k-index to particle ",j
indices(j)=i_min
end do
end subroutine
subroutine new_centroid(r,n,k,centroid,indices,new_centro,omega)
integer, intent(in):: n
real,dimension(:,:),intent(inout),allocatable ::centroid
real,dimension(:,:),intent(in),allocatable ::r
integer,dimension(:),intent(in),allocatable::indices
real,dimension(:,:),intent(out),allocatable:: new_centro
integer,intent(inout)::k
integer :: t,y,j,k_ind
integer,intent(out) :: omega
real,dimension(:),allocatable :: summ
allocate(summ(2))
allocate (new_centro(2,k))
t=2
do k_ind=1,k
omega = count(indices==k_ind)
summ(1)=0
summ(2)=0
do j=1,n
if (indices(j)==k_ind) then
summ(1) =+ r(1,j)
summ(2) =+ r(2,j)
end if
end do
new_centro(1,k_ind) = summ(1)/omega
new_centro(2,k_ind) = summ(2)/omega
end do
centroid = new_centro
!do k_ind=1,k
!print*, 'new centro',new_centro(:,k_ind)
!end do
end subroutine
subroutine costfunction(r,n,k,distancereg,indices,new_centro,cluster,cost)
integer, dimension (:), allocatable, intent(out) :: distancereg, indices
integer, dimension (:), allocatable, intent(out) :: cluster
real, dimension (:,:), allocatable, intent(in) :: r
real, dimension (:,:), intent(in), allocatable :: new_centro
real, dimension(:), intent(out), allocatable :: cost
integer :: i,k
allocate(cluster(k))
allocate(cost(k))
allocate(distancereg(k))
call min_distance(r,n,k,centroid,distance,indices,distancereg)
cluster = 0
do i=1,k
cost(i)=0
cluster(i)=count(indices==i)
cost(i)=(1.0/cluster(i))*distancereg(i)
! print*,cost(i)
end do
print*," total sum of cluster members ",sum(cluster)," vs N ",n
end subroutine
subroutine convergence_value(converge, centroid, new_centro, cost, cluster)
real, dimension (:,:), intent(inout), allocatable :: new_centro
real, dimension (:,:), intent(inout), allocatable :: centroid
real, dimension(:), allocatable, intent(out):: cost
integer, dimension (:), allocatable, intent(out) :: cluster
real, dimension(:,:), intent (inout), allocatable:: converge
allocate(converge(2,k))
call centroid_inits(r, n, k, centroid)
call min_distance(r, n, k, centroid, distance,indices,distancereg)
call new_centroid(r,n,k,centroid,indices,new_centro,omega)
converge = (abs(centroid-new_centro))
print*, 'this is c',converge
end subroutine
end program read_from_file
It runs okay with serial. But I want to apply openmp. I want to each thread doing the same calculation and find which thread have better cost function and time. (all thread doing the clusterization). My attemp and idea is to paralellized the code before encounter the subroutine, that two asterisk. But I do not know if its enough (though I tried it and showing error), and how do make display report of each thread ?
*You might notice from the code that I am a beginner
I wrote the following code, and then tried using OpenMP to parallelise it. However, after I compiled the following OpenMP code using f2py, Python always generates certain errors when I run it. There are no error messages, only that the numbers are a bit off, and whenever I compile it with f2py and run it in Python, it kills the kernel.
I am wondering if this has anything to do with my parallel region. I am always a bit confused about which variables to take private so can anyone observe any errors?
subroutine simulate_omp(m,nt,s0,l,d,a,numthreads,x,y,psi)
!Pedestrian motion model
!input variables:
!n = m^2 = number of students
!nt: number of time steps
!s0: student speed
!l: initial spacing between students
!d: student motion influence by all other students within distance <= d
!a: noise amplitude
!numthreads: number of threads to use in parallel regions
!output variables:
!x,y: all m^2 student paths from i=1 to nt+1
!psi: synchronization parameter, store at all nt+1 times (including initial
condition)
use omp_lib
implicit none
integer, intent(in) :: m,nt,numthreads
real(kind=8), intent(in) :: s0,l,d,a
real(kind=8), dimension(m*m,nt+1), intent(out) :: x,y
real(kind=8), dimension(nt+1), intent(out) :: psi
real(kind=8), dimension(m*m,nt+1) :: xtemp,ytemp,u,v
real(kind=8), dimension(m*m,nt) :: usum,vsum,umean,vmean
real(kind=8) :: r(m*m)
real(kind=8),parameter :: pi = 4*atan(1.0_8)
integer :: i1,j1,k1,i2,j2,k2,count
!$call omp_set_num_threads(numthreads)
! initialize student positions
x = 0.d0
y = 0.d0
k1 = 0
do i1 = 1,m
do j1=1,m
k1 = k1 + 1
x(k1,1) = (j1-1)*l/2 - (m-1)*l/4
y(k1,1) = (i1-1)*l/2 - (m-1)*l/4
end do
end do
x(:,1) = x(:,1)/(m-1)
y(:,1) = y(:,1)/(m-1)
! initialize
xtemp(:,1) = x(:,1)
ytemp(:,1) = y(:,1)
call random_number(r)
u(:,1) = s0*cos(r*2*pi-pi)
v(:,1) = s0*sin(r*2*pi-pi)
psi(1) = sqrt(sum(u(:,1))**2+sum(v(:,1)**2))/dble(m)/dble(m)/s0
do i2 = 1,nt
!$OMP parallel do private(j2,k2,l)
do j2 = 1,m*m
usum(j2,i2) = 0
vsum(j2,i2) = 0
count = 0
!$OMP parallel do reduction(+:usum,vsum,count)
do k2 = 1,m*m
if ((xtemp(k2,i2)-xtemp(j2,i2))**2+(ytemp(k2,i2)-ytemp(j2,i2))**2<=d**2)
then
usum(j2,i2) = usum(j2,i2)+u(k2,i2)
vsum(j2,i2) = vsum(j2,i2)+v(k2,i2)
count = count+1
end if
end do
!$OMP end parallel do
umean(j2,i2) = usum(j2,i2)/dble(count)
vmean(j2,i2) = vsum(j2,i2)/dble(count)
u(j2,i2+1) = s0*cos(atan(vmean(j2,i2)/umean(j2,i2))+a*(r(j2)*2*pi-pi))
v(j2,i2+1) = s0*sin(atan(vmean(j2,i2)/umean(j2,i2))+a*(r(j2)*2*pi-pi))
xtemp(j2,i2+1) = xtemp(j2,i2)+u(j2,i2+1)
ytemp(j2,i2+1) = ytemp(j2,i2)+v(j2,i2+1)
! boundary conditions
if (xtemp(j2,i2+1)>l) then
xtemp(j2,i2+1) = xtemp(j2,i2+1)-2*l
else
if (xtemp(j2,i2+1)<-l) then
xtemp(j2,i2+1) = xtemp(j2,i2+1)+2*l
end if
end if
if (ytemp(j2,i2+1)>l) then
ytemp(j2,i2+1) = ytemp(j2,i2+1)-2*l
else
if (ytemp(j2,i2+1)<-l) then
ytemp(j2,i2+1) = ytemp(j2,i2+1)+2*l
end if
end if
end do
!$OMP end parallel do
psi(i2+1) = sqrt(sum(u(:,i2+1))**2+sum(v(:,i2+1))**2)/dble(m)/dble(m)/s0
end do
x(:,1:nt+1) = xtemp(:,1:nt+1)
y(:,1:nt+1) = ytemp(:,1:nt+1)
end subroutine simulate_omp
The argument l is declared with intent(in) and not modified in the loop so there is no need to declare it private. Below is a suggestion without the outer parallel loop:
subroutine simulate_omp(m,nt,s0,l,d,a,numthreads,x,y,psi)
!Pedestrian motion model
!input variables:
!n = m^2 = number of students
!nt: number of time steps
!s0: student speed
!l: initial spacing between students
!d: student motion influence by all other students within distance <= d
!a: noise amplitude
!numthreads: number of threads to use in parallel regions
!output variables:
!x,y: all m^2 student paths from i=1 to nt+1
!psi: synchronization parameter, store at all nt+1 times (including initial
condition)
use omp_lib
implicit none
integer, intent(in) :: m,nt,numthreads
real(kind=8), intent(in) :: s0,l,d,a
real(kind=8), dimension(m*m,nt+1), intent(out) :: x,y
real(kind=8), dimension(nt+1), intent(out) :: psi
real(kind=8), dimension(m*m,nt+1) :: xtemp,ytemp,u,v
real(kind=8), dimension :: usum,vsum,umean,vmean
real(kind=8) :: r(m*m)
real(kind=8),parameter :: pi = 4*atan(1.0_8)
integer :: i1,j1,k1,i2,j2,k2,count
!$call omp_set_num_threads(numthreads)
! initialize student positions
x = 0.d0
y = 0.d0
k1 = 0
do i1 = 1,m
do j1=1,m
k1 = k1 + 1
x(k1,1) = (j1-1)*l/2 - (m-1)*l/4
y(k1,1) = (i1-1)*l/2 - (m-1)*l/4
end do
end do
x(:,1) = x(:,1)/(m-1)
y(:,1) = y(:,1)/(m-1)
! initialize
xtemp(:,1) = x(:,1)
ytemp(:,1) = y(:,1)
call random_number(r)
u(:,1) = s0*cos(r*2*pi-pi)
v(:,1) = s0*sin(r*2*pi-pi)
psi(1) = sqrt(sum(u(:,1))**2+sum(v(:,1)**2))/dble(m)/dble(m)/s0
do i2 = 1,nt
do j2 = 1,m*m
usum = 0
vsum = 0
count = 0
!$OMP parallel do private(k2), reduction(+:usum,vsum,count)
do k2 = 1,m*m
if ((xtemp(k2,i2)-xtemp(j2,i2))**2+(ytemp(k2,i2)-ytemp(j2,i2))**2<=d**2) then
usum = usum+u(k2,i2)
vsum = vsum+v(k2,i2)
count = count+1
end if
end do
!$OMP end parallel do
umean = usum/dble(count)
vmean = vsum/dble(count)
u(j2,i2+1) = s0*cos(atan(vmean/umean)+a*(r(j2)*2*pi-pi))
v(j2,i2+1) = s0*sin(atan(vmean/umean)+a*(r(j2)*2*pi-pi))
xtemp(j2,i2+1) = xtemp(j2,i2)+u(j2,i2+1)
ytemp(j2,i2+1) = ytemp(j2,i2)+v(j2,i2+1)
! boundary conditions
if (xtemp(j2,i2+1)>l) then
xtemp(j2,i2+1) = xtemp(j2,i2+1)-2*l
else
if (xtemp(j2,i2+1)<-l) then
xtemp(j2,i2+1) = xtemp(j2,i2+1)+2*l
end if
end if
if (ytemp(j2,i2+1)>l) then
ytemp(j2,i2+1) = ytemp(j2,i2+1)-2*l
else
if (ytemp(j2,i2+1)<-l) then
ytemp(j2,i2+1) = ytemp(j2,i2+1)+2*l
end if
end if
end do
psi(i2+1) = sqrt(sum(u(:,i2+1))**2+sum(v(:,i2+1))**2)/dble(m)/dble(m)/s0
end do
x(:,1:nt+1) = xtemp(:,1:nt+1)
y(:,1:nt+1) = ytemp(:,1:nt+1)
end subroutine simulate_omp
You can time it and compare it with the outer loop parallelised using private(j2,k2,umean,vmean,usum,vsum,count), shared(u,v,xtemp,ytemp). Make sure to have OMP_NESTED set to true for the latter tests.
I am trying to get a solution for the Rossler attractor system using RK-4, with parameters a=0.2, b=0.2, c=6 and initial conditions x0=-5.6, y0=0, z0=0. I tried solving using Fortran but the result is only displaying the initial conditions even after 1000 iterations. What mistakes am I making?
implicit none
external rossler
integer::i,j=0,n,nstep
real::a,b,c,y1(3),t0,dt,t1,t2,ya(3),yb(3),yd(3),t,x0,y0,z0,x(1000),y(1000),z(1000),k1(3),k2(3),k3(3),k4(3),h
print *, "enter the values of a,b,c"
read (*,*) a,b,c
print *, "enter the values of x0,y0,z0"
read (*,*) x0,y0,z0
n=3
t0=0.0
h=0.05
ya(1)=x0
ya(2)=y0
ya(3)=z0
nstep=1000
do i=1,nstep
t1=t0
t2=t0+h
call rk4(rossler,t1,t2,1,N,k1,k2,k3,k4,Ya,Y1,Yb)
x(i)=ya(1)
y(i)=ya(2)
z(i)=ya(3)
open (99,file="rossler.txt")
write(99,*) x(i),y(i),z(i)
end do
end program
subroutine rossler(T,Yd,YB,N)
implicit none
integer n
real t,yb(n),yd(n),a,b,c
yd(1)=-yb(2)-yb(3)
Yd(2)=yb(1)+a*yb(2)
Yd(3)=b+yb(3)*(yb(1)-c)
return
end
subroutine rk4(rossler,t1,t2,nstep,N,k1,k2,k3,k4,Ya,Y1,Yb)
implicit none
external rossler
integer nstep,n,i,j
REAL T1,T2,Ya(N),k1(n),k2(n),k3(n),k4(n),H,Y1(N),T,yb(n)
T=T1+(I-1)*H
CALL rossler(T,Yb,Ya,N)
DO J=1,N
k1(j)=YB(J)*H
end do
CONTINUE
CALL rossler(T+0.5*H,Yb,Ya+k1*0.5,N)
DO J=1,N
k2(j)=YB(J)*H
enddo
CONTINUE
CALL rossler(T+0.5*H,Yb,Ya+k2*0.5,N)
DO J=1,N
K3(J)=YB(J)*H
enddo
CONTINUE
CALL rossler(T+H,Yb,Ya+k3,N)
DO J=1,n
K4(J)=YB(J)*H
Y1(J)=Ya(J)+(k1(j)+k4(j)+2.0*(k2(j)+k3(j)))/6.0
enddo
CONTINUE
DO J=1,N
Ya(J)=Y1(j)
enddo
CONTINUE
enddo
RETURN
END
Although the question seems a duplicate of another question, here I am attaching a minimally modified code so that the OP can compare it with the original one. The essential modifications are that I have removed all the unused variables, moved a, b, c, and h to a parameter module, and cleaned up unnecessary statements (like CONTINUE). No newer features of Fortran introduced (including interface block for rossler), so it is hopefully straight-forward to see how the code has been changed.
module params
real :: a, b, c, h
end module
program main
use params, only: a, b, c, h
implicit none
external rossler
integer :: i, n, nstep
real :: t, y(3)
a = 0.2
b = 0.2
c = 5.7
n = 3
t = 0.0
h = 0.05
y(1) = -5.6
y(2) = 0.0
y(3) = 0.0
nstep = 7000
open(99, file="rossler.txt")
do i = 1,nstep
call rk4 ( rossler, t, n, y )
write(99,*) y(1), y(2), y(3)
end do
end program
subroutine rossler ( t, dy, y, n )
use params, only: a, b, c
implicit none
integer n
real t, dy(n), y(n)
dy(1) = -y(2) - y(3)
dy(2) = y(1) + a * y(2)
dy(3) = b + ( y(1) - c ) * y(3)
end
subroutine rk4 ( deriv, t, n, y )
use params, only: h
implicit none
external deriv
integer n, j
real y(n), t, k1(n), k2(n), k3(n), k4(n), d(n)
call deriv ( t, d, y, n )
do j = 1,n
k1(j) = d(j) * h
enddo
call deriv ( t+0.5*h, d, y+k1*0.5, n )
DO j = 1,n
k2(j) = d(j) * h
enddo
call deriv ( t+0.5*h, d, y+k2*0.5, n )
do j = 1,n
k3(j) = d(j) * h
enddo
call deriv ( t+h, d, y+k3, n )
do j = 1,n
k4(j) = d(j) * h
y(j) = y(j) + ( k1(j) + k4(j) + 2.0 * (k2(j) + k3(j)) ) / 6.0
enddo
t = t + h
end
By choosing the parameters as a = 0.2, b = 0.2, c = 5.7 and nstep = 7000, the modified code gave the so-called Rössler attractor, which is very beautiful and appears close in pattern to that displayed in the Wiki page. So with the minimal modifications, I believe the OP will also get a similar picture (it may be interesting to see how the pattern changes depending on parameters).
2D projection of the trajectory onto the xy plane:
The problem here is exactly the same as in another question, although I can no longer vote to close as a duplicate.
To make explicit and add the comments on the question: a, b and c take the place of omega from that question; the subroutine rossler as the function fcn.
An answer to that question addresses how this issue can be resolved.
I have matrix and array. I need to multiply their value. Then find the line with the maximum sum , and bring its number and value . Help me to understand
real, allocatable, dimension(:,:) :: startArr
real, allocatable:: x(:) , maxArr
do i = 1,4
do k=1,4
startArr(1,i)*x(k)
startArr(2,i)*x(k)
startArr(3,i)*x(k)
startArr(4,i)*x(k)
end do
end do
S = startArr(1,1)+ startArr(1,2) + startArr(1,3) + startArr(1,4)
D = startArr(2,1)+ startArr(2,2) + startArr(2,3) + startArr(2,4)
M = startArr(3,1)+ startArr(3,2) + startArr(3,3) + startArr(3,4)
K = startArr(4,1)+ startArr(4,2) + startArr(4,3) + startArr(4,4)
maxArr = (S,D,M,K)
max = S
do i = 1,4
if(maxArr(i)>max)
max = maxArr(i)
end do
I cannot comment on either your algorithm for multiplication is right or not. you have to check it with your mathematics however some parts of your code is wrong. I tried to correct them. this is my own version in Compaq Fortran.
program matrix_Manipulation
implicit none
real, allocatable, dimension(:,:) :: startArr,MultipliedArray
real, allocatable:: x(:) , sumLine(:)
real:: maxValue
integer::i,k
allocate(startArr(4,4),x(4),sumLine(4),MultipliedArray(4,4))
startArr(1,1)=1
startArr(2,1)=2
startArr(3,1)=3
startArr(4,1)=4
startArr(1,2)=5
startArr(2,2)=6
startArr(3,2)=7
startArr(4,2)=8
startArr(1,3)=9
startArr(2,3)=10
startArr(3,3)=11
startArr(4,3)=12
startArr(1,4)=13
startArr(2,4)=14
startArr(3,4)=15
startArr(4,4)=16
x(1)=2
x(2)=0.5
x(3)=8
x(4)=1
do i = 1,4
do k=1,4
MultipliedArray(1,i)=startArr(1,i)*x(k)
MultipliedArray(1,i)=startArr(2,i)*x(k)
MultipliedArray(1,i)=startArr(3,i)*x(k)
MultipliedArray(1,i)=startArr(4,i)*x(k)
end do
end do
sumLine(1) = sum(MultipliedArray(1,1:4))
sumLine(2) = sum(MultipliedArray(2,1:4))
sumLine(3) = sum(MultipliedArray(3,1:4))
sumLine(4) = sum(MultipliedArray(4,1:4))
maxValue = MAXVAL(sumLine)
end program matrix_Manipulation
the 'MAXVAL' is a built-in function to mind the maximum element of an array or matrix. That is in Comaq Fortran and it might be a different funtion in gFortran or ther compiler.
I want to allocate memory for a matrix filled with double elements with Fortran 90, below is the corresponding C code:
int dim = 1024;
double *M = (double *)malloc(dim*dim*sizeof(double));
I wrote the code below but I could not access M(i) with i>=100:
program matrix
INTEGER :: i,d
CHARACTER(len=32) :: arg
REAL*8 M(*)
POINTER(ptr_M, M)
d=0
if(iargc() == 1) then
call getarg(1, arg)
read(arg, '(I10)') d
end if
print '("Dimension=", i6)', d
!allocate and init matrix
ptr_M = malloc(d*d*8)
do i=1,d*d
M(i) = i
end do
print '("M(i)=", f7.4)', M(100)
call free(ptr_M)
end program matrix
what's wrong?
Thanks to all, here is my final solution:
program matrix
IMPLICIT NONE
REAL, ALLOCATABLE :: M(:,:)
INTEGER :: i, j, d
CHARACTER(len=32) :: arg
!specify dimension with programm parameter
if(iargc() == 1) then
call getarg(1, arg)
read(arg, '(I10)') d
end if
!create and init matrix
ALLOCATE (M(d, d))
do i=1,d
do j=1,d
M(i, j) = (i - 1)*d+j
write (*,*) "M(",i,",",j,")=",M(i, j)
end do
end do
DEALLOCATE (M)
end program matrix
Using an ALLOCATABLE array, you can allocate a matrix with 100 rows and 200 columns as follows:
program xalloc
real, allocatable :: x(:,:)
allocate(x(100,200))
end program xalloc