Dear all,
I have problem in my code, which I arrange it into a question with an extremely simply minimal working example.
Thing is this, I have a function whose value is a 2D array. Here is it,
function fi_vec ( x )
real ( kind = r8 ) :: fi_vec(n)
real ( kind = r8 ), intent(in) :: x(n)
fi_vec(1) = -x(2)*x(1)
fi_vec(2) = -x(2) + 1.0_r8
return
end function fi_vec
Now I have a subroutine whose argument f is the same array as the above function, here it is,
subroutine fi_vec_sub ( x, f )
implicit none
real ( kind = r8 ) :: f(n)
real ( kind = r8 ) :: x(n)
f(1) = -x(2)*x(1)
f(2) = -x(2) + 1.0_r8
return
end
So the function fi_vec ( x ) returns the same value as the f argument in the subroutine fi_vec_sub ( x, f ) .
In principle, I think that the speed of the function and the subroutine should be the same, right?
However, I found the function is much slower (10x) than the subroutine, it seems it function just need to have many more memory allocate and deallocate.
Question is, why?
See the simple code below. You can just compile and run.
I use intel fortran, version is OneApi 2021.3, flag is
-O3 -xHost -heap-arrays
Thank you very much in advance!
module fg
implicit none
integer, private, parameter :: i4=selected_int_kind(9)
integer, private, parameter :: i8=selected_int_kind(15)
integer, private, parameter :: r8=selected_real_kind(15,9)
integer(kind=i8), private, save :: n
contains
subroutine fg_init(nin)
integer ( kind = i8 ) :: nin
n = nin
return
end subroutine
function fi_vec ( x )
real ( kind = r8 ) :: fi_vec(n)
real ( kind = r8 ), intent(in) :: x(n)
fi_vec(1) = -x(2)*x(1)
fi_vec(2) = -x(2) + 1.0_r8
return
end function fi_vec
subroutine fi_vec_sub ( x, f )
implicit none
real ( kind = r8 ) :: f(n)
real ( kind = r8 ) :: x(n)
f(1) = -x(2)*x(1)
f(2) = -x(2) + 1.0_r8
return
end
end module fg
program main
use fg
implicit none
integer, parameter :: i4=selected_int_kind(9)
integer, parameter :: i8=selected_int_kind(15)
integer, parameter :: r8=selected_real_kind(15,9)
integer(kind=i8) :: np,i,n
real(kind=r8) :: t0, t1, x(2),y(2)
n=2
call fg_init(n)
np = 10**9
call CPU_TIME(t0)
do i=1,np
x = x + 1.0_r8
call fi_vec_sub(x, y)
enddo
call CPU_TIME(t1)
write(6,*) 'time for subroutine which return 2D array = ', t1 - t0
call CPU_TIME(t0)
do i=1,np
x = x + 1.0_r8
y = fi_vec(x)
enddo
call CPU_TIME(t1)
write(6,*) 'time for function whose value is a 2D array = ', t1 - t0
end program main
Result is,
time for subroutine which return a 2D array = 6.250000000000000E-002
time for function whose value is a 2D array = 1.12500000000000
**Thank you @Beliavsky , yes add print y the time are the same. **
But in my real code, I did find using function and subroutine, the timing is different.
Below the the real code, again using intel Fortran should just compile and run just fine.
gfortran need to slightly work around a little bit by addind _i8 at the error places but it should be trivial.
test02 is the function method which cost 0.8s or so.
test03 is subroutine method which cost 2.5s or so.
More details is here in the post,
Why a function returns an array is much slower than a subroutine returns an array? (real MWE included) - #8 by CRquantum
Around line 300,
ks(:,j) = h * ( fi_in(xs(:,j)) + gi_in(xs(:,j))*warray(:,j) )
This part caused the slow down. fi_in and gi_in are functions return 2d arrays.
The code is based on,
https://people.math.sc.edu/Burkardt/f_src/stochastic_rk/stochastic_rk.html
However I extended John’s 1 ODE version to 2 ODEs , actually can extend to any number of ODEs. The equations are in Kasdin’s 1995 SDE papers.
module constants
implicit none
integer, public, parameter :: i4=selected_int_kind(9)
integer, public, parameter :: i8=selected_int_kind(15)
integer, public, parameter :: r8=selected_real_kind(15,9)
real(kind=r8), public, parameter :: zero=0.0_r8,one=1.0_r8,two=2.0_r8,three=3.0_r8,four=4.0_r8 &
,five=5.0_r8,six=6.0_r8,seven=7.0_r8,eight=8.0_r8,nine=9.0_r8 &
,ten=10.0_r8,tenth=.1_r8,half=.5_r8,third=1.0_r8/3.0_r8,sixth=1.0_r8/6.0_r8 &
,pi=4.0_r8*atan(1.0_r8) &
,normpdf_factor=1.0_r8/sqrt(8.0_r8*atan(1.0_r8)) ! 1/sqrt(2 pi)
complex(kind=r8), public, parameter :: czero=(0.0_r8,0.0_r8),ci=(0.0_r8,1.0_r8),cone=(1.0_r8,0.0_r8)
end module constants
module fg
use constants
implicit none
integer(kind=i8), private, save :: n
contains
subroutine fg_init(nin)
integer ( kind = i8 ) :: nin
n = nin
return
end subroutine
subroutine fi_vec ( x, f )
use constants
implicit none
real ( kind = r8 ) :: f(n)
real ( kind = r8 ) :: x(n)
f(1) = -x(2)*x(1)
f(2) = -x(2) + one
return
end
subroutine gi_vec ( x, g )
use constants
implicit none
real ( kind = r8 ), parameter :: gp(2) = [one,zero]
real ( kind = r8 ) :: x(n)
real ( kind = r8 ) :: g(n)
g = gp
return
end
function func_fi_vec ( x )
use constants
implicit none
real ( kind = r8 ) :: func_fi_vec(n)
real ( kind = r8 ), intent(in) :: x(n)
func_fi_vec(1) = -x(2)*x(1)
func_fi_vec(2) = -x(2) + one
return
end
function func_gi_vec ( x )
use constants
implicit none
real ( kind = r8 ), parameter :: gp(2) = [one,zero]
real ( kind = r8 ), intent(in) :: x(n)
real ( kind = r8 ) :: func_gi_vec(2)
func_gi_vec = gp
return
end
end module fg
module random2
implicit none
integer, private, parameter :: i4=selected_int_kind(9)
integer, private, parameter :: i8=selected_int_kind(15)
integer, private, parameter :: r8=selected_real_kind(15,9)
contains
subroutine ran1(rn,irn)
! multiplicative congruential with additive constant
integer(kind=i8), parameter :: mask24 = ishft(1_8,24)-1
integer(kind=i8), parameter :: mask48 = ishft(1_8,48_8)-1_8
real(kind=r8), parameter :: twom48=2.0d0**(-48)
integer(kind=i8), parameter :: mult1 = 44485709377909_8
integer(kind=i8), parameter :: m11 = iand(mult1,mask24)
integer(kind=i8), parameter :: m12 = iand(ishft(mult1,-24),mask24)
integer(kind=i8), parameter :: iadd1 = 96309754297_8
integer(kind=i8) :: irn
real(kind=r8) :: rn
integer(kind=i8) :: is1,is2
is2 = iand(ishft(irn,-24),mask24)
is1 = iand(irn,mask24)
irn = iand(ishft(iand(is1*m12+is2*m11,mask24),24)+is1*m11+iadd1,mask48)
rn = ior(irn,1_8)*twom48
return
end subroutine ran1
subroutine ran2(rn,irn)
! multiplicative congruential with additive constant
integer(kind=i8), parameter :: mask24 = ishft(1_8,24)-1
integer(kind=i8), parameter :: mask48 = ishft(1_8,48)-1
real(kind=r8), parameter :: twom48=2.0d0**(-48)
integer(kind=i8), parameter :: mult2 = 34522712143931_8
integer(kind=i8), parameter :: m21 = iand(mult2,mask24)
integer(kind=i8), parameter :: m22 = iand(ishft(mult2,-24),mask24)
integer(kind=i8), parameter :: iadd2 = 55789347517_8
integer(kind=i8) :: irn
real(kind=r8) :: rn
integer(kind=i8) :: is1,is2
is2 = iand(ishft(irn,-24),mask24)
is1 = iand(irn,mask24)
irn = iand(ishft(iand(is1*m22+is2*m21,mask24),24)+is1*m21+iadd2,mask48)
rn = ior(irn,1_8)*twom48
return
end subroutine ran2
end module random2
module random
implicit none
integer, private, parameter :: i4=selected_int_kind(9)
integer, private, parameter :: i8=selected_int_kind(15)
integer, private, parameter :: r8=selected_real_kind(15,9)
integer(kind=i8), private, parameter :: mask48 = ishft(1_8,48)-1
integer(kind=i8), private, save :: irn = 1_8,irnsave
contains
function randn(n)
! return an array of random variates (0,1)
use random2
integer(kind=i8) :: n
real(kind=r8), dimension(n) :: randn
integer(kind=i8) :: i
do i=1,n
call ran1(randn(i),irn)
enddo
return
end function randn
function gaussian(n)
! return an array of gaussian random variates with variance 1
integer(kind=i8) :: n
real(kind=r8), dimension(n) :: gaussian
real(kind=r8), dimension(2*((n+1)/2)) :: rn
real(kind=r8) :: rn1,rn2,arg,x1,x2,x3
real(kind=r8), parameter :: pi=4.0d0*atan(1.0_r8)
integer(kind=i8) :: i
rn=randn(size(rn,kind=i8))
do i=1,n/2
rn1=rn(2*i-1)
rn2=rn(2*i)
arg=2.0_r8*pi*rn1
x1=sin(arg)
x2=cos(arg)
x3=sqrt(-2.0_r8*log(rn2))
gaussian(2*i-1)=x1*x3
gaussian(2*i)=x2*x3
enddo
if (mod(n,2).ne.0) then
rn1=rn(n)
rn2=rn(n+1)
arg=2.0_r8*pi*rn1
x2=cos(arg)
x3=sqrt(-2.0_r8*log(rn2))
gaussian(n)=x2*x3
endif
return
end function gaussian
subroutine setrn(irnin)
! set the seed
integer(kind=i8) :: irnin
integer(kind=i4) :: n
integer(kind=i4), allocatable :: iseed(:)
irn=iand(irnin,mask48)
call savern(irn)
call RANDOM_SEED(SIZE=n)
allocate(iseed(n))
iseed=int(irnin)
call RANDOM_SEED(PUT=iseed) ! set the internal ran function's seed.
return
end subroutine setrn
subroutine savern(irnin)
! save the seed
integer(kind=i8) :: irnin
irnsave=irnin
return
end subroutine savern
end module random
module stats
use constants
implicit none
contains
subroutine stat2(x,xave,xvar)
use constants
real (kind=r8) :: x(:,:),val(5),val2(5),xvar(5),xave(5)
integer (kind =i8) :: i,j,xsize,np
val=zero
val2=zero
np = size(x,2)
do i=1,np
do j=1,5
val(j) = val(j) + x(j,i)
val2(j) = val2(j) + x(j,i)**2
enddo
enddo
xave=val/np
xvar=abs( val2/np - xave**2 )
return
end subroutine stat2
end module stats
module stochastic_rk
use constants
implicit none
contains
subroutine rk4_ti_vec ( x0, t0, tn, nstep, q, h, sigma, normal, fi_in, gi_in, n, x )
! https://stackoverflow.com/questions/32809769/how-to-pass-subroutine-names-as-arguments-in-fortran
use random
use fg
implicit none
integer(kind = i8), intent(in) :: nstep
integer(kind = i8), intent(in) :: n
procedure(fi_vec) :: fi_in
procedure(gi_vec) :: gi_in
real(kind = r8), intent(in) :: q,h
real(kind = r8), intent(in) :: sigma(4)
real(kind = r8), intent(in) :: x0(n)
real(kind = r8), intent(in) :: t0, tn
real(kind = r8), intent(in) :: normal(n,4,nstep)
real ( kind = r8 ), parameter :: alphas(4) = [ 0.47012396888046_r8 &
,0.36597075368373_r8 &
,0.08906615686702_r8 &
,0.07483912056879_r8 ]
real(kind = r8), parameter :: as(4,4) = reshape([ &
& 0.0_r8, 0.0_r8, 0.0_r8, 0.0_r8, &
& 2.71644396264860_r8, 0.0_r8, 0.0_r8, 0.0_r8, &
& -6.95653259006152_r8, 0.78313689457981_r8, 0.0_r8, 0.0_r8, &
& 0.0_r8, 0.48257353309214_r8, 0.26171080165848_r8, 0.0_r8 ], [4,4])
real(kind = r8), parameter :: qs(4) = [ 2.12709852335625_r8, &
& 2.73245878238737_r8, &
& 11.22760917474960_r8, &
& 13.36199560336697_r8 ]
real(kind = r8) :: ks(n,4)
real(kind = r8) :: ts(n,4)
real(kind = r8) :: ws(n,4)
real(kind = r8) :: xs(n,4)
real(kind = r8) :: warray(n,4)
real(kind = r8) :: ak(n)
integer(kind = i8) :: i,j,k
real(kind = r8), intent(out) :: x(n,0:nstep)
real(kind = r8) :: xstar(n)
real ( kind = r8 ) :: f(n), g(n)
x(:,0) = x0
do k = 1, nstep
!t = t0 + (k*(tn-t0))/n
xstar = x(:,k-1)
do j=1,4
warray(:,j) = normal(:,j,k)*sigma(j) !gaussian(2)*sqrt(qs(j)*qoh)
do concurrent (i=1:n)
ak(i)=dot_product(as(:j-1,j), ks(i,:j-1))
enddo
xs(:,j) = x(:,k-1) + ak
call fi_in( xs(:,j) ,f )
call gi_in( xs(:,j) ,g )
ks(:,j) = h * ( f + g*warray(:,j) )
xstar = xstar + alphas(j)*ks(:,j)
enddo
x(:,k) = xstar
enddo
return
end subroutine rk4_ti_vec
subroutine rk4_ti_vec_func ( x0, t0, tn, nstep, q, h, sigma, normal, fi_in, gi_in, n, x )
! https://stackoverflow.com/questions/32809769/how-to-pass-subroutine-names-as-arguments-in-fortran
use random
use fg
implicit none
integer(kind = i8), intent(in) :: nstep
integer(kind = i8), intent(in) :: n
procedure(func_fi_vec) :: fi_in
procedure(func_gi_vec) :: gi_in
real(kind = r8), intent(in) :: q,h
real(kind = r8), intent(in) :: sigma(4)
real(kind = r8), intent(in) :: x0(n)
real(kind = r8), intent(in) :: t0, tn
real(kind = r8), intent(in) :: normal(n,4,nstep)
real ( kind = r8 ), parameter :: alphas(4) = [ 0.47012396888046_r8 &
,0.36597075368373_r8 &
,0.08906615686702_r8 &
,0.07483912056879_r8 ]
real(kind = r8), parameter :: as(4,4) = reshape([ &
& 0.0_r8, 0.0_r8, 0.0_r8, 0.0_r8, &
& 2.71644396264860_r8, 0.0_r8, 0.0_r8, 0.0_r8, &
& -6.95653259006152_r8, 0.78313689457981_r8, 0.0_r8, 0.0_r8, &
& 0.0_r8, 0.48257353309214_r8, 0.26171080165848_r8, 0.0_r8 ], [4,4])
real(kind = r8), parameter :: qs(4) = [ 2.12709852335625_r8, &
& 2.73245878238737_r8, &
& 11.22760917474960_r8, &
& 13.36199560336697_r8 ]
real(kind = r8) :: ks(n,4)
real(kind = r8) :: ts(n,4)
real(kind = r8) :: ws(n,4)
real(kind = r8) :: xs(n,4)
real(kind = r8) :: warray(n,4)
real(kind = r8) :: ak(n)
integer(kind = i8) :: i,j,k
real(kind = r8), intent(out) :: x(n,0:nstep)
real(kind = r8) :: xstar(n)
real ( kind = r8 ) :: f(n), g(n)
x(:,0) = x0
do k = 1, nstep
!t = t0 + (k*(tn-t0))/n
xstar = x(:,k-1)
do j=1,4
warray(:,j) = normal(:,j,k)*sigma(j) !gaussian(2)*sqrt(qs(j)*qoh)
do concurrent (i=1:n)
ak(i)=dot_product(as(:j-1,j), ks(i,:j-1))
enddo
xs(:,j) = x(:,k-1) + ak
ks(:,j) = h * ( fi_in(xs(:,j)) + gi_in(xs(:,j))*warray(:,j) )
xstar = xstar + alphas(j)*ks(:,j)
enddo
x(:,k) = xstar
enddo
return
end subroutine rk4_ti_vec_func
end module stochastic_rk
module tests
implicit none
contains
subroutine test03
use constants
use stochastic_rk
use random
use fg
use stats
implicit none
integer ( kind = i8 ), parameter :: n = 1000, np = 10000, nd =2
real(kind = r8), parameter :: qs(4) = [ 2.12709852335625_r8, &
& 2.73245878238737_r8, &
& 11.22760917474960_r8, &
& 13.36199560336697_r8 ]
real ( kind = r8 ) h
integer ( kind = i8 ) :: i, itot, istart, istep, j, k
real ( kind = r8 ) q
integer ( kind = i8 ) :: seed
real ( kind = r8 ) t
real ( kind = r8 ), parameter :: t0 = zero
real ( kind = r8 ), parameter :: tn = one
real ( kind = r8 ) :: x0(nd),x(nd,0:n)
real (kind = r8) :: xout(5,np)
integer ( kind = i8 ) :: normald
real (kind = r8) :: normal(nd,4,n,np)
real(kind = r8) :: qoh,sigma(4)
real ( kind = r8 ) :: t1,t2
real (kind=r8) :: xvar(5),xave(5)
write ( *, '(a)' ) ' '
write ( *, '(a)' ) 'STOCHASTIC_RK_TEST 03 ------------------------------------'
h = ( tn - t0 ) / n
q = 1.0_r8
!seed = 123456789
qoh = q / h
sigma=sqrt(qs*qoh)
normald = nd*4*n*np
normal = reshape(gaussian(normald),shape(normal))
x0(1) = 20.0_r8
x0(2) = one
itot = n
istart = itot/5
istep = istart
call CPU_TIME(t1)
do j = 1, np
!i = 0
!t = t0
call rk4_ti_vec_func(x0, t0, tn, n, q, h, sigma, normal(:,:,:,j), func_fi_vec, func_gi_vec, nd, x)
xout(1:5:1,j) = x(1,istart:itot:istep)
!write ( *, '(2x,i8,2x,f14.6,2x,g14.6)' ) i-1, t, xout
enddo
call CPU_TIME(t2)
write(6,'(''Time cost = '',t20,g12.5,'' sec'')') t2-t1
! statistics
call stat2(xout,xave,xvar)
write(6,'(''step size : '',t20,a2,i5)') '1/',n
write(6,'(''Np: '',t20,g12.5)') np
write(6,'('' '')')
write(6,'(''Simulated Mean at t=0.2:1.0:0.2 is: '',t50,5(g12.5,1x))') (xave(k),k=1,5)
write(6,'(''Theoretical Mean at t=0.2:1.0:0.2 is: '',t50,5(g12.5,1x))') ( 20.0*exp(-k*0.2),k=1,5 )
write(6,'('' '')')
write(6,'(''Simulated Variance at t=0.2:1.0:0.2 is: '',t50,5(g12.5,1x))') (xvar(k),k=1,5)
write(6,'(''Theoretical Variance at t=0.2:1.0:0.2 is: '',t50,5(g12.5,1x))') ( 0.5*(1.0-exp(-2*k*0.2)),k=1,5 )
write ( *, '(a)' ) ' ------------------------------------ '
return
end subroutine test03
subroutine test02
use constants
use stochastic_rk
use random
use fg
use stats
implicit none
integer ( kind = i8 ), parameter :: n = 1000, np = 10000, nd =2
real(kind = r8), parameter :: qs(4) = [ 2.12709852335625_r8, &
& 2.73245878238737_r8, &
& 11.22760917474960_r8, &
& 13.36199560336697_r8 ]
real ( kind = r8 ) h
integer ( kind = i8 ) :: i, itot, istart, istep, j, k
real ( kind = r8 ) q
integer ( kind = i8 ) :: seed
real ( kind = r8 ) t
real ( kind = r8 ), parameter :: t0 = zero
real ( kind = r8 ), parameter :: tn = one
real ( kind = r8 ) :: x0(nd),x(nd,0:n)
real (kind = r8) :: xout(5,np)
integer ( kind = i8 ) :: normald
real (kind = r8) :: normal(nd,4,n,np)
real(kind = r8) :: qoh,sigma(4)
real ( kind = r8 ) :: t1,t2
real (kind=r8) :: xvar(5),xave(5)
write ( *, '(a)' ) ' '
write ( *, '(a)' ) 'STOCHASTIC_RK_TEST 02 ------------------------------------'
h = ( tn - t0 ) / n
q = 1.0_r8
!seed = 123456789
qoh = q / h
sigma=sqrt(qs*qoh)
normald = nd*4*n*np
normal = reshape(gaussian(normald),shape(normal))
x0(1) = 20.0_r8
x0(2) = one
itot = n
istart = itot/5
istep = istart
call CPU_TIME(t1)
do j = 1, np
!i = 0
!t = t0
call rk4_ti_vec(x0, t0, tn, n, q, h, sigma, normal(:,:,:,j), fi_vec, gi_vec, nd, x)
xout(1:5:1,j) = x(1,istart:itot:istep)
!write ( *, '(2x,i8,2x,f14.6,2x,g14.6)' ) i-1, t, xout
enddo
call CPU_TIME(t2)
write(6,'(''Time cost = '',t20,g12.5,'' sec'')') t2-t1
! statistics
call stat2(xout,xave,xvar)
write(6,'(''step size : '',t20,a2,i5)') '1/',n
write(6,'(''Np: '',t20,g12.5)') np
write(6,'('' '')')
write(6,'(''Simulated Mean at t=0.2:1.0:0.2 is: '',t50,5(g12.5,1x))') (xave(k),k=1,5)
write(6,'(''Theoretical Mean at t=0.2:1.0:0.2 is: '',t50,5(g12.5,1x))') ( 20.0*exp(-k*0.2),k=1,5 )
write(6,'('' '')')
write(6,'(''Simulated Variance at t=0.2:1.0:0.2 is: '',t50,5(g12.5,1x))') (xvar(k),k=1,5)
write(6,'(''Theoretical Variance at t=0.2:1.0:0.2 is: '',t50,5(g12.5,1x))') ( 0.5*(1.0-exp(-2*k*0.2)),k=1,5 )
write ( *, '(a)' ) ' ------------------------------------ '
return
end subroutine test02
end module tests
program main
use random
use random2
use constants
use tests
use stochastic_rk
use fg
implicit none
integer(kind=i8) :: seed,n
seed = 123456789
call setrn(int(seed,8))
n=2
call fg_init(n)
call test02
call test03
stop ('Program end normally.')
end program main
Results is
STOCHASTIC_RK_TEST 02 ------------------------------------
Time cost = 1.0469 sec
step size : 1/ 1000
Np: 10000
Simulated Mean at t=0.2:1.0:0.2 is: 16.368 13.401 10.972 8.9760 7.3452
Theoretical Mean at t=0.2:1.0:0.2 is: 16.375 13.406 10.976 8.9866 7.3576
Simulated Variance at t=0.2:1.0:0.2 is: 0.16398 0.27954 0.35563 0.40265 0.43186
Theoretical Variance at t=0.2:1.0:0.2 is: 0.16484 0.27534 0.34940 0.39905 0.43233
------------------------------------
STOCHASTIC_RK_TEST 03 ------------------------------------
Time cost = 2.3594 sec
step size : 1/ 1000
Np: 10000
Simulated Mean at t=0.2:1.0:0.2 is: 16.377 13.406 10.972 8.9837 7.3610
Theoretical Mean at t=0.2:1.0:0.2 is: 16.375 13.406 10.976 8.9866 7.3576
Simulated Variance at t=0.2:1.0:0.2 is: 0.16249 0.27552 0.34895 0.40209 0.43566
Theoretical Variance at t=0.2:1.0:0.2 is: 0.16484 0.27534 0.34940 0.39905 0.43233
------------------------------------
Program end normally.