compiler flag:
/nologo /debug:full /O3 /QxHost /assume:buffered_io /heap-arrays0 /Qipo /debug-parameters:all /module:“x64\Release\” /object:“x64\Release\” /Fd"x64\Release\vc150.pdb" /traceback /libs:static /threads /Qmkl:cluster /c
linker flag:
/OUT:“x64\Release\stochastic_RK.exe” /INCREMENTAL:NO /NOLOGO /MANIFEST /MANIFESTFILE:“x64\Release\stochastic_RK.exe.intermediate.manifest” /MANIFESTUAC:“level=‘asInvoker’ uiAccess=‘false’” /DEBUG /PDB:“D:\Works CHLA\stochastic_RK\stochastic_RK\stochastic_RK\x64\Release\stochastic_RK.pdb” /SUBSYSTEM:CONSOLE /LARGEADDRESSAWARE /IMPLIB:“D:\Works CHLA\stochastic_RK\stochastic_RK\stochastic_RK\x64\Release\stochastic_RK.lib”
Basically it is just -O3 -QxHost -heaparray
Other flags are not important.
I solve two equations, and solve one equations, I am using the same flags.
I simplified my cases to the following,
I solve one equation dy/dt = 1
I solve 100 the same equations, i from 1 to 100, dy_i/dt =1
Then I should expect it took 100 times longer than solving just one equation, right?
In my code I did things like that and I expect solving 2 the same equations should cost just 2 times than solving one such equation. But I got a factor of 7 - 8 or so.
So I think perhaps there are some unnecessary memory allocation or deallocation, or some temp arrays are created. But I have not identify where.
Do you think the following code may generate temp arrays for x(:,i-1) and x(:,i)?
do i = 1, n
t = t0 + (real(i)*(tn-t0))/real(n)
call rk4_ti_step_vec_mod1 ( x(:,i-1), t, h, q, fi_vec2, gi_vec2, nd, x(:,i) )
end do
it is in the subroutine,
subroutine test02 ( xout )
use constants
use stochastic_rk
implicit none
interface !! Declare function
function fi_vec2 ( x ) result (f)
use constants
real ( kind = r8 ) :: f(2)
real ( kind = r8 ) :: x(2)
end function
function gi_vec2 ( x ) result (g)
use constants
real ( kind = r8 ) :: g(2)
real ( kind = r8 ) :: x(2)
end function
end interface
real ( kind = r8 ), external :: fi_vec21, fi_vec22, gi_vec21, gi_vec22
integer ( kind = i8 ), parameter :: n = 1000
real ( kind = r8 ) h
integer ( kind = i8 ) :: i, itot, istart, istep
real ( kind = r8 ) q
integer ( kind = i8 ) :: seed
real ( kind = r8 ) t
real ( kind = r8 ), parameter :: t0 = 0.0
real ( kind = r8 ), parameter :: tn = 1.0
real ( kind = r8 ) :: x(2,0:n)
real (kind = r8) :: xout(5)
integer ( kind = i8 ) :: nd ! dimension, =2
h = ( tn - t0 ) / real(n)
q = 1.0
seed = 123456789
i = 0
t = t0
x(1,i) = 20.0_r8
x(2,i) = one
nd = 2
!write ( *, '(a)' ) ' '
!write ( *, '(a)' ) ' I T X'
!write ( *, '(a)' ) ' '
!write ( *, '(2x,i8,2x,f14.6,2x,g14.6)' ) i, t, x(i)
do i = 1, n
t = t0 + (real(i)*(tn-t0))/real(n)
call rk4_ti_step_vec_mod1 ( x(:,i-1), t, h, q, fi_vec2, gi_vec2, nd, x(:,i) )
end do
itot = i-1
istart = itot/5
istep = istart
xout(1:5:1) = x(1,istart:itot:istep)
!write ( *, '(2x,i8,2x,f14.6,2x,g14.6)' ) i-1, t, xout
return
end subroutine test02
with functions,
function fi_vec2 ( x ) result (f)
use constants
implicit none
real ( kind = r8 ) :: f(2)
real ( kind = r8 ), intent(in) :: x(2)
f(1) = -x(2)*x(1)
f(2) = -x(2) + one
return
end function fi_vec2
function gi_vec2 ( x ) result(g)
use constants
implicit none
real ( kind = r8 ), parameter :: gp(2) = [one,zero]
real ( kind = r8 ), intent(in) :: x(2)
real ( kind = r8 ) :: g(2)
g=gp
return
end function gi_vec2