I’m trying to see if using elemental functions can improve performance of my code, but my example does not work as intended.
I would like to compute a matrix defined as
F(a’,a) = \log(cash - a’)
where cash = (1+r)a + wz. If cash - a’ \leq 0, then F is equal to a large negative number.
I calculate the matrix F in three different ways
- Calling the function
f_returnwithin two loops - Calling the function
f_return_vecusing array syntax: I pass a vector and the function returns a vector with the same shape. Inside the function I use the intrinsicmerge(but I could usewhere,forallor other constructs, I don’t think it is relevant). - Calling the function
f_return_elusing elemental function.
All three should give the same result of course but the third method, based on elemental syntax, fails. Any idea why?
Thanks!
! Compute F(a',a) = log(cash - a') where cash = (1+r)a + wz
! If cash - a' <= 0, return a large negative number
module mymod
implicit none
contains
pure function f_return(cash,aprime) result(F)
! Scalar-syntax version with scalar input and output
implicit none
real(8), intent(in) :: cash, aprime
real(8) :: F
real(8) :: consumption
consumption = cash - aprime
if (consumption > 0.0d0) then
F = log(consumption)
else
F = -1.0d10
endif
end function f_return
pure function f_return_vec(cash,aprime) result(F)
! Array-syntax version with vector input and output
implicit none
real(8), intent(in) :: cash
real(8), intent(in) :: aprime(:)
real(8) :: F(size(aprime))
real(8) :: consumption(size(aprime))
consumption = cash - aprime
F = merge(log(consumption), -1.0d10, consumption > 0.0d0)
end function f_return_vec
elemental pure function f_return_el(cash,aprime) result(F)
! Elemental version with all inputs/output as scalars
implicit none
real(8), intent(in) :: cash
real(8), intent(in) :: aprime
real(8) :: F
real(8) :: consumption
consumption = cash - aprime
F = merge(log(consumption), -1.0d10, consumption > 0.0d0)
!if (consumption > 0.0d0) then
! F = log(consumption)
!else
! F = -1.0d10
!endif
end function f_return_el
end module mymod
program main
use mymod, only: f_return, f_return_vec, f_return_el
implicit none
integer, parameter :: n_a = 4000
real(8), parameter :: a_min = 0.0d0, a_max = 20.0d0
real(8), parameter :: w=1.0d0, z=1.0d0
integer :: i, a_c, ap_c
real(8) :: cash, step_size, diff, diff3
real(8) :: t_start1, t_end1, t_start2, t_end2, t_start3, t_end3
real(8) :: a_grid(n_a)
real(8), allocatable :: ret_matrix1(:,:), ret_matrix2(:,:), ret_matrix3(:,:)
! Create equally spaced grid for assets
step_size = (a_max-a_min)/(real(n_a-1,8))
do i = 1,n_a
a_grid(i) = a_min + step_size*(i - 1)
enddo
allocate(ret_matrix1(n_a,n_a), ret_matrix2(n_a,n_a), ret_matrix3(n_a,n_a))
! Measure time for three different ways to evaluate the
call cpu_time(t_start1)
do a_c = 1,n_a
do ap_c = 1,n_a
cash = (1.0d0+a_grid(a_c)) + w*z
! ret_matrix(a',a) gives the return for asset today a and asset tomorrow a'
ret_matrix1(ap_c,a_c) = f_return(cash,a_grid(ap_c))
enddo
enddo
call cpu_time(t_end1)
call cpu_time(t_start2)
do a_c = 1,n_a
cash = (1.0d0+a_grid(a_c)) + w*z
! ret_matrix(a',a) gives the return for asset today a and asset tomorrow a'
ret_matrix2(:,a_c) = f_return_vec(cash,a_grid)
enddo
call cpu_time(t_start2)
call cpu_time(t_start3)
do a_c = 1,n_a
cash = (1.0d0+a_grid(a_c)) + w*z
! ret_matrix(a',a) gives the return for asset today a and asset tomorrow a'
ret_matrix3(:,a_c) = f_return_el(cash,a_grid)
enddo
call cpu_time(t_start3)
diff = maxval(abs(ret_matrix1 - ret_matrix2))
diff3 = maxval(abs(ret_matrix1 - ret_matrix3))
write(*,*) 'Maximum difference between scalar and vectorized evaluations: ', diff
write(*,*) 'Maximum difference between scalar and elemental evaluations: ', diff3
write(*,*) 'Time taken for scalar evaluations: ', t_end1 - t_start1
write(*,*) 'Time taken for vectorized evaluations: ', t_start2 - t_end2
write(*,*) 'Time taken for elemental evaluations: ', t_start3 - t_end3
end program main
The output of this program is:
Maximum difference between scalar and vectorized evaluations:
0.000000000000000E+000
Maximum difference between scalar and elemental evaluations:
10000000000.0000
Press any key to continue . . .
Update
I updated this MWE fixing a bug, replaced the if condition with merge in the second function (thanks @zaikunzhang) and added the timing.
