The reason why I am testing the fast-math library is because I would like to speed up a routine for computing the logsum in one of my projects (see here for some background info).
My original subroutine using the intrinsic exp
and log
subroutine sub_logsum(LogSum,Prob, V,sigma)
! DESCRIPTION
! Calculates the log-sum and choice-probabilities. The computation avoids
! overflows.
!See for more info:
! https://gregorygundersen.com/blog/2020/02/09/log-sum-exp/
! https://nhigham.com/2021/01/05/what-is-the-log-sum-exp-function/
! INPUTS
! V: Vector with values of the different choices
! sigma: Standard deviation of the taste shock
! OUTPUTS
! LogSum: Log sum of exponentials
! Prob: Choice probabilities
real(8), intent(in) :: V(:)
real(8), intent(in) :: sigma
real(8), intent(out) :: LogSum
real(8), intent(out) :: Prob(:)
! Local variables:
integer :: n, id
real(8) :: mxm, sum_prob
! Body of sub_logsum
n = size(V)
! Check inputs
if (n==1) then
call myerror("sub_logsum: Need at least two elements")
endif
if (n/=size(Prob)) then
call myerror("sub_logsum: V and Prob are not compatible")
endif
if (sigma<=0d0) then
call myerror("sub_logsum: sigma must be zero or positive")
endif
! Maximum over the discrete choice
id = maxloc(V,dim=1)
mxm = V(id)
! Logsum and probabilities
! a. numerically robust log-sum
LogSum = mxm + sigma*log(sum(exp((V-mxm)/sigma)))
! b. numerically robust probability
Prob = exp((V-LogSum)/sigma)
!-----------------------------------------------------------------!
! Check if outputs are correct. Comment out this section for speed
if (LogSum/=LogSum) then
call myerror("sub_logsum: LogSum is either NaN or Inf")
endif
if (any(Prob<0d0)) then
call myerror("sub_logsum: Some elements of Prob are negative")
endif
sum_prob = sum(Prob)
if (abs(sum_prob-1d0)>1d-7) then
write(*,*) "sum_prob = ", sum_prob
call myerror("sub_logsum: Prob does not sum to one")
endif
Prob = Prob/sum_prob
!-----------------------------------------------------------------!
end subroutine sub_logsum
And here is the new routine which uses @FedericoPerini’s library:
subroutine sub_logsum_f(LogSum,Prob, V,sigma)
! NOTE: sub_logsum_f is copied from sub_logsum and the only difference
! is that it uses the fast_exp and fast_log functions.
! DESCRIPTION: see sub_logsum for more information.
real(8), intent(in) :: V(:)
real(8), intent(in) :: sigma
real(8), intent(out) :: LogSum
real(8), intent(out) :: Prob(:)
! Local variables:
integer :: n, id
real(8) :: mxm, sum_prob
! Body of sub_logsum
n = size(V)
! Check inputs
if (n==1) then
call myerror("sub_logsum: Need at least two elements")
endif
if (n/=size(Prob)) then
call myerror("sub_logsum: V and Prob are not compatible")
endif
if (sigma<0d0) then
call myerror("sub_logsum: sigma must be zero or positive")
endif
! Maximum over the discrete choice
id = maxloc(V,dim=1)
mxm = V(id)
! Logsum and probabilities
! a. numerically robust log-sum
LogSum = mxm + sigma*flog(sum(fexp((V-mxm)/sigma)))
! b. numerically robust probability
Prob = fexp((V-LogSum)/sigma)
!-----------------------------------------------------------------!
! Check if outputs are correct. Comment out this section for speed
if (LogSum/=LogSum) then
call myerror("sub_logsum_f: LogSum is either NaN or Inf")
endif
if (any(Prob<0d0)) then
call myerror("sub_logsum_f: Some elements of Prob are negative")
endif
sum_prob = sum(Prob)
if (abs(sum_prob-1d0)>1d-5) then
write(*,*) "sum_prob = ", sum_prob
call myerror("sub_logsum_f: Prob does not sum to one")
endif
Prob = Prob/sum_prob
!-----------------------------------------------------------------!
end subroutine sub_logsum_f
This routine (sub_logsum or sub_logsum_f) is the real bottleneck of my code. Unfortunately, it seems that both on ifort and ifx using fast-math actually slows down a bit the code with respect to the version that relies on the intrinsic log and exp.
I wrote a test program to compare the performance of sub_logsum and sub_logsum_f
module mymod
use fast_exp
use fast_log
implicit none
contains
subroutine myerror(string)
implicit none
character(len=*), intent(in) :: string
write(*,*) "ERROR: ", string
write(*,*) "Program will terminate.."
pause
stop
end subroutine myerror
subroutine sub_logsum(LogSum,Prob, V,sigma)
! DESCRIPTION
! Calculates the log-sum and choice-probabilities. The computation avoids
! overflows.
!See for more info:
! https://gregorygundersen.com/blog/2020/02/09/log-sum-exp/
! https://nhigham.com/2021/01/05/what-is-the-log-sum-exp-function/
! INPUTS
! V: Vector with values of the different choices
! sigma: Standard deviation of the taste shock
! OUTPUTS
! LogSum: Log sum of exponentials
! Prob: Choice probabilities
real(8), intent(in) :: V(:)
real(8), intent(in) :: sigma
real(8), intent(out) :: LogSum
real(8), intent(out) :: Prob(:)
! Local variables:
integer :: n, id
real(8) :: mxm, sum_prob
! Body of sub_logsum
n = size(V)
! Check inputs
if (n==1) then
call myerror("sub_logsum: Need at least two elements")
endif
if (n/=size(Prob)) then
call myerror("sub_logsum: V and Prob are not compatible")
endif
if (sigma<=0d0) then
call myerror("sub_logsum: sigma must be positive")
endif
! Maximum over the discrete choice
id = maxloc(V,dim=1)
mxm = V(id)
! Logsum and probabilities
! a. numerically robust log-sum
LogSum = mxm + sigma*log(sum(exp((V-mxm)/sigma)))
! b. numerically robust probability
Prob = exp((V-LogSum)/sigma)
!-----------------------------------------------------------------!
! Check if outputs are correct. Comment out this section for speed
if (LogSum/=LogSum) then
call myerror("sub_logsum: LogSum is either NaN or Inf")
endif
if (any(Prob<0d0)) then
call myerror("sub_logsum: Some elements of Prob are negative")
endif
sum_prob = sum(Prob)
if (abs(sum_prob-1d0)>1d-7) then
write(*,*) "sum_prob = ", sum_prob
call myerror("sub_logsum: Prob does not sum to one")
endif
Prob = Prob/sum_prob
!-----------------------------------------------------------------!
end subroutine sub_logsum
subroutine sub_logsum_f(LogSum,Prob, V,sigma)
! NOTE: sub_logsum_f is copied from sub_logsum and the only difference
! is that it uses the fast_exp and fast_log functions.
! DESCRIPTION: see sub_logsum for more information.
real(8), intent(in) :: V(:)
real(8), intent(in) :: sigma
real(8), intent(out) :: LogSum
real(8), intent(out) :: Prob(:)
! Local variables:
integer :: n, id
real(8) :: mxm, sum_prob
! Body of sub_logsum
n = size(V)
! Check inputs
if (n==1) then
call myerror("sub_logsum: Need at least two elements")
endif
if (n/=size(Prob)) then
call myerror("sub_logsum: V and Prob are not compatible")
endif
if (sigma<=0d0) then
call myerror("sub_logsum: sigma must be positive")
endif
! Maximum over the discrete choice
id = maxloc(V,dim=1)
mxm = V(id)
! Logsum and probabilities
! a. numerically robust log-sum
LogSum = mxm + sigma*flog(sum(fexp((V-mxm)/sigma)))
! b. numerically robust probability
Prob = fexp((V-LogSum)/sigma)
!-----------------------------------------------------------------!
! Check if outputs are correct. Comment out this section for speed
if (LogSum/=LogSum) then
call myerror("sub_logsum_f: LogSum is either NaN or Inf")
endif
if (any(Prob<0d0)) then
call myerror("sub_logsum_f: Some elements of Prob are negative")
endif
sum_prob = sum(Prob)
if (abs(sum_prob-1d0)>1d-5) then
write(*,*) "sum_prob = ", sum_prob
call myerror("sub_logsum_f: Prob does not sum to one")
endif
Prob = Prob/sum_prob
!-----------------------------------------------------------------!
end subroutine sub_logsum_f
end module mymod
!===============================================================================!
program test_logsum_f
use mymod, only: sub_logsum, sub_logsum_f
implicit none
real(8), allocatable :: V(:), Prob(:)
real(8) :: LogSum, sigma, time, time_f, c_start, c_end
integer, parameter :: ntest = 20000000
integer :: i,ii
! Assign values
sigma = 0.1d0
V = [1d0, 2d0, 3d0, 4d0]
! Call sub_logsum
allocate(Prob(size(V)))
write(*,*) "Call sub_logsum"
call cpu_time(c_start)
do i=1,ntest
call sub_logsum(LogSum,Prob, V,sigma)
enddo
call cpu_time(c_end)
time = c_end-c_start
write(*,*) "time = ", time
write(*,'(X,A,F13.10)') "LogSum = ", LogSum
write(*,*) "Prob = "
do ii = 1,size(Prob)
write(*,'(F13.10)') Prob(ii)
enddo
deallocate(Prob)
write(*,*) "------------------------------------------"
write(*,*) "Call sub_logsum_f"
allocate(Prob(size(V)))
call cpu_time(c_start)
do i=1,ntest
call sub_logsum_f(LogSum,Prob, V,sigma)
enddo
call cpu_time(c_end)
time_f = c_end-c_start
write(*,*) "time_f = ", time_f
write(*,'(X,A,F13.10)') "LogSum = ", LogSum
write(*,*) "Prob = "
do ii = 1,size(Prob)
write(*,'(F13.10)') Prob(ii)
enddo
write(*,*) "------------------------------------------"
write(*,'(X,A)') "logsum with fast math vs logsum:"
write(*,'(X,A,F13.10)') "time_f/time = ", time_f/time
pause
end program test_logsum_f
After compiling with ifort and /O3 the results are
Call sub_logsum
time = 0.718750000000000
LogSum = 4.0000045401
Prob =
0.0000000000
0.0000000021
0.0000453979
0.9999546001
------------------------------------------
Call sub_logsum_f
time_f = 0.953125000000000
LogSum = 4.0000043780
Prob =
0.0000000000
0.0000000021
0.0000453979
0.9999546000
------------------------------------------
logsum with fast math vs logsum:
time_f/time = 1.3260869565
The results with ifx and /O3 are:
Call sub_logsum
time = 0.968750000000000
LogSum = 4.0000045401
Prob =
0.0000000000
0.0000000021
0.0000453979
0.9999546001
------------------------------------------
Call sub_logsum_f
time_f = 1.04687500000000
LogSum = 4.0000043780
Prob =
0.0000000000
0.0000000021
0.0000453979
0.9999546000
------------------------------------------
logsum with fast math vs logsum:
time_f/time = 1.0806451613
NOTE: To compile my test file, you have to include the fast-math modules fast_exp.f90
and fast_log.f90
available here.