Introducing yet another way to do sums I tried using the intrinsic sum
in higher precision in this program, where the first 3 methods are copied from previous contributors but p2sum
was my own.
program sums
use, intrinsic :: iso_fortran_env, only: p1=>real32, & ! use real32 or real64
compiler_version, compiler_options
implicit none
integer, parameter :: n=10**8, p2 = selected_real_kind(precision(1._p1)+1)
real :: start,finish
real(p1) :: s, x(n)
character(*), parameter :: cfmt = '(a/a)', sfmt(2) = &
['(a,es16.07,1x,z0,1x,g0.3,a)','(a,es23.15,1x,z0,1x,g0.3,a)']
integer :: whichp = merge(1,2,kind(x) == kind(1.0)), whichx
write(*,cfmt) 'Compiler_version:',compiler_version()
write(*,cfmt) 'Compiler_options:',compiler_options()
call random_number(x)
x = x * 0.5_p1 + 0.5_p1 ! values between 0.5 and 1.0 have the same exponent.
do whichx = 1,2
if(whichx==2) x = 256 * (x - 0.75_p1) ! mixed signs and exponents
write(*,*) merge('0.5 <= x <= 1.0','-64 <= x <= +64',whichx==1)
call cpu_time(start)
s = sum(x)
call cpu_time(finish)
write(*,sfmt(whichp)) 'intrinsic p1:', s, s, finish-start, ' sec'
call cpu_time(start)
s = e_sum(x)
call cpu_time(finish)
write(*,sfmt(whichp)) 'extended p2:', s, s, finish-start, ' sec'
call cpu_time(start)
s = r_sum(x)
call cpu_time(finish)
write(*,sfmt(whichp)) 'recursive p1:', s, s, finish-start, ' sec'
call cpu_time(start)
s = p2sum(x)
call cpu_time(finish)
write(*,sfmt(whichp)) 'intrinsic p2:', s, s, finish-start, ' sec'
end do
contains
function e_sum(x) ! extended precision summation.
implicit none
real(p1) :: e_sum
real(p1), intent(in) :: x(:)
integer :: i
real(p2) :: temp
temp = 0.0_p2
do i = 1, size(x)
temp = temp + real( x(i), p2 )
enddo
e_sum = real(temp,p1)
return
end function e_sum
function r_sum(x) ! recursive summation.
implicit none
real(p1) :: r_sum
real(p1), intent(in) :: x(:)
integer :: i, j, p
real(p1) :: psum( bit_size(1) - leadz(size(x)) ) ! max recursion depth.
p = 0
do i = 1, size(x)
p = p + 1
psum(p) = x(i)
do j = 1, trailz(i)
psum(p-1) = psum(p-1) + psum(p)
p = p - 1
enddo
enddo
do while (p > 1) ! cleanup loop. accumulate in reverse order.
psum(p-1) = psum(p-1) + psum(p)
p = p - 1
enddo
r_sum = psum(1)
return
end function r_sum
function p2sum(x) ! intrinsic higher precision sum
real(p1) p2sum
real(p1),intent(in):: x(:)
p2sum = real(sum(real(x,p2)),p1)
end function p2sum
end program sums
The output from gfortran was
Compiler_version:
GCC version 12.1.0
Compiler_options:
-mtune=generic -march=x86-64 -g -Og -Wall -Wuninitialized -Wconversion -Wsurprising -Wcharacter-truncation -Wpedantic -Wno-unused-dummy-argument -fmax-errors=10 -fcheck=all -ffpe-trap=invalid,zero,overflow -fbacktrace -fpre-include=/usr/include/finclude/math-vector-fortran.h
0.5 <= x <= 1.0
intrinsic p1: 1.6777216E+07 4B800000 0.105 sec
extended p2: 7.4999016E+07 4C8F0C9D 0.156 sec
recursive p1: 7.4999016E+07 4C8F0C9D 0.362 sec
intrinsic p2: 7.4999016E+07 4C8F0C9D 0.106 sec
-64 <= x <= +64
intrinsic p1: -2.5215947E+05 C8763FDE 0.104 sec
extended p2: -2.5201980E+05 C8761CF3 0.155 sec
recursive p1: -2.5201975E+05 C8761CF0 0.362 sec
intrinsic p2: -2.5201980E+05 C8761CF3 0.106 sec
Ifort gave
Compiler_version:
Intel(R) Fortran Intel(R) 64 Compiler Classic for applications running on Intel(R) 64, Version 2021.9.0 Build 20230302_000000
Compiler_options:
-L/opt/intel//oneapi/compiler/latest/lib/pkgconfig/../..//linux/compiler/lib/intel64/ -L/lib/gcc/x86_64-linux-gnu/12 -lgcc -lgcc_s -liomp5 -assume protect_parens -check all -fmath-errno -ftrapuv -traceback -warn interface -standard-semantics -warn nostderrors -check noarg_temp_created -O0 -g -o sums_p1p2.f90oi
0.5 <= x <= 1.0
intrinsic p1: 1.6777216E+07 4B800000 .229 sec
extended p2: 7.4998536E+07 4C8F0C61 .321 sec
recursive p1: 7.4998536E+07 4C8F0C61 1.12 sec
intrinsic p2: 7.4998536E+07 4C8F0C61 .231 sec
-64 <= x <= +64
intrinsic p1: -3.7506603E+05 C8B72341 .228 sec
extended p2: -3.7500778E+05 C8B71BF9 .321 sec
recursive p1: -3.7500778E+05 C8B71BF9 1.12 sec
intrinsic p2: -3.7500778E+05 C8B71BF9 .232 sec
Why did both compilers give exactly 2.0**24 for the first result? I was surprised that gfortran was noticeably faster than ifort, but I was trying for good bughunting rather than high speed with both.