Introducing yet another way to do sums I tried using the intrinsic sum in higher precision in this program, where the first 3 methods are copied from previous contributors but p2sum was my own.
program sums
  use, intrinsic :: iso_fortran_env, only: p1=>real32, & ! use real32 or real64
       compiler_version, compiler_options
  implicit none
  integer, parameter :: n=10**8, p2 = selected_real_kind(precision(1._p1)+1)
  real :: start,finish
  real(p1) :: s, x(n)
  character(*), parameter :: cfmt = '(a/a)', sfmt(2) = &
       ['(a,es16.07,1x,z0,1x,g0.3,a)','(a,es23.15,1x,z0,1x,g0.3,a)']
  integer :: whichp = merge(1,2,kind(x) == kind(1.0)), whichx
  write(*,cfmt) 'Compiler_version:',compiler_version()
  write(*,cfmt) 'Compiler_options:',compiler_options()
  call random_number(x)
  x = x * 0.5_p1 + 0.5_p1 ! values between 0.5 and 1.0 have the same exponent.
  do whichx = 1,2
     if(whichx==2) x = 256 * (x - 0.75_p1) ! mixed signs and exponents
     write(*,*) merge('0.5 <= x <= 1.0','-64 <= x <= +64',whichx==1)
     call cpu_time(start)
     s = sum(x)
     call cpu_time(finish)
     write(*,sfmt(whichp)) 'intrinsic p1:', s, s, finish-start, ' sec'
     call cpu_time(start)
     s = e_sum(x)
     call cpu_time(finish)
     write(*,sfmt(whichp)) 'extended  p2:', s, s, finish-start, ' sec'
     call cpu_time(start)
     s = r_sum(x)
     call cpu_time(finish)
     write(*,sfmt(whichp)) 'recursive p1:', s, s, finish-start, ' sec'
     call cpu_time(start)
     s = p2sum(x)
     call cpu_time(finish)
     write(*,sfmt(whichp)) 'intrinsic p2:', s, s, finish-start, ' sec'
  end do
contains
  function e_sum(x) ! extended precision summation.
    implicit none
    real(p1) :: e_sum
    real(p1), intent(in) :: x(:)
    integer :: i
    real(p2) :: temp
    temp = 0.0_p2
    do i = 1, size(x)
       temp = temp + real( x(i), p2 )
    enddo
    e_sum = real(temp,p1)
    return
  end function e_sum
  function r_sum(x) ! recursive summation.
    implicit none
    real(p1) :: r_sum
    real(p1), intent(in) :: x(:)
    integer :: i, j, p
    real(p1) :: psum( bit_size(1) - leadz(size(x)) )  ! max recursion depth.
    p = 0
    do i = 1, size(x)
       p = p + 1
       psum(p) = x(i)
       do j = 1, trailz(i)
          psum(p-1) = psum(p-1) + psum(p)
          p = p - 1
       enddo
    enddo
    do while (p > 1)  ! cleanup loop. accumulate in reverse order.
       psum(p-1) = psum(p-1) + psum(p)
       p = p - 1
    enddo
    r_sum = psum(1)
    return
  end function r_sum
  function p2sum(x) ! intrinsic higher precision sum
    real(p1) p2sum
    real(p1),intent(in):: x(:)
    p2sum = real(sum(real(x,p2)),p1)
  end function p2sum
    
  end program sums
The output from gfortran was
Compiler_version:
GCC version 12.1.0
Compiler_options:
-mtune=generic -march=x86-64 -g -Og -Wall -Wuninitialized -Wconversion -Wsurprising -Wcharacter-truncation -Wpedantic -Wno-unused-dummy-argument -fmax-errors=10 -fcheck=all -ffpe-trap=invalid,zero,overflow -fbacktrace -fpre-include=/usr/include/finclude/math-vector-fortran.h
 0.5 <= x <= 1.0
intrinsic p1:   1.6777216E+07 4B800000 0.105 sec
extended  p2:   7.4999016E+07 4C8F0C9D 0.156 sec
recursive p1:   7.4999016E+07 4C8F0C9D 0.362 sec
intrinsic p2:   7.4999016E+07 4C8F0C9D 0.106 sec
 -64 <= x <= +64
intrinsic p1:  -2.5215947E+05 C8763FDE 0.104 sec
extended  p2:  -2.5201980E+05 C8761CF3 0.155 sec
recursive p1:  -2.5201975E+05 C8761CF0 0.362 sec
intrinsic p2:  -2.5201980E+05 C8761CF3 0.106 sec
Ifort gave
Compiler_version:
Intel(R) Fortran Intel(R) 64 Compiler Classic for applications running on Intel(R) 64, Version 2021.9.0 Build 20230302_000000
Compiler_options:
-L/opt/intel//oneapi/compiler/latest/lib/pkgconfig/../..//linux/compiler/lib/intel64/ -L/lib/gcc/x86_64-linux-gnu/12 -lgcc -lgcc_s -liomp5 -assume protect_parens -check all -fmath-errno -ftrapuv -traceback -warn interface -standard-semantics -warn nostderrors -check noarg_temp_created -O0 -g -o sums_p1p2.f90oi
 0.5 <= x <= 1.0
intrinsic p1:   1.6777216E+07 4B800000 .229 sec
extended  p2:   7.4998536E+07 4C8F0C61 .321 sec
recursive p1:   7.4998536E+07 4C8F0C61 1.12 sec
intrinsic p2:   7.4998536E+07 4C8F0C61 .231 sec
 -64 <= x <= +64
intrinsic p1:  -3.7506603E+05 C8B72341 .228 sec
extended  p2:  -3.7500778E+05 C8B71BF9 .321 sec
recursive p1:  -3.7500778E+05 C8B71BF9 1.12 sec
intrinsic p2:  -3.7500778E+05 C8B71BF9 .232 sec
Why did both compilers give exactly 2.0**24 for the first result? I was surprised that gfortran was noticeably faster than ifort, but I was trying for good bughunting rather than high speed with both.