@RonShepard
I have now included a modified j_sum as je_sum, that uses e_sum for sums < 65.
I also introduced a modified test order, as this can influence some large memory tests, but not so much help here for e_sum (Array x is 400MBytes ?).
We possibly need a random vector of type 3, with greater exponent range for x, say x = 10**(10*x), to better demonstrate the error response or overflow in the accumulator. ( edit: now included )
The results of your e_sum test vs je_sum > e_sum are surprisingly different for elapsed time. This is the type of results I attribute to better L1 cache usage/variability, but without any definate knowledge.
Would be pleased to get any comments / suggestions
Anyway, I have done these tests on Ryzen 5900X, Win 10 and Gfortran 11.1.0, and included alternative compile options in the lists.
program sums
!   use, intrinsic :: iso_fortran_env, only: dp=>real64
   use iso_fortran_env, only: dp=>real64
   implicit none
   integer, parameter :: n=10**8
   integer :: itype, k, order(6) = [ 1, 5, 6, 2, 3, 4 ]
   real :: x(n), s1, s2, s3, s4, s5, s6
   real(dp) :: cpu0, cpu1
   character(*), parameter :: sfmt = '(a,es15.7,1x,z8,f11.6," seconds")'
   write(*,'("Vector Size n = ",i0)') n
   do itype = 1, 3
      write(*,'("Random vector of type ",i0)') itype
      select case (itype)
       case (1)
         call random_number(x)
         x = x * 0.5 + 0.5    ! values between .5 and 1.0 have the same exponent.
       case (2)
         call random_number(x)
         x = 256 * (x - 0.5)  ! mixed signs and exponents.
       case (3)
         call random_number(x)
         x = 10 ** (10*x)      ! greater exponent range
      end select
     do k = 1, 6
      select case ( order(k) )
       case (1)
          cpu0 = delta_sec ()
        s1 = sum(x)
          cpu0 = delta_sec ()
        write(*,sfmt) 'intrinsic:', s1, transfer(s1,1), cpu0
  
       case (2)
          cpu0 = delta_sec ()
        s2 = e_sum(x)
          cpu0 = delta_sec ()
        write(*,sfmt) 'extended :', s2, transfer(s2,1), cpu0
  
       case (3)
          cpu0 = delta_sec ()
        s3 = r_sum(x)
          cpu0 = delta_sec ()
        write(*,sfmt) 'recursive:', s3, transfer(s3,1), cpu0
  
       case (4)
          cpu0 = delta_sec ()
        s4 = p_sum(x)
          cpu0 = delta_sec ()
        write(*,sfmt) 'p_sum    :', s4, transfer(s4,1), cpu0
  
       case (5)
          cpu0 = delta_sec ()
        s5 = j_sum(x)
          cpu0 = delta_sec ()
        write(*,sfmt) 'j_sum    :', s5, transfer(s5,1), cpu0
  
       case (6)
          cpu0 = delta_sec ()
        s6 = je_sum(x)
          cpu0 = delta_sec ()
        write(*,sfmt) 'je_sum   :', s6, transfer(s6,1), cpu0
      end select
     end do
   end do
contains
   pure function e_sum(x)
      ! extended precision summation.
      implicit none
      real :: e_sum
      real, intent(in) :: x(:)
      integer :: i
      real(dp) :: temp
      temp = 0.0_dp
      do i = 1, size(x)
         temp = temp + real( x(i), dp )
      enddo
      e_sum = (temp)
      return
   end function e_sum
   function r_sum(x)
      ! recursive summation with manual stack.
      implicit none
      real :: r_sum
      real, intent(in) :: x(:)
      integer :: i, j, p
      real :: psum( bit_size(1) - leadz(size(x)) )  ! max reccursion depth.
      p = 0
      do i = 1, size(x)
         p = p + 1
         psum(p) = x(i)
         do j = 1, trailz(i)
            psum(p-1) = psum(p-1) + psum(p)
            p = p - 1
         enddo
      enddo
      do while (p > 1)  ! cleanup loop. accumulate in reverse order.
         psum(p-1) = psum(p-1) + psum(p)
         p = p - 1
      enddo
      r_sum = psum(1)
      return
   end function r_sum
   pure recursive function p_sum(x) result(r)
      ! recursive summation with function calls.
      implicit none
      real :: r
      real, intent(in) :: x(:)
      integer :: n
      n = size(x)
      select case (n)
      case (:0)
         r = 0.0
      case (1)
         r = x(1)
      case (2)
         r = x(1) + x(2)
      case (3:)
         r = p_sum( x(1:n/2) ) + p_sum( x(n/2+1:n) )
      end select
      return
   end function p_sum
   pure recursive function j_sum(x) result(r)
      ! recursive summation with function calls.
      implicit none
      real :: r
      real, intent(in) :: x(:)
      integer :: n
      integer, parameter :: min_n = 65 ! 64 ! 16
      n = size(x)
      if ( n < min_n ) then
         r = sum (x)
      else
         r = j_sum ( x(1:n/2) ) + j_sum ( x(n/2+1:n) )
      end if
      return
   end function j_sum
   pure recursive function je_sum(x) result(r)
   
      ! recursive summation with extended precision function calls.
      implicit none
      real :: r
      real, intent(in) :: x(:)
      integer :: n
      integer, parameter :: min_n = 65 ! 128 ! 64 ! 16
      n = size(x)
      if ( n < min_n ) then
!         r = sum (x)
         r = e_sum (x)
      else
         r = je_sum ( x(1:n/2) ) + je_sum ( x(n/2+1:n) )
      end if
      return
   end function je_sum
   function delta_sec ()
      ! high precision timer.
      implicit none
      real(dp) :: delta_sec
      integer*8 :: tick, last=-1, rate=-1
      if ( rate < 0 ) call system_clock ( last, rate )
      call system_clock (tick)
      delta_sec = dble (tick-last) / dble (rate)
      last      = tick
   end function delta_sec
end program sums
set basic=-v -fimplicit-none -fallow-argument-mismatch -O2 -march=native -ffast-math
set omp=-fopenmp -fstack-arrays
set basic=-v -fimplicit-none -fallow-argument-mismatch -O -march=native
set basic=-v -fimplicit-none -fallow-argument-mismatch -O2 -march=native -ffast-math
set basic=-v -fimplicit-none -fallow-argument-mismatch -O1
gfortran %1.f90   %basic%  -o %1.exe  >> %1.tce  2>&1
%1 >> %1.tce
type %1.tce
COLLECT_GCC_OPTIONS='-v' '-fimplicit-none' '-fallow-argument-mismatch' '-O' '-march=native' '-o' 'ron_sum2.exe' '-dumpdir' 'ron_sum2.'
Vector Size n = 100000000
Random vector of type 1
intrinsic:  1.6777216E+07 4B800000   0.061096 seconds
extended :  7.4997416E+07 4C8F0BD5   0.061107 seconds
recursive:  7.4997416E+07 4C8F0BD5   0.120826 seconds
p_sum    :  7.4997416E+07 4C8F0BD5   0.327250 seconds
j_sum    :  7.4997416E+07 4C8F0BD5   0.050302 seconds
je_sum   :  7.4997416E+07 4C8F0BD5   0.052829 seconds
Random vector of type 2
intrinsic: -1.4523291E+05 C80DD43A   0.061420 seconds
extended : -1.4519922E+05 C80DCBCE   0.061530 seconds
recursive: -1.4519938E+05 C80DCBD8   0.118981 seconds
p_sum    : -1.4519919E+05 C80DCBCC   0.326990 seconds
j_sum    : -1.4519934E+05 C80DCBD6   0.050335 seconds
je_sum   : -1.4519922E+05 C80DCBCE   0.050940 seconds
COLLECT_GCC_OPTIONS='-v' '-fimplicit-none' '-fallow-argument-mismatch' '-O' '-march=native' '-ffast-math' '-o' 'ron_sum2.exe' '-dumpdir' 'ron_sum2.'
Vector Size n = 100000000
Random vector of type 1
intrinsic:  1.6777216E+07 4B800000   0.061198 seconds
extended :  7.4998288E+07 4C8F0C42   0.061058 seconds
recursive:  7.4998288E+07 4C8F0C42   0.118410 seconds
p_sum    :  7.4998288E+07 4C8F0C42   0.327628 seconds
j_sum    :  7.4998288E+07 4C8F0C42   0.050230 seconds
je_sum   :  7.4998288E+07 4C8F0C42   0.051019 seconds
Random vector of type 2
intrinsic:  9.2840462E+05 4962A94A   0.061880 seconds
extended :  9.2844181E+05 4962AB9D   0.061782 seconds
recursive:  9.2844188E+05 4962AB9E   0.119847 seconds
p_sum    :  9.2844188E+05 4962AB9E   0.327099 seconds
j_sum    :  9.2844188E+05 4962AB9E   0.050386 seconds
je_sum   :  9.2844188E+05 4962AB9E   0.050888 seconds
COLLECT_GCC_OPTIONS='-v' '-fimplicit-none' '-fallow-argument-mismatch' '-O2' '-march=native' '-ffast-math' '-o' 'ron_sum2.exe' '-dumpdir' 'ron_sum2.'
Vector Size n = 100000000
Random vector of type 1
intrinsic:  1.6777216E+07 4B800000   0.061357 seconds
extended :  7.4998368E+07 4C8F0C4C   0.061024 seconds
recursive:  7.4998368E+07 4C8F0C4C   0.092400 seconds
p_sum    :  7.4998368E+07 4C8F0C4C   0.232685 seconds
j_sum    :  7.4998368E+07 4C8F0C4C   0.042841 seconds
je_sum   :  7.4998368E+07 4C8F0C4C   0.042998 seconds
Random vector of type 2
intrinsic: -2.9935404E+04 C6E9DECF   0.063704 seconds
extended : -2.9868031E+04 C6E95810   0.063095 seconds
recursive: -2.9868047E+04 C6E95818   0.092844 seconds
p_sum    : -2.9868062E+04 C6E95820   0.233533 seconds
j_sum    : -2.9868000E+04 C6E95800   0.042905 seconds
je_sum   : -2.9868203E+04 C6E95868   0.043013 seconds
COLLECT_GCC_OPTIONS='-v' '-fimplicit-none' '-fallow-argument-mismatch' '-O1' '-o' 'ron_sum2.exe' '-mtune=generic' '-march=x86-64' '-dumpdir' 'ron_sum2.'
Vector Size n = 100000000
Random vector of type 1
intrinsic:  1.6777216E+07 4B800000   0.061273 seconds
extended :  7.5000264E+07 4C8F0D39   0.061080 seconds
recursive:  7.5000264E+07 4C8F0D39   0.125566 seconds
p_sum    :  7.5000264E+07 4C8F0D39   0.327155 seconds
j_sum    :  7.5000264E+07 4C8F0D39   0.049916 seconds
je_sum   :  7.5000264E+07 4C8F0D39   0.051608 seconds
Random vector of type 2
intrinsic:  9.8548633E+04 47C07A51   0.061245 seconds
extended :  9.8605258E+04 47C096A1   0.061045 seconds
recursive:  9.8605281E+04 47C096A4   0.125928 seconds
p_sum    :  9.8605109E+04 47C0968E   0.328060 seconds
j_sum    :  9.8605062E+04 47C09688   0.049556 seconds
je_sum   :  9.8605203E+04 47C0969A   0.051334 seconds
{{ Revised order }}
COLLECT_GCC_OPTIONS='-v' '-fimplicit-none' '-fallow-argument-mismatch' '-O1' '-o' 'ron_sum2.exe' '-mtune=generic' '-march=x86-64' '-dumpdir' 'ron_sum2.'
Vector Size n = 100000000
Random vector of type 1
intrinsic:  1.6777216E+07 4B800000   0.061068 seconds
j_sum    :  7.4997688E+07 4C8F0BF7   0.049448 seconds
je_sum   :  7.4997688E+07 4C8F0BF7   0.051313 seconds
extended :  7.4997688E+07 4C8F0BF7   0.061019 seconds
recursive:  7.4997688E+07 4C8F0BF7   0.126306 seconds
p_sum    :  7.4997688E+07 4C8F0BF7   0.330779 seconds
Random vector of type 2
intrinsic: -6.9337450E+05 C92947E8   0.061718 seconds
j_sum    : -6.9337188E+05 C92947BE   0.049567 seconds
je_sum   : -6.9337169E+05 C92947BB   0.051128 seconds
extended : -6.9337188E+05 C92947BE   0.061709 seconds
recursive: -6.9337188E+05 C92947BE   0.127979 seconds
p_sum    : -6.9337188E+05 C92947BE   0.325464 seconds
COLLECT_GCC_OPTIONS='-v' '-fimplicit-none' '-fallow-argument-mismatch' '-O1' '-o' 'ron_sum2.exe' '-mtune=generic' '-march=x86-64' '-dumpdir' 'ron_sum2.'
Vector Size n = 100000000
Random vector of type 1
intrinsic:  1.6777216E+07 4B800000   0.061153 seconds
j_sum    :  7.4999704E+07 4C8F0CF3   0.049519 seconds
je_sum   :  7.4999704E+07 4C8F0CF3   0.050971 seconds
extended :  7.4999704E+07 4C8F0CF3   0.061078 seconds
recursive:  7.4999704E+07 4C8F0CF3   0.126844 seconds
p_sum    :  7.4999704E+07 4C8F0CF3   0.331213 seconds
Random vector of type 2
intrinsic: -3.0828716E+05 C89687E5   0.062152 seconds
j_sum    : -3.0824225E+05 C8968248   0.049818 seconds
je_sum   : -3.0824238E+05 C896824C   0.051187 seconds
extended : -3.0824219E+05 C8968246   0.061058 seconds
recursive: -3.0824234E+05 C896824B   0.126645 seconds
p_sum    : -3.0824225E+05 C8968248   0.329529 seconds
COLLECT_GCC_OPTIONS='-v' '-fimplicit-none' '-fallow-argument-mismatch' '-O1' '-o' 'ron_sum2.exe' '-mtune=generic' '-march=x86-64' '-dumpdir' 'ron_sum2.'
Vector Size n = 100000000
Random vector of type 1
intrinsic:  1.6777216E+07 4B800000   0.061181 seconds
j_sum    :  7.4999504E+07 4C8F0CDA   0.049612 seconds
je_sum   :  7.4999504E+07 4C8F0CDA   0.051121 seconds
extended :  7.4999504E+07 4C8F0CDA   0.061106 seconds
recursive:  7.4999504E+07 4C8F0CDA   0.124811 seconds
p_sum    :  7.4999504E+07 4C8F0CDA   0.329647 seconds
Random vector of type 2
intrinsic: -6.9732494E+05 C92A3ECF   0.061310 seconds
j_sum    : -6.9740488E+05 C92A43CE   0.049676 seconds
je_sum   : -6.9740469E+05 C92A43CB   0.051098 seconds
extended : -6.9740481E+05 C92A43CD   0.061636 seconds
recursive: -6.9740494E+05 C92A43CF   0.125510 seconds
p_sum    : -6.9740488E+05 C92A43CE   0.327516 seconds
Random vector of type 3
intrinsic:  4.0709641E+16 5B10A134   0.062140 seconds
j_sum    :  4.3465584E+16 5B1A6BB8   0.049463 seconds
je_sum   :  4.3465584E+16 5B1A6BB8   0.050820 seconds
extended :  4.3465589E+16 5B1A6BB9   0.061240 seconds
recursive:  4.3465589E+16 5B1A6BB9   0.125593 seconds
p_sum    :  4.3465584E+16 5B1A6BB8   0.329287 seconds