Hi! I got really inspired by this thread and it helped me boost reading huge mesh files!! since this helped me so much I wanted to share my grain of salt in case it can be useful:
module str2real_m
implicit none
private
public :: str2real
integer, parameter :: wp = kind(1.0d0)
integer, parameter :: ikind = selected_int_kind(2)
integer(kind=ikind), parameter :: digit_0 = ichar('0')
integer(kind=ikind), parameter :: period = ichar('.') - digit_0
integer(kind=ikind), parameter :: minus_sign = ichar('-') - digit_0
real(wp), parameter :: whole_number_base(*) = &
[1d15, 1d14, 1d13, 1d12, 1d11, 1d10, 1d9, 1d8, &
1d7, 1d6, 1d5, 1d4, 1d3, 1d2, 1d1, 1d0]
real(wp), parameter :: fractional_base(*) = &
[1d-1, 1d-2, 1d-3, 1d-4, 1d-5, 1d-6, 1d-7, 1d-8, &
1d-9, 1d-10, 1d-11, 1d-12, 1d-13, 1d-14, 1d-15, 1d-16, &
1d-17, 1d-18, 1d-19, 1d-20, 1d-21, 1d-22, 1d-23, 1d-24]
real(wp), parameter :: period_skip = 0d0
real(wp), parameter :: base(*) = &
[whole_number_base, period_skip, fractional_base]
real(wp), parameter :: expbase(*) = [whole_number_base, fractional_base]
contains
elemental function str2real(s) result(r)
character(*), intent(in) :: s
real(wp) :: r
real(wp) :: r_coefficient
integer, parameter :: N = 32
character(N) :: factors_char
integer(kind=ikind) :: factors(N)
integer(kind=ikind) :: mask(N)
integer :: period_loc
integer :: exponent_loc
integer :: exponent_end
integer :: mask_from
integer :: mask_till
intrinsic :: findloc, scan
integer :: findloc, scan
integer(1) :: sig_coef, sig_exp
integer :: i_exponent
equivalence(factors, factors_char)
sig_coef = 1; sig_exp = 1
factors_char = s
factors = factors - digit_0
period_loc = findloc(factors, period, 1)
exponent_loc = scan(s, 'eE', back=.true.)
if(exponent_loc == 0) exponent_loc = len(s) + 1
if(period_loc == 0) period_loc = exponent_loc
where (0 <= factors .and. factors <= 9)
mask = 1
elsewhere
mask = 0
end where
if(factors(exponent_loc+1) == minus_sign) sig_exp = -1
if(factors(1)== minus_sign) sig_coef = -1
mask_from = 18 - period_loc
mask_till = mask_from + exponent_loc - 2
r_coefficient = sum( &
factors(:exponent_loc - 1) * &
base(mask_from:mask_till) * &
mask(:exponent_loc - 1))
i_exponent = str2int( s(exponent_loc+1:len(s)) )
r = (sig_coef*r_coefficient) * expbase(16-sig_exp*i_exponent)
end function
elemental function str2int(s) result(int)
character(*), intent(in) :: s
integer :: int
integer :: i, val
int = 0
do i = 1, len(s)
val = iachar(s(i:i))-digit_0
if( val >= 0 .and. val <= 9 ) int = int*10 + val
end do
end function
end module str2real_m
There are a few changes with different purposes:
-
in order to avoid the problem with white spaces or line endings (CR LF) being mixed in the string I replaced reading the exponent with the str2int function (this was also very useful for reading large arrays of integers).
-
In order to accelerate a bit more the computation:
2.1) Instead of computing the exponential as a sum I took directly the table of bases and constructed the table expbase(*) = [whole_number_base, fractional_base] which gives the value to multiply the coefficient.
2.2) I stored the signs for the coefficient and exponential in two integer(1) values.
With these changes:
I used the test in the GitHub and added the following check:
call check(" 2.5647869e-003 "//char(13)//char(10))
There is one white space before and after the string plus CRLF at the end! this passes without problem
I also ran the tests by @tqviet ( working in Windows11 / Intel 19 / -O3 )
Before the changes I had for the Block 4: time consumed = 0.086 (both lines)
And after: time consumed = 0.073 for the loop, and 0.07 for the array
Results
--------------------------------------------------------------------------------
BLOCK 1: formatted read toward string to double
Read: time consumed = 0.6290 seconds
A = Max |rval(:)-rref(:)| = 5.551E-17, B = epsilon(1.0d0) = 2.220E-16, B-A = 1.665E-16
--------------------------------------------------------------------------------
BLOCK 2: C st_to_r8
Read: time consumed = 0.2350 seconds
Read: time consumed = 0.2320 seconds (array)
A = Max |rval(:)-rref(:)| = 5.551E-17, B = epsilon(1.0d0) = 2.220E-16, B-A = 1.665E-16
--------------------------------------------------------------------------------
BLOCK 3: F st_to_r8
Read: time consumed = 0.1130 seconds
Read: time consumed = 0.1140 seconds (array)
A = Max |rval(:)-rref(:)| = 2.220E-16, B = epsilon(1.0d0) = 2.220E-16, B-A = 0.000E+00
--------------------------------------------------------------------------------
BLOCK 4: F str2real
Read: time consumed = 0.0730 seconds
Read: time consumed = 0.0700 seconds (array)
A = Max |rval(:)-rref(:)| = 3.331E-16, B = epsilon(1.0d0) = 2.220E-16, B-A = -1.110E-16
*** WARNING: A > B, A/B = 1.500E+00
==============================================================
UPDATE
I tried a “simplification”:
elemental function str2real(s) result(r)
character(*), intent(in) :: s
real(wp) :: r
real(wp) :: r_fraction
integer, parameter :: N = 32
character(N) :: factors_char
integer(kind=ikind) :: factors(N)
integer :: period_loc
integer :: exponent_loc
intrinsic :: findloc, scan
integer :: findloc, scan
integer(1) :: sig_coef, sig_exp
integer :: i_exponent, i, val
equivalence(factors, factors_char)
sig_coef = 1; sig_exp = 1
factors_char = s
factors = factors - digit_0
period_loc = findloc(factors, period, 1)
exponent_loc = scan(s, 'eE', back=.true.)
if(exponent_loc == 0) exponent_loc = len(s) + 1
if(period_loc == 0) period_loc = exponent_loc
if(factors(exponent_loc+1) == minus_sign) sig_exp = -1
if(factors(1)== minus_sign) sig_coef = -1
val = str2int( s(1:period_loc-1) )
r = val
do i = period_loc+1, exponent_loc-1
val = factors(i)
if( val >= 0 .and. val <= 9 ) r = r + val*fractional_base(i-period_loc)
end do
r = sig_coef*r
i_exponent = str2int( s(exponent_loc+1:len(s)) )
if(i_exponent.ne.0) r = r * expbase(16-sig_exp*i_exponent)
end function
By computing the whole number part with the str2int function and then iterating on the fractional part plus avoiding multiplying by the exponential coefficient if not needed it brough my times down from 0.07s to 0.052s
Results
--------------------------------------------------------------------------------
BLOCK 1: formatted read toward string to double
Read: time consumed = 0.6270 seconds
A = Max |rval(:)-rref(:)| = 5.551E-17, B = epsilon(1.0d0) = 2.220E-16, B-A = 1.665E-16
--------------------------------------------------------------------------------
BLOCK 2: C st_to_r8
Read: time consumed = 0.2370 seconds
Read: time consumed = 0.2320 seconds (array)
A = Max |rval(:)-rref(:)| = 5.551E-17, B = epsilon(1.0d0) = 2.220E-16, B-A = 1.665E-16
--------------------------------------------------------------------------------
BLOCK 3: F st_to_r8
Read: time consumed = 0.1390 seconds
Read: time consumed = 0.1360 seconds (array)
A = Max |rval(:)-rref(:)| = 2.220E-16, B = epsilon(1.0d0) = 2.220E-16, B-A = 0.000E+00
--------------------------------------------------------------------------------
BLOCK 4: F str2real
Read: time consumed = 0.0520 seconds
Read: time consumed = 0.0520 seconds (array)
A = Max |rval(:)-rref(:)| = 5.551E-16, B = epsilon(1.0d0) = 2.220E-16, B-A = -3.331E-16
*** WARNING: A > B, A/B = 2.500E+00
==============================================================
UPDATE 2
Just the change w.r.t to previous update of the last lines of the str2real function:
val = str2int( s(1:period_loc-1) )
r = sum( factors(period_loc+1:exponent_loc-1)*fractional_base(1:exponent_loc-period_loc-1) )
r = sig_coef*(r+val)
i_exponent = str2int( s(exponent_loc+1:len(s)) )
if(i_exponent.ne.0) r = r * expbase(16-sig_exp*i_exponent)
Results
--------------------------------------------------------------------------------
BLOCK 1: formatted read toward string to double
Read: time consumed = 0.6390 seconds
A = Max |rval(:)-rref(:)| = 5.551E-17, B = epsilon(1.0d0) = 2.220E-16, B-A = 1.665E-16
--------------------------------------------------------------------------------
BLOCK 2: C st_to_r8
Read: time consumed = 0.2370 seconds
Read: time consumed = 0.2350 seconds (array)
A = Max |rval(:)-rref(:)| = 5.551E-17, B = epsilon(1.0d0) = 2.220E-16, B-A = 1.665E-16
--------------------------------------------------------------------------------
BLOCK 3: F st_to_r8
Read: time consumed = 0.1160 seconds
Read: time consumed = 0.1140 seconds (array)
A = Max |rval(:)-rref(:)| = 2.220E-16, B = epsilon(1.0d0) = 2.220E-16, B-A = 0.000E+00
--------------------------------------------------------------------------------
BLOCK 4: F str2real
Read: time consumed = 0.0480 seconds
Read: time consumed = 0.0480 seconds (array)
A = Max |rval(:)-rref(:)| = 2.220E-16, B = epsilon(1.0d0) = 2.220E-16, B-A = 0.000E+00
My time went down now to 0.048 and the error is now B-A=0
By the way @Carltoffel I think in line 41 of your test.f90 file the test should be:
if(abs(rel_err) > epsilon(1.0d0)) then
Otherwise I was getting a print with call check(“0.1234E0”), when both values were identical to the last decimal but “abs_err = str2real_out - formatted_read_out” was returning a value smaller than the precision