Improving Fortran Results in the Julia Micro-benchmarks

In addition to the topic of print_to_file, in the topic of matrix_statistics, I found that the scale function is used in that code (rnorm function):

DO
    CALL RANDOM_NUMBER( u )
    CALL RANDOM_NUMBER( v )
    u = SCALE( u, 1 ) - one
    v = SCALE( v, 1 ) - one
    sum = u*u + v*v + vsmall         ! vsmall added to prevent LOG(zero) / zero
    IF(sum < one) EXIT
  END DO
  sln = SQRT(- SCALE( LOG(sum), 1 ) / sum)
  fn_val = u*sln

As far as I know, simply using 2*u to replace the code like scale(u, 1) can be much (a bit) faster:

DO
    CALL RANDOM_NUMBER( u )
    CALL RANDOM_NUMBER( v )
    u = 2*u - one
    v = 2*v - one
    sum = u*u + v*v + vsmall         ! vsmall added to prevent LOG(zero) / zero
    IF(sum < one) EXIT
  END DO
  sln = SQRT(- 2*LOG(sum) / sum)
  fn_val = u*sln

Simple example (Incorrect):

Summary
program main

    real(8) :: t1, t2, x, y
    integer, parameter :: N = 100000000

    print *, "x = ", x, "; y = ", y

    call cpu_time(t1)
    do i = 1, N
        x = 2*y + i
    end do
    call cpu_time(t2)
    print *, "time for N = ", N, ", x = 2*y        : ", t2 - t1, " s; x = ", x

    call cpu_time(t1)
    do i = 1, N
        x = scale(y, 1) + i
    end do
    call cpu_time(t2)
    print *, "time for N = ", N, ", x = scale(y, 1): ", t2 - t1, " s; x = ", x

end program main

Results:
(Compiler optimization options are not used, because with optimization, Fortran runs too fast, and the running time is 0)

# On WSL2-Debian gfortran 10.3
>> gfortran main.f90 && ./a.out
 x =    9.8813129168249309E-324 ; y =    2.0000000000000000     
 time for N =    100000000 , x = 2*y        :   0.16183899999999998       s; x =    100000004.00000000     
 time for N =    100000000 , x = scale(y, 1):   0.72182100000000005       s; x =    100000004.00000000
# On MSYS2-ucrt64-gfortran 12.1
>> gfortran main.f90 && ./a
 x =    6.9530460433855700E-310 ; y =    2.0000000000000000     
 time for N =    100000000 , x = 2*y        :   0.15625000000000000       s; x =    100000004.00000000     
 time for N =    100000000 , x = scale(y, 1):   0.60937500000000000       s; x =    100000004.00000000  
# Windows10-ifort 2021.6.0
>> ifort main.f90 && ./main
 x =   0.000000000000000E+000 ; y =    2.00000000000000     
 time for N =    100000000 , x = 2*y        :   0.000000000000000E+000  s; x =
   100000004.000000
 time for N =    100000000 , x = scale(y, 1):   3.125000000000000E-002  s; x = 
   100000004.000000

Modified example:

program main

    real(8) :: t1, t2, x, y = 2.0
    integer, parameter :: N = 100000000

    call cpu_time(t1)
    do i = 1, N
        call random_number(y)
        x = 2*y
    end do
    call cpu_time(t2)
    print *, "time for N = ", N, ", x = 2*y        : ", t2 - t1, " s; x = ", x

    call cpu_time(t1)
    do i = 1, N
        call random_number(y)
        x = scale(y, 1)
    end do
    call cpu_time(t2)
    print *, "time for N = ", N, ", x = scale(y, 1): ", t2 - t1, " s; x = ", x

end program main

Results:

# WSL2-Debian gfortran 10.3
>> gfortran -Ofast main.f90 && ./a
 time for N =    100000000 , x = 2*y        :   0.78022399999999992       s; x =    1.1880948469991348     
 time for N =    100000000 , x = scale(y, 1):    1.4326160000000001       s; x =    1.6401134682038268
# MSYS2-ucrt64-gfortran 12.1
>> gfortran -Ofast main.f90 && ./a
 time for N =    100000000 , x = 2*y        :    2.5156250000000000       s; x =    1.1728265567290517     
 time for N =    100000000 , x = scale(y, 1):    3.0625000000000000       s; x =    1.5269402525788756
# On Windows-ifort 2021.6.0
>> ifort /fast main.f90 && ./main
 time for N =    100000000 , x = 2*y        :   0.437500000000000       s; x = 
   1.03704569309432
 time for N =    100000000 , x = scale(y, 1):   0.625000000000000       s; x = 
   1.14890448267426 
1 Like