# Speed of array intrinsics

The array intrinsic functions of Fortran are convenient, but it appears that if you are doing multiple calculations on the same array, it may be faster to write your own function. For the program

``````module min_max_mod
implicit none
integer, parameter :: wp = kind(1.0d0)
contains
pure function min_max(x) result(y)
real(kind=wp), intent(in) :: x(:)
real(kind=wp)             :: y(2)
integer                   :: i,n
n = size(x)
if (n < 1) return
y(1) = x(1)
y(2) = y(1)
do i=2,n
if (x(i) < y(1)) y(1) = x(i)
if (x(i) > y(2)) y(2) = x(i)
end do
end function min_max
!
pure function min_max_mean(x) result(y)
real(kind=wp), intent(in) :: x(:)
real(kind=wp)             :: y(3)
integer                   :: i,n
n = size(x)
if (n < 1) return
y(1)   = x(1)
y(2:3) = y(1)
do i=2,n
if (x(i) < y(1)) y(1) = x(i)
if (x(i) > y(2)) y(2) = x(i)
y(3) = y(3) + x(i)
end do
if (n > 0) y(3) = y(3)/n
end function min_max_mean
end module min_max_mod
!
program xmin_max
use min_max_mod
implicit none
integer :: i,n
integer, parameter :: nt = 5, nlen = 30
real(kind=wp), allocatable :: x(:)
real(kind=wp)              :: extremes(2,2),t(nt),xmin_max_mean(3,2)
character (len=nlen) :: labels(nt-1) = [character(len=nlen) :: &
"minval(x), maxval(x)","min_max(x)", &
"minval(x),maxval(x),sum(x)/n","min_max_mean(x)"]
character (len=*), parameter :: fmt_cr = "(a20,*(1x,f16.12))"
n = 10**8
allocate (x(n))
call random_number(x)
call cpu_time(t(1))
extremes(:,1) = [minval(x),maxval(x)]
call cpu_time(t(2))
extremes(:,2) = min_max(x)
call cpu_time(t(3))
xmin_max_mean(:,2) = [minval(x),maxval(x),sum(x)/n]
call cpu_time(t(4))
xmin_max_mean(:,1) = min_max_mean(x)
call cpu_time(t(5))
print fmt_cr,"min, max:",extremes(:,1)
print fmt_cr,"min, max:",extremes(:,2)
print fmt_cr,"min, max, mean:",xmin_max_mean(:,1)
print fmt_cr,"min, max, mean:",xmin_max_mean(:,2)
print "(f8.4,5x,a30)",(t(i+1)-t(i),trim(labels(i)),i=1,nt-1)
end program xmin_max
``````

the output for gfortran -O3 on WSL is

``````           min, max:   0.000000009614   0.999999996113
min, max:   0.000000009614   0.999999996113
min, max, mean:   0.000000009614   0.999999996113   0.499997321868
min, max, mean:   0.000000009614   0.999999996113   0.499997321868

0.2214               minval(x), maxval(x)
0.1194                         min_max(x)
0.3329       minval(x),maxval(x),sum(x)/n
0.2380                    min_max_mean(x)
``````

and for ifort -O3 on WSL it is

``````           min, max:   0.000000003725   0.999999968801
min, max:   0.000000003725   0.999999968801
min, max, mean:   0.000000003725   0.999999968801   0.499970722298
min, max, mean:   0.000000003725   0.999999968801   0.499970722298

0.1138               minval(x), maxval(x)
0.0624                         min_max(x)
0.1568       minval(x),maxval(x),sum(x)/n
0.0657                    min_max_mean(x)
``````
1 Like

If one wants the largest and smallest elements of an array of intrinsic type, the compilerâ€™s optimization quality is what counts. In your function min_max, 2(n-1) comparisons are required. If each element of the array is of a user-defined type composite variable, it may be worthwhile to reduce the number of comparisons. For example, the following algorithm gets by with 3.n/2 - 2 comparisons, when n is even.

1. Take the first two elements, compare their keys and use the results to initialize key_min and key_max.
2. Take the next pair, identify which of the two is larger. Compare the smaller to the previous min. Compare the larger to the previous max.
3. Repeat Step 2 until all pairs are processed.

If the size of the array, n, is odd, modify the algorithm to suit.

2 Likes

Thanks for the suggestion, but I am finding that algorithm to be slower. For the code

``````module min_max_mod
implicit none
integer, parameter :: wp = kind(1.0d0)
contains
pure function min_max(x) result(y)
real(kind=wp), intent(in) :: x(:)
real(kind=wp)             :: y(2)
integer                   :: i,n
n = size(x)
if (n < 1) return
y(1) = x(1)
y(2) = y(1)
do i=2,n
if (x(i) < y(1)) y(1) = x(i)
if (x(i) > y(2)) y(2) = x(i)
end do
end function min_max
!
pure function min_max_pairs(x) result(y)
real(kind=wp), intent(in) :: x(:)
real(kind=wp)             :: y(2)
integer                   :: i,n
logical                   :: odd
n = size(x)
if (n < 1) return
if (n == 1) then
y(1) = x(1)
y(2) = x(2)
return
end if
odd = mod(n,2) == 1
if (x(2) > x(1)) then
y(1) = x(1)
y(2) = x(2)
else
y(1) = x(2)
y(2) = x(1)
end if
do i=3,n-1,2
if (x(i+1) > x(i)) then
if (y(1) > x(i))   y(1) = x(i)
if (y(2) < x(i+1)) y(2) = x(i+1)
else
if (y(1) > x(i+1)) y(1) = x(i+1)
if (y(2) < x(i))   y(2) = x(i)
end if
end do
if (odd) then
y(1) = min(y(1),x(n))
y(2) = max(y(1),x(n))
end if
end function min_max_pairs
!
pure function min_max_local(x) result(y)
! same as min_max_pairs but sets
! xi to x(i) and xi1 to x(i+1)
real(kind=wp), intent(in) :: x(:)
real(kind=wp)             :: y(2)
real(kind=wp)             :: xi,xi1
integer                   :: i,n
logical                   :: odd
n = size(x)
if (n < 1) return
if (n == 1) then
y(1) = x(1)
y(2) = x(2)
return
end if
odd = mod(n,2) == 1
if (x(2) > x(1)) then
y(1) = x(1)
y(2) = x(2)
else
y(1) = x(2)
y(2) = x(1)
end if
do i=3,n-1,2
xi1 = x(i+1)
xi  = x(i)
if (xi1 > xi) then
if (y(1) > xi)   y(1) = xi
if (y(2) < xi1) y(2) = xi1
else
if (y(1) > xi1) y(1) = xi1
if (y(2) < xi)   y(2) = xi
end if
end do
if (odd) then
y(1) = min(y(1),x(n))
y(2) = max(y(1),x(n))
end if
end function min_max_local
!
pure function min_max_mean(x) result(y)
real(kind=wp), intent(in) :: x(:)
real(kind=wp)             :: y(3)
integer                   :: i,n
n = size(x)
if (n < 1) return
y(1)   = x(1)
y(2:3) = y(1)
do i=2,n
if (x(i) < y(1)) y(1) = x(i)
if (x(i) > y(2)) y(2) = x(i)
y(3) = y(3) + x(i)
end do
if (n > 0) y(3) = y(3)/n
end function min_max_mean
end module min_max_mod
!
program xmin_max
use min_max_mod
implicit none
integer :: i,n
integer, parameter :: nt = 7, nlen = 30, ncol_extremes = 4
integer                    :: icol
real(kind=wp), allocatable :: x(:)
real(kind=wp)              :: extremes(2,ncol_extremes),t(nt),xmin_max_mean(3,2)
character (len=nlen) :: labels(nt-1) = [character(len=nlen) :: &
"minval(x), maxval(x)","min_max(x)","min_max_pairs(x)", &
"min_max_local(x)","minval(x),maxval(x),sum(x)/n","min_max_mean(x)"]
character (len=*), parameter :: fmt_cr = "(a20,*(1x,f16.12))"
n = 10**8
allocate (x(n))
call random_number(x)
call cpu_time(t(1))
extremes(:,1) = [minval(x),maxval(x)]
call cpu_time(t(2))
extremes(:,2) = min_max(x)
call cpu_time(t(3))
extremes(:,3) = min_max_pairs(x)
call cpu_time(t(4))
extremes(:,4) = min_max_local(x)
call cpu_time(t(5))
xmin_max_mean(:,2) = [minval(x),maxval(x),sum(x)/n]
call cpu_time(t(6))
xmin_max_mean(:,1) = min_max_mean(x)
call cpu_time(t(7))
do icol=1,ncol_extremes
print fmt_cr,"min, max:",extremes(:,icol)
end do
print fmt_cr,"min, max, mean:",xmin_max_mean(:,1)
print fmt_cr,"min, max, mean:",xmin_max_mean(:,2)
print "(f8.4,5x,a30)",(t(i+1)-t(i),trim(labels(i)),i=1,nt-1)
end program xmin_max
``````

with gfortran -O3 on WSL2 I get

``````           min, max:   0.000000002249   0.999999986684
min, max:   0.000000002249   0.999999986684
min, max:   0.000000002249   0.999999986684
min, max:   0.000000002249   0.999999986684
min, max, mean:   0.000000002249   0.999999986684   0.499991319230
min, max, mean:   0.000000002249   0.999999986684   0.499991319230

0.2200               minval(x), maxval(x)
0.1264                         min_max(x)
0.2363                   min_max_pairs(x)
0.2429                   min_max_local(x)
0.3305       minval(x),maxval(x),sum(x)/n
0.2371                    min_max_mean(x)
``````

where `min_max_pairs` and `min_max_local` are the functions that try to reduce the number of comparisons, and with ifort -O3 on WSL2 I get

``````           min, max:   0.000000003725   0.999999968801
min, max:   0.000000003725   0.999999968801
min, max:   0.000000003725   0.999999968801
min, max:   0.000000003725   0.999999968801
min, max, mean:   0.000000003725   0.999999968801   0.499970722298
min, max, mean:   0.000000003725   0.999999968801   0.499970722298

0.1142               minval(x), maxval(x)
0.0630                         min_max(x)
0.2162                   min_max_pairs(x)
0.2153                   min_max_local(x)
0.1548       minval(x),maxval(x),sum(x)/n
0.0674                    min_max_mean(x)
``````

Interesting cases.
I found for my Win7-gfortran 11.1 on an i5-2300:
using the pairs approach was the slowest approach,
using more optimisation helped, but
using -O3 -ffast-math with intrisnics was the fastest.

Why donâ€™t pairs do better

Of interest, my minor changes to min_max, min_max_pair and timer, which achieved very little were:

``````  pure function min_max_new(x) result(y)
! same as min_max but different starting case
real(kind=wp), intent(in) :: x(:)
real(kind=wp)             :: y(2)
integer                   :: i,n

n = size(x)
if (n < 1) return

y = x(n)

do i = 1,n-1
if ( x(i) < y(1) ) y(1) = x(i)
if ( x(i) > y(2) ) y(2) = x(i)
end do

end function min_max_new

pure function min_max_dev(x) result(y)
! same as min_max_pairs, but different start case eliminates ODD
real(kind=wp), intent(in) :: x(:)
real(kind=wp)             :: y(2)
integer                   :: i,n

n = size(x)
if (n < 1) return

y = x(n)

do i = 1,n-1,2
if ( x(i) > x(i+1) ) then
if ( x(i+1) < y(1) ) y(1) = x(i+1)
if ( x(i)   > y(2) ) y(2) = x(i)
else
if ( x(i)   < y(1) ) y(1) = x(i)
if ( x(i+1) > y(2) ) y(2) = x(i+1)
end if
end do

end function min_max_dev

subroutine elapse_time (sec)
real(kind=wp)  :: sec
integer*8 :: tick, rate
!z  call cpu_time (sec)
call system_clock ( tick, rate )
sec = dble(tick) / dble(rate)
end subroutine elapse_time

``````