For a little program I wrote to add two matrices, I am not seeing much difference when traversing a matrix by row or column, using gfortran -O3 or ifort -O3
program test_loop_order
implicit none
integer, parameter :: n = 5*10**3, wp = kind(1.0d0), niter = 100
real(kind=wp) :: a(n,n), b(n,n), c(n,n), t1, t2, loop_time, &
tinit, tfinal
character (len=1) :: order
integer :: i, j, iter
call cpu_time(tinit)
call get_command_argument(1,order)
loop_time = 0.0_wp
do iter=1,niter
call random_number(a)
call random_number(b)
call cpu_time(t1)
if (order == "c") then ! traverse arrays one column at a time
do j = 1, n
do i = 1, n
c(i,j) = a(i,j) + b(i,j)
end do
end do
else
do i = 1, n
do j = 1, n
c(i,j) = a(i,j) + b(i,j)
end do
end do
end if
call cpu_time(t2)
loop_time = loop_time + t2 - t1
if (maxval(c) > 2.0) stop "bad variates" ! force c(:,:) to be computed
end do
call cpu_time(tfinal)
print "(a,3(1x,g0),2(1x,f0.2))", "order, n, niter, loop_time, total_time =", &
merge("col","row",order=="c"), n, niter, loop_time, tfinal-tinit
end program test_loop_order
c:\fortran\test>ifort -O3 loop_order.f90
Intel(R) Fortran Intel(R) 64 Compiler Classic for applications running on Intel(R) 64, Version 2021.4.0 Build 20210910_000000
Copyright (C) 1985-2021 Intel Corporation. All rights reserved.
Microsoft (R) Incremental Linker Version 14.16.27027.1
Copyright (C) Microsoft Corporation. All rights reserved.
-out:loop_order.exe
-subsystem:console
loop_order.obj
c:\fortran\test>loop_order c
order, n, niter, loop_time, total_time = col 5000 100 2.78 32.48
c:\fortran\test>loop_order r
order, n, niter, loop_time, total_time = row 5000 100 2.56 32.28
c:\fortran\test>gfortran -O3 -o a.exe loop_order.f90
c:\fortran\test>a c
order, n, niter, loop_time, total_time = col 5000 100 3.44 26.59
c:\fortran\test>a r
order, n, niter, loop_time, total_time = row 5000 100 3.50 26.84
With optimization turned off, however, column-major access is much faster, as we were taught:
c:\fortran\test>ifort -Od loop_order.f90
Intel(R) Fortran Intel(R) 64 Compiler Classic for applications running on Intel(R) 64, Version 2021.4.0 Build 20210910_000000
Copyright (C) 1985-2021 Intel Corporation. All rights reserved.
Microsoft (R) Incremental Linker Version 14.16.27027.1
Copyright (C) Microsoft Corporation. All rights reserved.
-out:loop_order.exe
-subsystem:console
loop_order.obj
c:\fortran\test>loop_order c
order, n, niter, loop_time, total_time = col 5000 100 8.73 76.62
c:\fortran\test>loop_order r
order, n, niter, loop_time, total_time = row 5000 100 59.86 128.31
c:\fortran\test>gfortran -O0 -o a.exe loop_order.f90
c:\fortran\test>a.exe c
order, n, niter, loop_time, total_time = col 5000 100 7.17 29.91
c:\fortran\test>a.exe r
order, n, niter, loop_time, total_time = row 5000 100 59.34 81.94