Great example. Depending on the platform and compiler options and the
array size you can get all kinds of CPU_TIME/WALLCLOCK_TIME ratios.
Using fpm and being on a platform with the time command I could
reproduce the OPs numbers easily; and by changing the array size could
get all kinds of other interesting ratios; some of which were really
surprising when on a MicroSoft OS. Far less surprises on Linux for me.
I added a few command line options. Since the OP mentioned calling C/C++
it would be easy to get a bad stride because of the differences in F
and C storage order as part of the explanation or the timings being seen.
Your example is a keeper for demonstrating several performance issues. I
already added it to my “hello” command; which generates a directory of
examples for users (it originally created “hello world!” examples of
MPI, OpenMP, PVM, .. batch jobs using PBS/LSF/Slurm/TORQUE directives –
hence the name; but generates a lot of other examples of good and bad
practice and module use now as well).
#!/bin/bash
(
exec 2>&1
fpm --profile=release --runner='/bin/time -v' run -- ratio=3 good=T verbose=T
fpm --profile=release --runner='/bin/time -v' run -- ratio=3 good=F verbose=T
fpm --profile=debug --runner='/bin/time -v' run -- ratio=3 good=T verbose=T
fpm --profile=debug --runner='/bin/time -v' run -- ratio=3 good=F verbose=T
)|tee log.txt
code with command line options
program slowmotion
use, intrinsic::iso_fortran_env, only: int8, int16, int32, int64
use, intrinsic::iso_fortran_env, only: sp=>real32, dp=>real64
real(kind=dp), allocatable :: large_array(:, :, :)
integer :: i, j, k, ni, nj, nk
real :: tic, toc, start, end
! command line
real :: ratio = 1.0; namelist /cmd/ ratio
logical :: good = .true.; namelist /cmd/ good
logical :: verbose = .false.; namelist /cmd/ verbose
character(len=:), allocatable :: string
character(len=255) :: iomsg
integer :: iostat
string = get_namelist() ! return command line arguments as NAMELIST input
read (string, nml=cmd, iostat=iostat, iomsg=iomsg) ! internal read of namelist
if (iostat .ne. 0) then
write (*, '("<ERROR>",i0,1x,a)') iostat, trim(iomsg)
write (*, *) 'COMMAND OPTIONS ARE'
write (*, nml=cmd)
stop 1
endif
if(verbose)call platform()
ni = nint(1000/ratio)
nj = nint(2000/ratio)
nk = nint(3000/ratio)
allocate (large_array(ni, nj, nk))
if (good) then
write (*, *) 'GOOD access'
call CPU_TIME(start)
tic = wall_time() ! wall time counter
do k = 1, nk
do j = 1, nj
do i = 1, ni
large_array(i, j, k) = i + j + k
end do
end do
end do
toc = wall_time() ! wall time counter
call CPU_TIME(end)
write (*, *) 'cpu time is: ', end - start
write (*, *) 'wall time is: ', toc - tic
else
write (*, *) 'BAD access'
call CPU_TIME(start)
tic = wall_time() ! wall time counter
do i = 1, ni
do j = 1, nj
do k = 1, nk
large_array(i, j, k) = i + j + k
end do
end do
end do
toc = wall_time() ! wall time counter
call CPU_TIME(end)
write (*, *) 'cpu time is: ', end - start
write (*, *) 'wall time is: ', toc - tic
endif
contains
real function wall_time()
integer(kind=int64) :: ticks, rate
call system_clock(ticks, rate)
wall_time = dble(ticks)/dble(rate)
end function wall_time
function get_namelist() result(string)
character(len=:), allocatable :: string
integer :: command_line_length
call get_command(length=command_line_length)
allocate (character(len=command_line_length) :: string)
call get_command(string)
string = adjustl(string)//' '
string = string(index(string, ' '):)
string = "&cmd "//string//" /"
end function get_namelist
subroutine platform()
use, intrinsic :: iso_fortran_env, only : compiler_version
use, intrinsic :: iso_fortran_env, only : compiler_options
implicit none
character(len=:),allocatable :: version, options
character(len=*),parameter :: nl=new_line('a')
integer :: where, start, break, i, last, col
version=compiler_version()//' '
options=' '//compiler_options()
start=1
do
where=index(options(start:),' -')
if(where.eq.0)exit
break=where+start-1
options(break:break)=nl
start=where
enddo
if(start.eq.1)then
do
where=index(options(start:),' /')
if(where.eq.0)exit
break=where+start-1
options(break:break)=nl
start=where
enddo
endif
last=len_trim(version)+1
col=0
do i=1,len_trim(version)
col=col+1
if(version(i:i).eq.' ')last=i
if(col.gt.76)then
version(last:last)=nl
col=0
endif
enddo
print '(a,/,3x,*(a))', 'This file was compiled by :', inset(version)
if(options.ne.'')then
print '(*(a))', 'using the options :', inset(options)
endif
end subroutine platform
function inset(string) result(longer)
character(len=*),intent(in) :: string
character(len=:),allocatable :: longer
character(len=*),parameter :: nl=new_line('a')
integer :: i
longer=''
do i=1,len(string)
longer=longer//string(i:i)
if(string(i:i).eq.nl)then
longer=longer//' '
endif
enddo
end function inset
end program slowmotion
What values are valid varies a lot from platform to platform when using the time command on Unix-like systems but there is at least some data in there.
that is always useful and you can use it on any command but /bin/time -v seems to be often overlooked, so perhaps the rather verbose output will be of interest to some. I did not add it but on Linux adding a routine at the end of your program to read the /proc/$$/status file and echo it back out gives all kinds of useful info without having to call any C at all. Not sure what the equivalent is on other OSes.
Output
This file was compiled by :
GCC version 16.0.0 20250727 (experimental)
using the options :
-I build/gfortran_2654F75F5833692A
-mtune=generic
-march=x86-64
-O3
-Wimplicit-interface
-Werror=implicit-interface
-funroll-loops
-fPIC
-fmax-errors=1
-fcoarray=single
-fimplicit-none
-ffree-form
-J build/gfortran_2654F75F5833692A
GOOD access
cpu time is: 1.96800005
wall time is: 10.2500000
Command being timed: "build/gfortran_688A5DB7BAD2F9DA/app/timeit ratio=3 good=T"
User time (seconds): 0.20
System time (seconds): 1.81
Percent of CPU this job got: 19%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:10.39
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 384784
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 437078
Minor (reclaiming a frame) page faults: 0
Voluntary context switches: 0
Involuntary context switches: 0
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 65536
Exit status: 0
Project is up to date
This file was compiled by :
GCC version 16.0.0 20250727 (experimental)
using the options :
-I build/gfortran_2654F75F5833692A
-mtune=generic
-march=x86-64
-O3
-Wimplicit-interface
-Werror=implicit-interface
-funroll-loops
-fPIC
-fmax-errors=1
-fcoarray=single
-fimplicit-none
-ffree-form
-J build/gfortran_2654F75F5833692A
BAD access
cpu time is: 17.2659988
wall time is: 51.1250000
Command being timed: "build/gfortran_688A5DB7BAD2F9DA/app/timeit ratio=3 good=F"
User time (seconds): 8.28
System time (seconds): 9.01
Percent of CPU this job got: 33%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:51.23
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 1738816
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 1351881
Minor (reclaiming a frame) page faults: 0
Voluntary context switches: 0
Involuntary context switches: 0
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 65536
Exit status: 0
Project is up to date
This file was compiled by :
GCC version 16.0.0 20250727 (experimental)
using the options :
-I build/gfortran_87E2AE0597D39913
-mtune=generic
-march=x86-64
-g
-Wall
-Wextra
-Werror=implicit-interface
-fPIC
-fmax-errors=1
-fbounds-check
-fcheck=array-temps
-fbacktrace
-fcoarray=single
-fimplicit-none
-ffree-form
-J build/gfortran_87E2AE0597D39913
GOOD access
cpu time is: 4.04600000
wall time is: 7.50000000
Command being timed: "build/gfortran_E167FD2A985B468F/app/timeit ratio=3 good=T"
User time (seconds): 2.89
System time (seconds): 1.18
Percent of CPU this job got: 50%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:08.02
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 1703704
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 436898
Minor (reclaiming a frame) page faults: 0
Voluntary context switches: 0
Involuntary context switches: 0
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 65536
Exit status: 0
Project is up to date
This file was compiled by :
GCC version 16.0.0 20250727 (experimental)
using the options :
-I build/gfortran_87E2AE0597D39913
-mtune=generic
-march=x86-64
-g
-Wall
-Wextra
-Werror=implicit-interface
-fPIC
-fmax-errors=1
-fbounds-check
-fcheck=array-temps
-fbacktrace
-fcoarray=single
-fimplicit-none
-ffree-form
-J build/gfortran_87E2AE0597D39913
BAD access
cpu time is: 11.1709995
wall time is: 25.8750000
Command being timed: "build/gfortran_E167FD2A985B468F/app/timeit ratio=3 good=F"
User time (seconds): 7.15
System time (seconds): 4.03
Percent of CPU this job got: 43%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:25.98
Average shared text size (kbytes): 0
Average unshared data size (kbytes): 0
Average stack size (kbytes): 0
Average total size (kbytes): 0
Maximum resident set size (kbytes): 1738796
Average resident set size (kbytes): 0
Major (requiring I/O) page faults: 885418
Minor (reclaiming a frame) page faults: 0
Voluntary context switches: 0
Involuntary context switches: 0
Swaps: 0
File system inputs: 0
File system outputs: 0
Socket messages sent: 0
Socket messages received: 0
Signals delivered: 0
Page size (bytes): 65536
Exit status: 0