I want to compare the elapsed time for matrix multiplication using the following methods:
- The intrinsic `matmul` function
- The `dgemm` function
- A coarray version of matmul (utilizing the intrinsic `matmul`)
- A coarray version of matmul (utilizing `dgemm`)
As the number of images increases, the time taken by the standard `matmul` and `dgemm` functions also increases. I believe it should remain constant regardless of the number of images.
How can I perform a comparison of the elapsed time for these four functions?
Results for -coarray-num-images=1
Elapsed time (matmul): 1.645 [s]
Elapsed time (dgemm): 0.768 [s]
Elapsed time (coarray with matmul) image= 1: 1.696 [s]
Elapsed time (coarray with dgemm) image= 1: 0.823 [s]
Results for -coarray-num-images=2
Elapsed time (matmul): 1.687 [s] ??
Elapsed time (dgemm): 0.783 [s] ??
Elapsed time (coarray with matmul) image= 1: 0.899 [s]
Elapsed time (coarray with matmul) image= 2: 0.899 [s]
Elapsed time (coarray with dgemm) image= 2: 0.447 [s]
Elapsed time (coarray with dgemm) image= 1: 0.447 [s]
Results for -coarray-num-images=4
Elapsed time (matmul): 2.173 [s] ??
Elapsed time (dgemm): 0.927 [s] ??
Elapsed time (coarray with matmul) image= 3: 0.553 [s]
Elapsed time (coarray with matmul) image= 4: 0.553 [s]
Elapsed time (coarray with matmul) image= 1: 0.553 [s]
Elapsed time (coarray with matmul) image= 2: 0.553 [s]
Elapsed time (coarray with dgemm) image= 3: 0.287 [s]
Elapsed time (coarray with dgemm) image= 4: 0.287 [s]
Elapsed time (coarray with dgemm) image= 1: 0.287 [s]
Elapsed time (coarray with dgemm) image= 2: 0.287 [s]
Results for -coarray-num-images=8
Elapsed time (matmul): 2.355 [s] ??
Elapsed time (dgemm): 1.238 [s] ??
Elapsed time (coarray with matmul) image= 3: 0.373 [s]
Elapsed time (coarray with matmul) image= 1: 0.373 [s]
Elapsed time (coarray with matmul) image= 6: 0.373 [s]
Elapsed time (coarray with matmul) image= 4: 0.374 [s]
Elapsed time (coarray with matmul) image= 7: 0.374 [s]
Elapsed time (coarray with matmul) image= 5: 0.374 [s]
Elapsed time (coarray with matmul) image= 8: 0.374 [s]
Elapsed time (coarray with matmul) image= 2: 0.374 [s]
Elapsed time (coarray with dgemm) image= 3: 0.232 [s]
Elapsed time (coarray with dgemm) image= 1: 0.232 [s]
Elapsed time (coarray with dgemm) image= 4: 0.232 [s]
Elapsed time (coarray with dgemm) image= 2: 0.232 [s]
Elapsed time (coarray with dgemm) image= 7: 0.232 [s]
Elapsed time (coarray with dgemm) image= 6: 0.232 [s]
Elapsed time (coarray with dgemm) image= 5: 0.232 [s]
Elapsed time (coarray with dgemm) image= 8: 0.232 [s]
Code
program benchmark1
   ! Benchmark of four matrix-multiplication paths for C(m,o) = A(m,n).B(n,o):
   !   1. intrinsic matmul                      (serial baseline, image 1 only)
   !   2. dgemm through matmul_blas             (serial baseline, image 1 only)
   !   3. coarray matmul built on matmul        (timed on every image)
   !   4. coarray matmul built on dgemm         (timed on every image)
   ! Each measurement repeats the product l times; l is handed to the timer
   ! as nloops.

   use kinds, only: rk
   use fortime, only: timer
   use formatmul, only: matmul, matmul_blas

   implicit none

   real(rk), allocatable :: A(:,:), B(:,:)
   real(rk), allocatable :: C(:,:)
   type(timer) :: t
   integer :: m, n, o, i, l
   character(2) :: im

   ! C(m,o) = A(m,n).B(n,o)
   m = 4000
   n = 3000
   o = 2000
   l = 5   ! number of repetitions per measurement

   allocate(A(m,n), B(n,o))
   call random_number(A)
   call random_number(B)

   ! Image label used in the per-image messages. Written once, up front, so
   ! the internal write is not part of any timed region.
   ! NOTE(review): the I2 edit descriptor holds at most two digits; with 100
   ! or more images the label would be printed as asterisks.
   write (im, "(I2)") this_image()

   ! --- Serial baselines -------------------------------------------------
   ! Only image 1 executes the serial products. Previously every image ran
   ! these loops simultaneously while only image 1 was timed, so the reported
   ! time grew with the number of images (presumably because the images
   ! compete for the same cores and memory bandwidth). The other images now
   ! wait at the following "sync all" until image 1 has finished.
   ! NOTE(review): some coarray runtimes busy-wait inside "sync all"; if the
   ! baseline still drifts with the image count, compare against a run with
   ! a single image.
   sync all
   if (this_image() == 1) then
      call t%timer_start()
      do i = 1, l
         C = matmul(A,B) ! intrinsic matmul
      end do
      call t%timer_stop(message=' Elapsed time (matmul):',nloops=l)
   end if

   sync all
   if (this_image() == 1) then
      call t%timer_start()
      do i = 1, l
         C = matmul_blas(A,B) ! dgemm
      end do
      call t%timer_stop(message=' Elapsed time (dgemm):',nloops=l)
   end if

   ! --- Coarray versions -------------------------------------------------
   ! All images take part in the product and each one reports its own time.
   sync all
   call t%timer_start()
   do i = 1, l
      C = matmul(A,B,'coarray','matmul') ! coarray version of matmul (utilizes intrinsic matmul)
   end do
   call t%timer_stop(message=' Elapsed time (coarray with matmul) image='//trim(im)//':',nloops=l)

   sync all
   call t%timer_start()
   do i = 1, l
      C = matmul(A,B,'coarray','blas') ! coarray version of matmul (utilizes dgemm).
   end do
   call t%timer_stop(message=' Elapsed time (coarray with dgemm) image='//trim(im)//':',nloops=l)

end program benchmark1