I would like to generate a large array of random numbers for a simulation and I am trying to speed things up using OpenMP. Since the performance of the intrinsic random_number
is terrible in parallel (I am using ifort on Windows, results are very similar with ifx), I am using as an alternative a routine from the famous Numerical Recipes textbook (Fortran 90 version, of course). However, the perforance is still not very good, even though is better than the intrinsic random_number.
So I have two questions:
- How can I make the OpenMP code faster? Especially the one that used the Numerical Recipes routine, since random_number in Windows is a lost cause, apparently.
- Are the OpenMP directives correct? In particular, I am not sure how to declare the variable idum: shared, private or firstprivate. Note the idum has the
INTENT(INOUT)
attribute in the subroutineran
from the Numerical Recipes. Furthermore, in the subroutine there are some local variables declared with the SAVE attribute. Not sure if this creates problem when parallelizing.
I provide below a MWE to make it easier for users interested in replicating this issue. Thanks in advance to anyone to can help me out on this!
[Small note: I shared a post on a related topic some time ago (see OpenMP stochastic simulation). However, here the focus is quite different.]
module mod_random
implicit none
! Reference: Numerical Recipes in Fortran 90
private
public :: ran
contains
! TODO: Add routine for vectors
FUNCTION ran(idum)
IMPLICIT NONE
INTEGER, PARAMETER :: K4B=selected_int_kind(9)
INTEGER(K4B), INTENT(INOUT) :: idum
REAL(8) :: ran
INTEGER(K4B), PARAMETER :: IA=16807,IM=2147483647,IQ=127773,IR=2836
REAL(8), SAVE :: am
INTEGER(K4B), SAVE :: ix=-1,iy=-1,k
if (idum <= 0 .or. iy < 0) then
am=nearest(1.0,-1.0)/IM
iy=ior(ieor(888889999,abs(idum)),1)
ix=ieor(777755555,abs(idum))
idum=abs(idum)+1
endif
ix=ieor(ix,ishft(ix,13))
ix=ieor(ix,ishft(ix,-17))
ix=ieor(ix,ishft(ix,5))
k=iy/IQ
iy=IA*(iy-k*IQ)-IR*k
if (iy < 0) iy=iy+IM
ran=am*ior(iand(IM,ieor(ix,iy)),1)
END FUNCTION ran
end module mod_random
program main
use mod_random, only: ran
use omp_lib ! OpenMP library
implicit none
! Declare variables
integer, parameter :: par_fortran = 1
integer, parameter :: T_sim = 1000 ! In my real code, this is larger
integer, parameter :: N_sim = 50000 ! In my real code, this is larger
logical, parameter :: do_print = .false.
integer :: i, hh_c, iostat, idum
real(8) :: start, finish ! Timing variables
real(8), allocatable :: x(:,:)
write(*,*) "Hello, world!"
! Call random number generator
allocate(x(T_sim,N_sim),stat=iostat)
if (iostat /= 0) then
print *, 'Error allocating memory'
stop
endif
write(*,*) "Numerical recipes <ran>"
! Start timing
start = omp_get_wtime()
idum = -1
! QUESTION: Should idum be declared as shared, private or firstprivate?
!$omp parallel if (par_fortran==1) default(shared) private(hh_c,i) firstprivate(idum)
!$omp do
do hh_c=1,N_sim
do i = 1,T_sim
x(i,hh_c) = ran(idum)
!print *, idum
enddo
enddo
!$omp enddo
!$omp end parallel
! Stop timing
finish = omp_get_wtime()
! Print execution time in seconds
print *, 'Execution Time (s):', finish - start
if (do_print) call print_matrix(x)
write(*,*) "idum = ", idum
write(*,*) "Mean(x) = ", sum(x)/real(size(x),8)
!----------------------------------------------------------------
write(*,*) "----------------------------------------------------------------"
write(*,*) "Intrinsic random_number"
if (allocated(x)) deallocate(x)
allocate(x(T_sim,N_sim))
! Start timing
start = omp_get_wtime()
!$omp parallel if (par_fortran==1) default(shared) private(hh_c,i)
!$omp do
do hh_c=1,N_sim
do i = 1,T_sim
call RANDOM_NUMBER(x(i,hh_c))
enddo
enddo
!$omp enddo
!$omp end parallel
! Stop timing
finish = omp_get_wtime()
! Print execution time in seconds
print *, 'Execution Time (s):', finish - start
if (do_print) call print_matrix(x)
write(*,*) "Mean(x) = ", sum(x)/real(size(x),8)
if (allocated(x)) deallocate(x)
contains
subroutine print_matrix(x)
real(8), intent(in) :: x(:,:)
integer :: i, j
!Display matrix x(row,col) on screen
do i = 1,size(x,1)
do j = 1,size(x,2)
write(*,'(f12.6," ")',advance='no'), x(i,j)
write(*,'(" ")', advance = 'no')
enddo
write(*,"()")
enddo
write(*,*) " "
end subroutine print_matrix
end program main