I slightly modified your rnorm
subroutine and also wrote a alternative version using the associate
statement. On my machine, the version with the associate
statement takes about 5.7s and is about 1 second faster than the version without associate ( edited to add: for N=10^8).
random_normal.f90
program main
use iso_fortran_env, only: rwp=>real64
implicit none
integer N
real(rwp), allocatable :: z(:)
real(rwp):: t1, t2, mean, std_dev
N=1+10**8
allocate(z(N))
call cpu_time(t1)
call standard_normal(Z,0d0,1d0)
call cpu_time(t2)
mean=sum(Z)/N
std_dev=sqrt(sum((Z-mean)**2)/N)
write(*,*)" M2:"
write(*,'(A,x,I12)') "size : ", size(z)
write(*,'(A,x,F16.8)') "Time(s) : ", t2-t1
write(*,'(A,x,F16.8)') "mean : ", mean
write(*,'(A,x,F16.8)') "std-dev : ", std_dev
call cpu_time(t1)
call rnorm(Z,0d0,1d0)
call cpu_time(t2)
mean=sum(Z)/N
std_dev=sqrt(sum((Z-mean)**2)/N)
write(*,*)" M2:"
write(*,'(A,x,I12)') "size : ", size(z)
write(*,'(A,x,F16.8)') "Time(s) : ", t2-t1
write(*,'(A,x,F16.8)') "mean : ", mean
write(*,'(A,x,F16.8)') "std-dev : ", std_dev
contains
subroutine standard_normal(z, mu, sigma)
use iso_fortran_env, only: rwp=>real64, iwp=>int64
implicit none
real(rwp), optional :: mu, sigma
real(rwp) :: z(:)
real(rwp), parameter :: PI=3.14159265358979323846
real(rwp), allocatable :: u(:)
!get size of Z
integer(iwp) N, n1
N=size(z,kind=iwp)
n1=N + modulo(N,2)
allocate(u(n1))
call random_number(u)
u = 1d0 - u
associate (u1=>sqrt(-2d0*log(u(1:n1/2))),u2=>2d0*PI*u(n1/2+1:n1))
z=[u1*cos(u2), u1(1:N/2)*sin(u2(1:N/2))]
endassociate
if(present(sigma)) z=z*sigma
if(present(mu)) z=z+mu
endsubroutine standard_normal
subroutine rnorm(x, mu, sigma)
implicit none
integer(8) :: n,n1
real(8), parameter :: PI=3.14159265358979323846
real(8) :: x(:), mu, sigma
real(8), allocatable :: u1(:), u2(:)
n = size(x,kind=8)
n1 = n + modulo(n, 2)
allocate(u1(n1/2), u2(n1/2))
call random_number(u1)
call random_number(u2)
u1 = 1d0 - u1
u2 = 1d0 - u2
x = mu + sigma*[sqrt(-2d0*log(u1))*cos(2d0*pi*u2), sqrt(-2d0*log(u1(1:n/2)))*sin(2d0*pi*u2(1:n/2))]
end subroutine rnorm
end program main
Output
M1:
size : 100000001
Time(s) : 5.71875000
mean : -0.00002553
std-dev : 0.99999518
M2:
size : 100000001
Time(s) : 6.71875000
mean : 0.00003767
std-dev : 1.00007775
(another edit to fix std_dev calc)