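`inode` MUST NOT be private: it is only read inside the loop, so every thread has to see the shared value that was set before the parallel region.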
gfortran will handle that correctly (it generally uses the heap instead of the stack for arrays above a given size). Otherwise you can indeed protect the jWork
update with an atomic directive, or allocate your own private copies of jWork
and perform a manual reduction at the end. The first snippet below shows the atomic variant, the second one the manual reduction.
! Add one to the shared array jWork for every degree of freedom of every node
! of each element whose connectivity row references node `inode`.
! The element loop is split across threads; two elements may reference the
! same node, so each increment of jWork is guarded by an atomic update.
! idof is not listed in the private clause: as the index of a sequential loop
! nested inside the parallel construct it is private by default in Fortran.
!$omp parallel do private(nod, jnode, nf2)
do jelem = 1, nelz   ! nelz is large (> 1 million)
  ! Only elements whose connectivity row contains inode contribute.
  if (any(lnods(jelem, 1:lnomax) == inode)) then
    do nod = 1, lnomax
      jnode = lnods(jelem, nod)
      ! Entries nf2+1 .. nf2+ndfmax of jWork are the ones updated for jnode.
      nf2 = (jnode - 1) * ndfmax
      do idof = 1, ndfmax
!$omp atomic update
        jWork(nf2 + idof) = jWork(nf2 + idof) + 1
      end do
    end do
  end if
end do
!$omp end parallel do
! Add one to jWork for every degree of freedom of every node of each element
! whose connectivity row references node `inode`, without atomics: each thread
! counts into its own private copy jWork_local, and the copies are summed into
! the shared jWork one thread at a time inside a critical section.
!$omp parallel private(nod, jnode, nf2, jWork_local)
! Per-thread accumulator, same size as jWork, starting from zero.
allocate(jWork_local(size(jWork)))
jWork_local = 0
!$omp do
do jelem = 1, nelz   ! nelz is large (> 1 million)
  ! Only elements whose connectivity row contains inode contribute.
  if (any(lnods(jelem, 1:lnomax) == inode)) then
    do nod = 1, lnomax
      jnode = lnods(jelem, nod)
      nf2 = (jnode - 1) * ndfmax
      ! Increment the ndfmax consecutive entries that follow offset nf2.
      jWork_local(nf2 + 1:nf2 + ndfmax) = jWork_local(nf2 + 1:nf2 + ndfmax) + 1
    end do
  end if
end do
!$omp end do
! Manual reduction: the critical section serialises the updates of jWork.
!$omp critical
jWork(:) = jWork(:) + jWork_local(:)
!$omp end critical
deallocate(jWork_local)
!$omp end parallel