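inode MUST NOT be private: every thread only reads it, so it has to stay shared (a private copy would be undefined inside the parallel region).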
gfortran will handle that correctly (it generally uses the heap instead of the stack for arrays above a given size). Otherwise, you can indeed protect the jWork update with an atomic directive (first snippet below), or allocate your own private copies of jWork and perform a manual reduction at the end (second snippet below).
 ! Variant 1 - shared jWork, each increment protected by an atomic update.
 ! inode, lnods, lnomax, ndfmax and jWork are left shared; the loop indices
 ! and per-element scratch scalars are private to each thread.
 !$omp parallel do private(nod, jnode, nf2, idof)
            do jelem = 1, nelz   ! nelz is larger than 1 million here
                ! Only elements whose connectivity row contains inode contribute.
                if (any(lnods(jelem, 1:lnomax) == inode)) then
                    do nod = 1, lnomax
                        jnode = lnods(jelem, nod)
                        ! Offset of the first dof slot of jnode in jWork.
                        nf2 = ndfmax * (jnode - 1)
                        do idof = nf2 + 1, nf2 + ndfmax
                            ! Several threads may hit the same slot, hence the atomic.
                            !$omp atomic update
                            jWork(idof) = jWork(idof) + 1
                        end do
                    end do
                end if
            end do
 !$omp end parallel do
            ! Variant 2 - manual reduction: every thread accumulates into its own
            ! jWork_local and the partial results are summed into the shared jWork
            ! once, inside a critical section.
            ! NOTE(review): assumes jWork_local is an allocatable integer array that
            ! is not allocated on entry - confirm against its declaration.
            !$OMP PARALLEL PRIVATE(nod,jnode,nf2,idof,jWork_local)
            ! Same bounds as jWork, so nf2+idof addresses the same slot in both
            ! arrays even if jWork does not start at index 1.
            allocate( jWork_local(lbound(jWork,1):ubound(jWork,1)), source=0)
            !$OMP DO
            do jelem=1,nelz ! nelz is larger than 1 million here
                ! Skip elements whose connectivity row does not contain inode;
                ! any() only needs one match, no full count is required.
                if (.not. any(lnods(jelem,1:lnomax) == inode)) cycle
                do nod=1,lnomax
                   jnode=lnods(jelem,nod)
                   nf2=(jnode-1)*ndfmax   ! offset of the first dof slot of jnode
                   do idof = 1, ndfmax
                       ! Thread-private accumulator: no synchronisation needed here.
                       jWork_local(nf2+idof) = jWork_local(nf2+idof) + 1
                   end do
                end do
            end do
            ! The loop never reads or writes the shared jWork, so the implicit
            ! barrier of END DO can be dropped: each thread merges its partial
            ! result as soon as it is done instead of waiting for the slowest one.
            !$OMP END DO NOWAIT
            ! One thread at a time adds its partial counts to the shared array.
            !$OMP CRITICAL
            jWork(:) = jWork(:) + jWork_local(:)
            !$OMP END CRITICAL
            deallocate( jWork_local )
            ! The implicit barrier at END PARALLEL guarantees jWork is complete
            ! before the code after the region runs.
            !$OMP END PARALLEL