Looking at the assembly of your subroutine in Compiler Explorer (compiled with gfortran -O3 -march=skylake), I see that it contains a number of conditional jumps:
# gfortran -O3 -march=skylake output for sort_network_arr4_int.
# rdi is used as the base address of four consecutive 32-bit values
# (offsets 0, 4, 8, 12) -- presumably the 4-element integer array argument;
# the Fortran source of this routine is not shown here.
# The routine performs five compare-and-swap steps, each guarded by a
# conditional jump (jle/jge) that skips the swap when the pair is already ordered.
__foo_MOD_sort_network_arr4_int:
# Step 1: pair at [rdi] / [rdi+4]. Load both dwords at once, split into esi / edx.
vmovq xmm0, QWORD PTR [rdi]
vpextrd edx, xmm0, 1
vmovd esi, xmm0
cmp esi, edx
jle .L4
# Out of order: shuffle mask 225 (0b11100001) exchanges the two low dwords,
# the swapped pair is stored back, and esi / edx are exchanged via eax.
mov eax, esi
vpshufd xmm0, xmm0, 225
mov esi, edx
vmovq QWORD PTR [rdi], xmm0
mov edx, eax
.L4:
# Step 2: same pattern for the pair at [rdi+8] / [rdi+12], kept in eax / ecx.
vmovq xmm0, QWORD PTR [rdi+8]
vpextrd ecx, xmm0, 1
vmovd eax, xmm0
cmp eax, ecx
jle .L5
mov r8d, eax
vpshufd xmm0, xmm0, 225
mov eax, ecx
vmovq QWORD PTR [rdi+8], xmm0
mov ecx, r8d
.L5:
# Step 3: compare the values at [rdi] (esi) and [rdi+8] (eax); swap in memory if needed.
# Only eax is refreshed afterwards: esi is not read again.
cmp esi, eax
jle .L6
mov DWORD PTR [rdi+8], esi
mov DWORD PTR [rdi], eax
mov eax, esi
.L6:
# Step 4: compare the values at [rdi+4] (edx) and [rdi+12] (ecx); swap in memory if needed.
# Only edx is refreshed afterwards: ecx is not read again.
cmp edx, ecx
jle .L7
mov DWORD PTR [rdi+4], ecx
mov DWORD PTR [rdi+12], edx
mov edx, ecx
.L7:
# Step 5: compare the middle pair, [rdi+8] (eax) against [rdi+4] (edx).
cmp eax, edx
jge .L9
# Out of order: pack {eax, edx} and store it at [rdi+4], which writes the
# smaller value to [rdi+4] and the larger one to [rdi+8].
vmovd xmm1, eax
vpinsrd xmm0, xmm1, edx, 1
vmovq QWORD PTR [rdi+4], xmm0
.L9:
ret
Following this lecture, I naively converted it to a version in which each compare-and-swap is written with min and max instead of an if-test:
!> Sort the four integers in `x` into ascending order, in place.
!!
!! Applies a fixed sequence of five compare-and-swap steps (a sorting
!! network for four elements). Every step is expressed with `min`/`max`
!! rather than an `if` test, so the source has no data-dependent branch.
subroutine sort4(x)
  implicit none
  integer, intent(inout) :: x(4)  !< values to sort; ascending on return

  ! Order each half, ...
  call cmpswap(x(1), x(2))
  call cmpswap(x(3), x(4))
  ! ... then move the overall minimum to x(1) and the overall maximum to x(4), ...
  call cmpswap(x(1), x(3))
  call cmpswap(x(2), x(4))
  ! ... and finally order the two middle elements.
  call cmpswap(x(2), x(3))

contains

  !> Reorder the pair so that `a <= b` on return.
  pure subroutine cmpswap(a, b)
    integer, intent(inout) :: a, b
    integer :: lo

    ! The minimum is saved first because `b` must be computed from the
    ! original `a`, before `a` is overwritten.
    lo = min(a, b)
    b = max(a, b)
    a = lo
  end subroutine cmpswap

end subroutine sort4
The assembly produced for this version contains no jumps at all; every compare-and-swap has become a vpminsd/vpmaxsd pair:
# gfortran -O3 -march=skylake output for sort4(x): straight-line code, no jumps.
# rdi is the base address of x; x(1)..x(4) are the dwords at offsets 0, 4, 8, 12.
# Each compare-and-swap is a vpminsd / vpmaxsd (signed 32-bit min / max) pair.
__foo_MOD_sort4:
# Load x(2), x(1), x(4) into the low lane of separate xmm registers.
vmovd xmm0, DWORD PTR [rdi+4]
vmovd xmm3, DWORD PTR [rdi]
vmovd xmm2, DWORD PTR [rdi+12]
# cmpswap(x(1), x(2)): xmm4 = min, xmm3 = max.
vpminsd xmm4, xmm3, xmm0
vpmaxsd xmm3, xmm3, xmm0
# Load x(3); cmpswap(x(3), x(4)): xmm1 = min, xmm0 = max.
vmovd xmm0, DWORD PTR [rdi+8]
vpminsd xmm1, xmm0, xmm2
vpmaxsd xmm0, xmm0, xmm2
# cmpswap(x(1), x(3)) and cmpswap(x(2), x(4)), interleaved:
#   xmm5 = larger of the two minima, xmm2 = smaller of the two maxima,
#   xmm1 = overall minimum (final x(1)), xmm0 = overall maximum (final x(4)).
vpmaxsd xmm5, xmm1, xmm4
vpminsd xmm2, xmm0, xmm3
vpminsd xmm1, xmm1, xmm4
vpmaxsd xmm0, xmm0, xmm3
# cmpswap(x(2), x(3)): xmm4 = final x(2), xmm2 = final x(3).
# The moves to eax / edx stage x(4) and x(2) for the inserts below.
vpminsd xmm4, xmm2, xmm5
vmovd eax, xmm0
vpmaxsd xmm2, xmm2, xmm5
vmovd edx, xmm4
# Pack the results: xmm2 = {x(3), x(4)}, xmm1 = {x(1), x(2)},
# then join the two halves and write all four elements with a single 16-byte store.
vpinsrd xmm2, xmm2, eax, 1
vpinsrd xmm1, xmm1, edx, 1
vpunpcklqdq xmm1, xmm1, xmm2
vmovdqu XMMWORD PTR [rdi], xmm1
ret
When called like this:
! Run the original routine on a fixed unsorted input and print the result.
a = [3,1,4,2]
call sort_network_arr4_int(a)
print *, a
! Reset a to the same input, since the call above modified it,
! then run the min/max version so the two printed lines can be compared.
a = [3,1,4,2]
call sort4(a)
print *, a
I get the same, correctly sorted output from both routines:
1 2 3 4
1 2 3 4