I got curious about that difference, then I did some tests using Julia here (I find it easier to benchmark). The function Iβm benchmarking sums the result of three_median
for 10_000 points:
julia> function test(v,f)
s = 0.
for x in v
s += f(x[1],x[2],x[3])
end
return s
end
test (generic function with 1 method)
The three functions I tested are:
# original
function three_median(x, y, z)
if ((y <= x && x <= z) || (z <= x && x <= y))
res = x
elseif ((x <= y && y <= z) || (z <= y && y <= x))
res = y
else
res = z
end
return res
end
# using max/min
function three_median1(a, b, c)
res = max(min(a,b),min(max(a,b),c))
return res
end
# identical to max/min, but with conditionals:
function three_median2(a,b,c)
a1 = if a < b
a
else
b
end
a2 = if a > b
a
else
b
end
a3 = if a2 < c
a2
else
c
end
res = if a3 > a1
a3
else
a1
end
return res
end
The results are:
julia> @btime test($x,$three_median)
65.632 ΞΌs (0 allocations: 0 bytes)
5046.737882726079
julia> @btime test($x,$three_median1)
47.879 ΞΌs (0 allocations: 0 bytes)
5046.737882726079
julia> @btime test($x,$three_median2)
10.952 ΞΌs (0 allocations: 0 bytes)
5046.737882726079
Thus, function with max/min
is indeed faster than the original one, but, curiously, the one with the conditionals written in a different way is quite faster than both options.
I donβt know if that will be the case with Fortran as well, the corresponding function would be:
function three_median(x, y, z)
real(8) :: x, y, z, three_median, a1, a2, a3
if (a < b) then
a1 = a
else
a1 = b
end if
if (a > b) then
a2 = a
else
a2 = b
end if
if (a2 < c) then
a3 = a2
else
a3 = c
end if
if (a3 > a1) then
three_median = a3
else
three_median = a1
end if
end function
I do not understand very clearly lowered code, and those differences are clearly interesting. Anyway, if someone wants to check that out, here go the assembly codes of all these options:
Summary
julia> @code_native three_median(a,b,c)
.text
; β @ three_median.jl:2 within `three_median'
; ββ @ float.jl:374 within `<='
vucomisd %xmm1, %xmm0
; ββ
jb L12
vucomisd %xmm0, %xmm2
jae L24
L12:
vucomisd %xmm0, %xmm1
jb L25
vucomisd %xmm2, %xmm0
jb L25
; β @ three_median.jl:9 within `three_median'
L24:
retq
; β @ three_median.jl:4 within `three_median'
L25:
vcmpnlesd %xmm1, %xmm2, %xmm3
vblendvpd %xmm3, %xmm2, %xmm1, %xmm3
vcmpnlesd %xmm0, %xmm1, %xmm4
vblendvpd %xmm4, %xmm2, %xmm3, %xmm3
vcmpnlesd %xmm2, %xmm1, %xmm2
vblendvpd %xmm2, %xmm3, %xmm1, %xmm2
vcmpnlesd %xmm1, %xmm0, %xmm0
vblendvpd %xmm0, %xmm3, %xmm2, %xmm0
retq
nopw %cs:(%rax,%rax)
; β
julia> @code_native three_median1(a,b,c)
.text
; β @ three_median.jl:13 within `three_median1'
; ββ @ math.jl:726 within `min'
; βββ @ floatfuncs.jl:15 within `signbit'
vmovq %xmm1, %rcx
vmovq %xmm0, %rax
; βββ
vcmpordsd %xmm0, %xmm0, %xmm3
vblendvpd %xmm3, %xmm1, %xmm0, %xmm3
vcmpordsd %xmm1, %xmm1, %xmm4
vblendvpd %xmm4, %xmm0, %xmm1, %xmm5
; βββ @ operators.jl:305 within `>'
; ββββ @ bool.jl:84 within `<'
; βββββ @ bool.jl:36 within `&'
testq %rcx, %rcx
; βββββ
js L52
vmovapd %xmm5, %xmm8
vmovapd %xmm3, %xmm6
; βββ @ operators.jl:305 within `>'
; ββββ @ bool.jl:84 within `<'
; βββββ @ bool.jl:33 within `!'
testq %rax, %rax
; βββββ
jns L69
jmp L65
L52:
vmovapd %xmm3, %xmm8
vmovapd %xmm5, %xmm6
; βββ @ operators.jl:305 within `>'
; ββββ @ bool.jl:84 within `<'
; βββββ @ bool.jl:33 within `!'
testq %rax, %rax
; βββββ
jns L69
L65:
vmovapd %xmm5, %xmm8
; ββ
; ββ @ math.jl:722 within `max'
L69:
js L75
vmovapd %xmm5, %xmm6
; ββ
; ββ @ math.jl within `min'
L75:
vcmpltsd %xmm0, %xmm1, %xmm7
; ββ
; ββ @ math.jl:722 within `max'
vcmpltsd %xmm1, %xmm0, %xmm0
vblendvpd %xmm0, %xmm3, %xmm6, %xmm1
; ββ
; ββ @ math.jl:726 within `min'
; βββ @ floatfuncs.jl:15 within `signbit'
vmovq %xmm2, %rax
vmovq %xmm1, %rcx
; βββ
vcmpordsd %xmm1, %xmm1, %xmm0
vblendvpd %xmm0, %xmm2, %xmm1, %xmm4
vcmpordsd %xmm2, %xmm2, %xmm0
vblendvpd %xmm0, %xmm1, %xmm2, %xmm6
vmovapd %xmm4, %xmm5
; βββ @ operators.jl:305 within `>'
; ββββ @ bool.jl:84 within `<'
; βββββ @ bool.jl:33 within `!'
testq %rcx, %rcx
; βββββ
jns L136
vmovapd %xmm6, %xmm5
; ββ
; ββ @ math.jl within `min'
L136:
vblendvpd %xmm7, %xmm3, %xmm8, %xmm0
; ββ
; ββ @ math.jl:726 within `min'
; βββ @ operators.jl:305 within `>'
; ββββ @ bool.jl:84 within `<'
; βββββ @ bool.jl:36 within `&'
testq %rax, %rax
; βββββ
js L151
vmovapd %xmm6, %xmm5
L151:
vcmpltsd %xmm1, %xmm2, %xmm1
vblendvpd %xmm1, %xmm4, %xmm5, %xmm1
; ββ
; ββ @ math.jl:722 within `max'
; βββ @ floatfuncs.jl:15 within `signbit'
vmovq %xmm1, %rcx
vmovq %xmm0, %rax
; βββ
vcmpordsd %xmm0, %xmm0, %xmm2
vblendvpd %xmm2, %xmm1, %xmm0, %xmm2
vcmpordsd %xmm1, %xmm1, %xmm3
vblendvpd %xmm3, %xmm0, %xmm1, %xmm3
vmovapd %xmm2, %xmm4
; βββ @ bool.jl:84 within `<'
; ββββ @ bool.jl:33 within `!'
testq %rcx, %rcx
; ββββ
jns L207
vmovapd %xmm3, %xmm4
; βββ @ bool.jl:84 within `<'
; ββββ @ bool.jl:36 within `&'
L207:
testq %rax, %rax
; ββββ
js L216
vmovapd %xmm3, %xmm4
L216:
vcmpltsd %xmm1, %xmm0, %xmm0
vblendvpd %xmm0, %xmm2, %xmm4, %xmm0
; ββ
; β @ three_median.jl:14 within `three_median1'
retq
nopw %cs:(%rax,%rax)
; β
julia> @code_native three_median2(a,b,c)
.text
; β @ three_median.jl within `three_median2'
vminsd %xmm1, %xmm0, %xmm3
; β @ three_median.jl:23 within `three_median2'
vmaxsd %xmm1, %xmm0, %xmm0
; β @ three_median.jl within `three_median2'
vminsd %xmm2, %xmm0, %xmm0
; β @ three_median.jl:33 within `three_median2'
vmaxsd %xmm3, %xmm0, %xmm0
; β @ three_median.jl:38 within `three_median2'
retq
nopw %cs:(%rax,%rax)
; β
The assembly code of the third option is much more concise, curiously.
edit: In Julia, the reason for the difference is that there are specific checks for NaN
s in the intrinsic max
functions.