An optimization I thought of is that if you have computed means for the 3 segments defined by (i1,i2), and you want to consider the partition (i1,i2+1), the mean of the first segment x(1:i1)
is unchanged, and the means of the other two segments x(i1+1:i2)
and x(i2+1:N)
can be updated by adding and removing one value. This is faster than recomputing all means as done above.
ETA: the algorithm can be speeded up by almost a factor of 2 by precomputing the cumulative sum of x(:), as done below.
module change_point_mod
implicit none
private
public :: dp,change_points_2,change_points_2_slow
integer, parameter :: dp = kind(1.0d0)
contains
subroutine change_points_2_slow(x,xmean,i1,i2)
! find the two positions i1 and i2 that best partition x(:) into segments of minimum variance
real(kind=dp), intent(in) :: x(:)
real(kind=dp), intent(out) :: xmean(:)
integer , intent(out) :: i1,i2
integer :: j1,j2,n
real(kind=dp) :: sumsq,sumsq_min,xmean_try(size(x))
n = size(x)
if (n < 3) stop "need size(x) > 2"
if (size(xmean) /= size(x)) stop "need size(x) == size(xmean)"
sumsq_min = huge(x)
do j1=2,n-1
call set_mean(x,1,j1-1,xmean_try)
do j2=j1+1,n
call set_mean(x,j1,j2-1,xmean_try)
call set_mean(x,j2,n,xmean_try)
sumsq = sum((x-xmean_try)**2)
if (sumsq < sumsq_min) then
sumsq_min = sumsq
i1 = j1
i2 = j2
xmean = xmean_try
end if
end do
end do
end subroutine change_points_2_slow
!
subroutine change_points_2(x,xmean,i1,i2)
! find the two positions i1 and i2 that best partition x(:) into segments of minimum variance
! precomputes the cumulative sum of x(:) for speed
real(kind=dp), intent(in) :: x(:)
real(kind=dp), intent(out) :: xmean(:)
integer , intent(out) :: i1,i2
integer :: i,j1,j2,n
real(kind=dp) :: sumsq,sumsq_min,xmean_try(size(x)),cumul_sum(size(x))
n = size(x)
if (n < 3) stop "need size(x) > 2"
if (size(xmean) /= size(x)) stop "need size(x) == size(xmean)"
cumul_sum(1) = x(1)
do i=2,n
cumul_sum(i) = cumul_sum(i-1) + x(i)
end do
sumsq_min = huge(x)
do j1=2,n-1
call set_mean(x,1,j1-1,xmean_try,cumul_sum)
do j2=j1+1,n
call set_mean(x,j1,j2-1,xmean_try,cumul_sum)
call set_mean(x,j2,n,xmean_try,cumul_sum)
sumsq = sum((x-xmean_try)**2)
if (sumsq < sumsq_min) then
sumsq_min = sumsq
i1 = j1
i2 = j2
xmean = xmean_try
end if
end do
end do
end subroutine change_points_2
!
subroutine set_mean(x,j1,j2,xmean,cumul_sum)
! set xmean(j1:j2) to mean(x(j1:j2))
real(kind=dp), intent(in) :: x(:)
integer , intent(in) :: j1,j2
real(kind=dp), intent(inout) :: xmean(:)
real(kind=dp), intent(in), optional :: cumul_sum(:)
integer :: n,denom
n = size(x)
if (n > 0 .and. j1 > 0 .and. j2 >= j1 .and. j2 <= n) then
if (j1 == j2) then
xmean(j1) = x(j1)
return
end if
denom = j2 - j1 + 1
if (present(cumul_sum)) then
if (j1 > 1) then
xmean(j1:j2) = (cumul_sum(j2)-cumul_sum(j1-1))/denom
else
xmean(j1:j2) = cumul_sum(j2)/denom
end if
else
xmean(j1:j2) = sum(x(j1:j2))/denom
end if
end if
end subroutine set_mean
end module change_point_mod
!
program xchange_points
! 04/23/2021 02:28 PM calls fast and or slow versions of change_points_2
! 04/23/2021 11:02 AM driver for change_points_2
use change_point_mod , only: dp,change_points_2,change_points_2_slow
implicit none
integer :: i1,i2,iter
integer, parameter :: n = 1000, niter = 5, i1_true=nint(0.3*n), i2_true = nint(0.6*n)
real(kind=dp) :: xx(n),xmean(n),diff
logical, parameter :: do_fast = .true., do_slow = .false.
call random_seed()
diff = 0.1_dp
print*,"n, do_fast, do_slow =",n,do_fast,do_slow
write (*,"(5a8)") "i1","i2","mean1","mean2","mean3"
write (*,"(2i8,3f8.4,1x,'TRUE')") i1_true,i2_true,0.0_dp,diff,-diff
do iter=1,niter
call random_number(xx) ! using uniform variates for simplicity -- better to use normal variates
xx = xx - 0.5_dp
xx(i1_true:i2_true-1) = xx(i1_true:i2_true-1) + diff
xx(i2_true:) = xx(i2_true:) - diff
if (do_fast) then
call change_points_2(xx,xmean,i1,i2)
write (*,"(2i8,3f8.4)") i1,i2,xmean([1,i1,i2])
end if
if (do_slow) then
call change_points_2_slow(xx,xmean,i1,i2)
write (*,"(2i8,3f8.4)") i1,i2,xmean([1,i1,i2])
end if
end do
end program xchange_points