Given a 1D real array, viewed as a time series, one can search for M change points, sometimes termed structural breaks, where the mean changes by exhaustive search, although the time taken is exponential in M. One minimizes the total squared error for the M+1 segments of data. Below is a code that handles M = 2. Does someone have a faster and more general algorithm? There are many Fortran codes for continuous optimization but fewer for integer optimization (here the integer parameters are the positions of the breaks). Having found potential change points, statistical tests are needed to estimate the probabilities they are spurious. Changepoint: An R Package for Changepoint Analysis surveys the literature and presents an R package calling C code, but it would be nice to have a Fortran version.
module change_point_mod
implicit none
private
public :: dp,change_points_2
integer, parameter :: dp = kind(1.0d0)
contains
subroutine change_points_2(x,xmean,i1,i2)
! find the two positions i1 and i2 that best partition x(:) into segments of minimum variance
real(kind=dp), intent(in) :: x(:)
real(kind=dp), intent(out) :: xmean(:)
integer , intent(out) :: i1,i2
integer :: j1,j2,n
real(kind=dp) :: sumsq,sumsq_min,xmean_try(size(x))
n = size(x)
if (n < 3) stop "need size(x) > 2"
if (size(xmean) /= size(x)) stop "need size(x) == size(xmean)"
sumsq_min = huge(x)
do j1=2,n-1
do j2=j1+1,n
call set_mean(x,1,j1-1,xmean_try)
call set_mean(x,j1,j2-1,xmean_try)
call set_mean(x,j2,n,xmean_try)
sumsq = sum((x-xmean_try)**2)
if (sumsq < sumsq_min) then
sumsq_min = sumsq
i1 = j1
i2 = j2
xmean = xmean_try
end if
end do
end do
end subroutine change_points_2
!
subroutine set_mean(x,j1,j2,xmean)
! set xmean(j1:j2) to mean(x(j1:j2))
real(kind=dp), intent(in) :: x(:)
integer , intent(in) :: j1,j2
real(kind=dp), intent(inout) :: xmean(:)
integer :: n
n = size(x)
if (n > 0 .and. j1 > 0 .and. j2 >= j1 .and. j2 <= n) xmean(j1:j2) = sum(x(j1:j2))/(j2-j1+1)
end subroutine set_mean
end module change_point_mod
program xchange_points
! 04/23/2021 11:02 AM driver for change_points_2
use change_point_mod , only: dp,change_points_2
implicit none
integer :: i1,i2,iter
integer, parameter :: n = 1000, niter = 5, i1_true=301, i2_true = 601
real(kind=dp) :: xx(n),xmean(n),diff
call random_seed()
diff = 0.1_dp
write (*,"(5a8)") "i1","i2","mean1","mean2","mean3"
write (*,"(2i8,3f8.4,1x,'TRUE')") i1_true,i2_true,0.0_dp,diff,-diff
do iter=1,niter
call random_number(xx) ! using uniform variates for simplicity -- better to use normal variates
xx = xx - 0.5_dp
xx(i1_true:i2_true-1) = xx(i1_true:i2_true-1) + diff
xx(i2_true:) = xx(i2_true:) - diff
call change_points_2(xx,xmean,i1,i2)
write (*,"(2i8,3f8.4)") i1,i2,xmean([1,i1,i2])
end do
end program xchange_points
gives output
i1 i2 mean1 mean2 mean3
301 601 0.0000 0.1000 -0.1000 TRUE
289 601 -0.0207 0.0931 -0.1155
354 601 -0.0156 0.1260 -0.1053
370 601 -0.0106 0.1410 -0.0976
427 601 0.0040 0.1364 -0.1233
303 598 -0.0154 0.1153 -0.1155
in 6 seconds on my PC using gfortran -O2.