Given a 1D real array, viewed as a time series, one can search for M change points, sometimes termed structural breaks, where the mean changes by exhaustive search, although the time taken is exponential in M. One minimizes the total squared error for the M+1 segments of data. Below is a code that handles M = 2. Does someone have a faster and more general algorithm? There are many Fortran codes for continuous optimization but fewer for integer optimization (here the integer parameters are the positions of the breaks). Having found potential change points, statistical tests are needed to estimate the probabilities they are spurious. Changepoint: An R Package for Changepoint Analysis surveys the literature and presents an R package calling C code, but it would be nice to have a Fortran version.
module change_point_mod implicit none private public :: dp,change_points_2 integer, parameter :: dp = kind(1.0d0) contains subroutine change_points_2(x,xmean,i1,i2) ! find the two positions i1 and i2 that best partition x(:) into segments of minimum variance real(kind=dp), intent(in) :: x(:) real(kind=dp), intent(out) :: xmean(:) integer , intent(out) :: i1,i2 integer :: j1,j2,n real(kind=dp) :: sumsq,sumsq_min,xmean_try(size(x)) n = size(x) if (n < 3) stop "need size(x) > 2" if (size(xmean) /= size(x)) stop "need size(x) == size(xmean)" sumsq_min = huge(x) do j1=2,n-1 do j2=j1+1,n call set_mean(x,1,j1-1,xmean_try) call set_mean(x,j1,j2-1,xmean_try) call set_mean(x,j2,n,xmean_try) sumsq = sum((x-xmean_try)**2) if (sumsq < sumsq_min) then sumsq_min = sumsq i1 = j1 i2 = j2 xmean = xmean_try end if end do end do end subroutine change_points_2 ! subroutine set_mean(x,j1,j2,xmean) ! set xmean(j1:j2) to mean(x(j1:j2)) real(kind=dp), intent(in) :: x(:) integer , intent(in) :: j1,j2 real(kind=dp), intent(inout) :: xmean(:) integer :: n n = size(x) if (n > 0 .and. j1 > 0 .and. j2 >= j1 .and. j2 <= n) xmean(j1:j2) = sum(x(j1:j2))/(j2-j1+1) end subroutine set_mean end module change_point_mod program xchange_points ! 04/23/2021 11:02 AM driver for change_points_2 use change_point_mod , only: dp,change_points_2 implicit none integer :: i1,i2,iter integer, parameter :: n = 1000, niter = 5, i1_true=301, i2_true = 601 real(kind=dp) :: xx(n),xmean(n),diff call random_seed() diff = 0.1_dp write (*,"(5a8)") "i1","i2","mean1","mean2","mean3" write (*,"(2i8,3f8.4,1x,'TRUE')") i1_true,i2_true,0.0_dp,diff,-diff do iter=1,niter call random_number(xx) ! using uniform variates for simplicity -- better to use normal variates xx = xx - 0.5_dp xx(i1_true:i2_true-1) = xx(i1_true:i2_true-1) + diff xx(i2_true:) = xx(i2_true:) - diff call change_points_2(xx,xmean,i1,i2) write (*,"(2i8,3f8.4)") i1,i2,xmean([1,i1,i2]) end do end program xchange_points
i1 i2 mean1 mean2 mean3 301 601 0.0000 0.1000 -0.1000 TRUE 289 601 -0.0207 0.0931 -0.1155 354 601 -0.0156 0.1260 -0.1053 370 601 -0.0106 0.1410 -0.0976 427 601 0.0040 0.1364 -0.1233 303 598 -0.0154 0.1153 -0.1155
in 6 seconds on my PC using gfortran -O2.