Writing wrappers for LAPACK and BLAS routines

Yes, the subroutines that wrap BLAS routines in my code use arrays with assumed shape. As mentioned in original post, I use these wrapper functions in my RK4 subroutine (see the code below).

    subroutine rungekutta(func, y0, ti, tf, dt, print_nstep, outfile)
        use blas_wrappers
        use, intrinsic :: iso_fortran_env, only:dp=>real64
        implicit none
        complex(dp), intent(in) :: func(:,:)
        complex(dp), intent(in) :: y0(:)
        real(dp), intent(in) :: ti, tf, dt
        integer, intent(in) :: print_nstep
        character (len=*), intent(in) :: outfile
       
        complex(dp), allocatable :: yi(:)
        complex(dp), allocatable, dimension(:) :: k1, k2, k3, k4 
        complex(dp), allocatable, dimension(:) :: kt

        
        complex(dp) :: expt, norm, autocorr
        real(dp) :: t
        integer :: n, m
        integer :: tstep
        
        n = size(func, dim=2)
        m = size(y0)
        ! some consistency checks
        if (n .ne. m) then
            print*, "!!ERROR: Dimensions of func and y0 don't match."
            stop
        end if

        allocate(k1(n), k2(n), k3(n), k4(n))
        allocate(kt(n))
        allocate(yi(n))
    
        t = ti
        tstep = 0
        yi = y0
        open(100, file=trim(outfile))
        ! writing for t0
        call zmul_zdotc(yi, yi, norm)
        ! expectation values
        call zmul_mv(func, yi, kt)
        call zmul_zdotc(yi,kt, expt)
        ! autocorr
        call zmul_zdotc(y0, yi, autocorr)
        write(100,'(4f32.16)') t, abs(norm), abs(autocorr), abs(expt)  

        do while (t < tf)
            call zmul_mv(func, yi, k1)
            call zmul_mv(func, (yi + (dt*0.5d0)*k1), k2)
            call zmul_mv(func, (yi + (dt*0.5d0)*k2), k3)
            call zmul_mv(func, (yi + (dt*1.0d0)*k3), k4)
            yi = yi + (dt/6.0d0)*(k1 + 2.0d0*k2 + 2.0d0*k3 + k4)
            t = t + dt
            tstep = tstep + 1
            if (modulo(tstep, print_nstep) == 0) then
                ! norm
                call zmul_zdotc(yi, yi, norm)
                ! intermediate normalisation only for itp
                yi = 1/(norm)**(0.5d0) * yi
                call zmul_zdotc(yi, yi, norm)
                ! expectation values
                call zmul_mv(func, yi, kt)
                call zmul_zdotc(yi,kt, expt)
                ! autocorr
                call zmul_zdotc(y0, yi, autocorr)
                write(100,'(4f32.16)') t, abs(norm), abs(autocorr), abs(expt)  
            end if
        end do
        close(100)
    end subroutine rungekutta

Note: I just renamed helpers to blas_wrappers and the only other change is beta = (0.0d0, 0.0d0)

So, the when I try to test rungekutta with any func matrix larger than 1000, I run into stackoverflow, but I am not sure why this happens.