Help with Blas band matrix operations (SOLVED)

By the looks of things your original code (before Edit 4) uses wrong alignment of the banded storage of matrix A with respect to what the dgbmv routine expects.

I’ve created a full example using the “top” alignment (following Edit 4), and the results from matmul and dgbmv are equal down to the last few decimal places:

Full example

Don’t forget to link with -lblas.

module helpers

  implicit none
  integer, parameter :: dp = kind(1.0d0)

  interface
    subroutine dgbmv(trans,m,n,kl,ku,alpha,a,lda,x,incx,beta,y,incy)
      import dp
      character(len=1), intent(in) :: trans
      integer, intent(in) :: m, n, kl, ku, lda, incx, incy
      real(dp), intent(in) :: alpha, a(lda,*), x(*), beta
      real(dp), intent(inout) :: y(*)
    end subroutine
  end interface

contains

  !> Banded matrix times vector multiplication
  !>
  !> Assumes matrix is stored in the first (kl + ku + 1) rows of array A
  !>
  subroutine banded_times_vec(A, x, n, kl, ku, y)
    real(dp), intent(in) :: A(:,:), x(: )
    integer, intent(in) :: n, kl, ku
    real(dp) :: y(size(x))

    y = 0.0d0

    call dgbmv('n',n,n,kl,ku,1.0d0,A,size(A,1),x,1,0.0d0,y,1)

  end subroutine

  !> Replace values below the diagonal kl and above diagonal ku with zero
  subroutine make_banded(A,kl,ku)
    real(dp), intent(inout) :: A(:,:)
    integer, intent(in) :: kl, ku

    integer :: i, j

    do j = 1, size(A,1)
      do i = 1, size(A,2)
        if (max(1, j - ku) <= i .and. i <= min(size(A,1), j+kl)) then
          cycle
        else
          A(i,j) = 0.0d0
        end if
      end do
    end do

  end subroutine

  function denseToBand(A, nsup, nsub) result(AB)

      integer, intent(in) :: nsub, nsup
      real(dp), intent(in) :: A(:,: )
      integer :: j, i
      real(dp) :: AB(2*nsub+nsup+1, size(A,1))

      do j = 1, size(A,1)
          do i = max(1, j - nsup), min(size(A,1), j+nsub)
              AB(nsub+nsup+1+i-j, j) = A(i,j)
          end do
      end do

  end function

  function denseToBand_BLAS(A, ku, kl) result(AB)

      integer, intent(in) :: ku, kl
      real(dp), intent(in) :: A(:,:)
      integer :: j, i, k
      real(dp) :: AB(2*kl+ku+1, size(A,1))
      integer :: m, n

      m = size(A,1)
      n = size(A,2)

       DO J = 1, N
          K = KU + 1 - J
          DO I = MAX( 1, J - KU ), MIN( M, J + KL )
             AB( K + I, J ) = A( I, J )
        END DO
      END DO

  end function

  subroutine print_mat(A)
    real(dp), intent(in) :: A(:,:)
    integer :: i
    do i = 1, size(A,1)
      write(*,'(*(F8.3,2X))') A(i,:)
    end do
  end subroutine

end module

program main

  use helpers
  implicit none

  integer, parameter :: n = 6
  integer, parameter :: kl = 1, ku = 1
  real(dp) :: Adense(n,n)
  real(dp), allocatable :: Ab1(:,:), Ab2(:,:)
  integer :: i
  real(dp) :: x(n), y(n), yb1(n), yb2(n)

  call random_number(Adense)
  call make_banded(Adense,kl,ku)

  print *, "Dense"
  call print_mat(Adense)

  ! returns leading diagonal at row ku + 1
  Ab1 = denseToBand_BLAS(Adense,kl,ku)
  print *, "Banded BLAS"
  call print_mat(Ab1)

  ! returns leading diagonal at row kl + ku + 1
  Ab2 = denseToBand(Adense,kl,ku)
  print *, "Banded OP"
  call print_mat(Ab2)


  call random_number(x)
  print *, "x = "
  do i = 1, n
    print *, x(i)
  end do

  ! Dense matmul
  y = matmul(Adense,x)

  ! Banded matmul
  call banded_times_vec(Ab1,x,n,kl,ku,yb1) ! correct
  call banded_times_vec(Ab2,x,n,kl,ku,yb2) ! wrong

  print *, "y = "
  do i = 1, n
    print *, i, y(i), yb1(i), yb2(i)
  end do

end program

It is also possible to use your original alignment (which is the one expected by the factorization routine DGBTRF), but you need to pass the address of element A(NSUB+1,1) instead of A in the call to dgbmv (lda remains the same) in your original example.

I would be happy to learn from other posters on this forum, if there is standard-conformant way to do this when using an explicit interface for the dgbmv routine. I found what I think is a solution using pointers:

Solution
  subroutine banded_times_vec(A, x, n, kl, ku, y)
    real(dp), intent(in), contiguous, target :: A(:,:)
    real(dp), intent(in) :: x(:)
    integer, intent(in) :: n, kl, ku
    real(dp) :: y(size(x))
    real(dp), pointer, contiguous :: Ap(:) => null()

    y = 0.0d0

    Ap(1:size(A)) => A

    call dgbmv('n',n,n,kl,ku,1.0d0,Ap(kl+1:),size(A,1),x,1,0.0d0,y,1)

  end subroutine