By the looks of things your original code (before Edit 4) uses wrong alignment of the banded storage of matrix A with respect to what the dgbmv
routine expects.
I’ve created a full example using the “top” alignment (following Edit 4), and the results from matmul
and dgbmv
are equal down to the last few decimal places:
Full example
Don’t forget to link with -lblas
.
module helpers
implicit none
integer, parameter :: dp = kind(1.0d0)
interface
subroutine dgbmv(trans,m,n,kl,ku,alpha,a,lda,x,incx,beta,y,incy)
import dp
character(len=1), intent(in) :: trans
integer, intent(in) :: m, n, kl, ku, lda, incx, incy
real(dp), intent(in) :: alpha, a(lda,*), x(*), beta
real(dp), intent(inout) :: y(*)
end subroutine
end interface
contains
!> Banded matrix times vector multiplication
!>
!> Assumes matrix is stored in the first (kl + ku + 1) rows of array A
!>
subroutine banded_times_vec(A, x, n, kl, ku, y)
real(dp), intent(in) :: A(:,:), x(: )
integer, intent(in) :: n, kl, ku
real(dp) :: y(size(x))
y = 0.0d0
call dgbmv('n',n,n,kl,ku,1.0d0,A,size(A,1),x,1,0.0d0,y,1)
end subroutine
!> Replace values below the diagonal kl and above diagonal ku with zero
subroutine make_banded(A,kl,ku)
real(dp), intent(inout) :: A(:,:)
integer, intent(in) :: kl, ku
integer :: i, j
do j = 1, size(A,1)
do i = 1, size(A,2)
if (max(1, j - ku) <= i .and. i <= min(size(A,1), j+kl)) then
cycle
else
A(i,j) = 0.0d0
end if
end do
end do
end subroutine
function denseToBand(A, nsup, nsub) result(AB)
integer, intent(in) :: nsub, nsup
real(dp), intent(in) :: A(:,: )
integer :: j, i
real(dp) :: AB(2*nsub+nsup+1, size(A,1))
do j = 1, size(A,1)
do i = max(1, j - nsup), min(size(A,1), j+nsub)
AB(nsub+nsup+1+i-j, j) = A(i,j)
end do
end do
end function
function denseToBand_BLAS(A, ku, kl) result(AB)
integer, intent(in) :: ku, kl
real(dp), intent(in) :: A(:,:)
integer :: j, i, k
real(dp) :: AB(2*kl+ku+1, size(A,1))
integer :: m, n
m = size(A,1)
n = size(A,2)
DO J = 1, N
K = KU + 1 - J
DO I = MAX( 1, J - KU ), MIN( M, J + KL )
AB( K + I, J ) = A( I, J )
END DO
END DO
end function
subroutine print_mat(A)
real(dp), intent(in) :: A(:,:)
integer :: i
do i = 1, size(A,1)
write(*,'(*(F8.3,2X))') A(i,:)
end do
end subroutine
end module
program main
use helpers
implicit none
integer, parameter :: n = 6
integer, parameter :: kl = 1, ku = 1
real(dp) :: Adense(n,n)
real(dp), allocatable :: Ab1(:,:), Ab2(:,:)
integer :: i
real(dp) :: x(n), y(n), yb1(n), yb2(n)
call random_number(Adense)
call make_banded(Adense,kl,ku)
print *, "Dense"
call print_mat(Adense)
! returns leading diagonal at row ku + 1
Ab1 = denseToBand_BLAS(Adense,kl,ku)
print *, "Banded BLAS"
call print_mat(Ab1)
! returns leading diagonal at row kl + ku + 1
Ab2 = denseToBand(Adense,kl,ku)
print *, "Banded OP"
call print_mat(Ab2)
call random_number(x)
print *, "x = "
do i = 1, n
print *, x(i)
end do
! Dense matmul
y = matmul(Adense,x)
! Banded matmul
call banded_times_vec(Ab1,x,n,kl,ku,yb1) ! correct
call banded_times_vec(Ab2,x,n,kl,ku,yb2) ! wrong
print *, "y = "
do i = 1, n
print *, i, y(i), yb1(i), yb2(i)
end do
end program
It is also possible to use your original alignment (which is the one expected by the factorization routine DGBTRF
), but you need to pass the address of element A(NSUB+1,1)
instead of A
in the call to dgbmv
(lda
remains the same) in your original example.
I would be happy to learn from other posters on this forum, if there is standard-conformant way to do this when using an explicit interface for the dgbmv
routine. I found what I think is a solution using pointers:
Solution
subroutine banded_times_vec(A, x, n, kl, ku, y)
real(dp), intent(in), contiguous, target :: A(:,:)
real(dp), intent(in) :: x(:)
integer, intent(in) :: n, kl, ku
real(dp) :: y(size(x))
real(dp), pointer, contiguous :: Ap(:) => null()
y = 0.0d0
Ap(1:size(A)) => A
call dgbmv('n',n,n,kl,ku,1.0d0,Ap(kl+1:),size(A,1),x,1,0.0d0,y,1)
end subroutine