Hi,
I managed to implement a very basic linear elastic finite element solver using Open MPI, OpenMP and MUMPS.
I am new to MPI and OpenMP and was wondering if someone has an opinion on my OpenMP implementation. MUMPS requires MPI, while my element assembly uses OpenMP.
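Since MUMPS makes MPI calls while the assembly is threaded, I suspect the safer startup is to request a thread support level explicitly via MPI_INIT_THREAD instead of plain MPI_INIT; a minimal sketch of what I mean (MPI_THREAD_FUNNELED should be enough here, since all MPI calls happen outside the OpenMP parallel region):
program init_thread_sketch
use mpi_f08, only: MPI_INIT_THREAD, MPI_FINALIZE, MPI_THREAD_FUNNELED, MPI_SUCCESS
implicit none
integer :: provided, ierr
! ask for FUNNELED: only the thread that initialized MPI makes MPI calls
CALL MPI_INIT_THREAD(MPI_THREAD_FUNNELED, provided, ierr)
if (ierr /= MPI_SUCCESS) error stop "MPI_INIT_THREAD failed"
if (provided < MPI_THREAD_FUNNELED) error stop "MPI library lacks thread support"
CALL MPI_FINALIZE(ierr)
end program init_thread_sketch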
main.f90
program main
use debug_test, only: test
use veggies, only: result_t
use mpi_f08, only: MPI_INIT, MPI_FINALIZE, MPI_SUCCESS
implicit none
type(result_t) :: rslt ! comment this line out when doing: fpm run ...
integer :: ierr
CALL MPI_INIT(ierr)
if (ierr /= MPI_SUCCESS) error stop "MPI_INIT failed"
! print *, "Hello mumps"
rslt = test() ! comment this line out when doing: fpm run ...
CALL MPI_FINALIZE(ierr)
if (ierr /= MPI_SUCCESS) error stop "MPI_FINALIZE failed"
print *, "bye"
end program main
debug_test.f90
module debug_test
use veggies, only: result_t, assert_equals, assert_equals_within_absolute
use stdlib_linalg_constants, only: dp
implicit none
private
public:: test
contains
function test() result(rslt)
type(result_t) :: rslt
rslt = check_openmp_mumps_assembly_bc()
end function test
subroutine getElem(el, k, topo, A, rhs)
! element stiffness matrix, global degrees of freedom list and rhs
integer, intent(in):: el
real(dp), intent(in):: k
integer, intent(out):: topo(2)
real(dp), intent(out):: A(2,2)
real(dp), intent(out):: rhs(2)
real(dp):: acc ! local accumulator (avoids shadowing the intrinsic sum)
integer::i
! do some intensive computation so the threads have measurable work
acc = 0._dp
do i = 1, 10000000
acc = acc + real(i, dp)**2 ! accumulate in double precision; i**2 would overflow default integers
end do
! element stiffness matrix and rhs
topo = [el,el+1]
A = k*reshape([1,-1,-1,1],[2,2])
rhs = [0._dp,0._dp]
end subroutine getElem
function check_openmp_mumps_assembly_bc() result(rslt)
! elastic bars of equal length and stiffness k connected in series; left side fixed; right side force Fx applied
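! with the left node fixed and Fx3 applied at the right end, every bar carries
! the same axial force, so the exact displacement at node i is (i-1)*Fx3/k;
! rhs_ref below is filled with exactly this solution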
use mumpsSolver, only: solve_elemental
use mpi_f08, only: MPI_COMM_WORLD ! MPI_COMM_SELF when using 'fpm test'
type(result_t) :: rslt
integer:: n ! number of nodes
integer:: nelt ! number of elements
integer:: ndofs ! number of dofs per element
integer :: el ! element counter
integer :: i ! row counter
integer :: j ! column counter
integer :: itime ! time-step counter
real(dp):: k ! element stiffness
real(dp):: Fx3 ! axial force
integer:: bcdof
integer, allocatable :: eltptr_ref(:) ! indices of each element's 1st dof in elt_var
integer, allocatable :: eltptr(:) ! indices of each element's 1st dof in elt_var
integer, allocatable :: eltvar_ref(:) ! concatenated list of each element's dofs
integer, allocatable :: eltvar(:) ! concatenated list of each element's dofs
real(dp), allocatable :: A_elt_ref(:)
real(dp),allocatable :: A_elt(:)
integer, allocatable :: A_rows(:)
integer, allocatable :: indices(:)
real(dp), allocatable :: rhs_ref(:)
real(dp), allocatable :: rhs(:)
real(dp), allocatable :: A_el(:,:) ! element stiffness matrix
real(dp),allocatable :: rhs_el(:) ! element rhs
integer, allocatable :: topo(:) ! element global dof numbers
nelt = 100
n = nelt + 1
ndofs = 2
k = 1
Fx3 = 1
bcdof = 1
! Allocate
allocate(eltptr(nelt+1), eltptr_ref(nelt+1))
allocate(eltvar(nelt*ndofs), eltvar_ref(nelt*ndofs))
allocate(A_elt(nelt*ndofs*ndofs), A_elt_ref(nelt*ndofs*ndofs), A_rows(nelt*ndofs*ndofs))
allocate(rhs(n), rhs_ref(n))
allocate(A_el(ndofs,ndofs))
allocate(rhs_el(ndofs))
allocate(topo(ndofs))
! benchmark test values
do el = 1, nelt
eltptr_ref(el) = 2*el-1
eltvar_ref(2*el-1:2*el) = [el,el+1]
A_elt_ref(4*el-3:4*el) = k*[1,-1,-1,1]
rhs_ref(el) = Fx3/k*(el-1)
end do
eltptr_ref(nelt+1) = nelt*ndofs+1
rhs_ref(nelt+1) = nelt*Fx3/k
A_elt_ref(3) = 0._dp
! Time loop iteration
do itime = 1, 2
! Initialize
rhs = 0._dp
! Assemble; topo, A_el and rhs_el are per-element scratch and must be
! private to each thread
!$omp parallel do private(i, j, topo, A_el, rhs_el)
do el = 1, nelt
call getElem(el, k, topo, A_el, rhs_el)
eltptr(el) = (el-1)*ndofs+1
do j = 1, ndofs
do i = 1, ndofs
A_elt((el-1)*ndofs*ndofs+(j-1)*ndofs+i) = A_el(i,j)
A_rows((el-1)*ndofs*ndofs+(j-1)*ndofs+i) = topo(i)
end do
eltvar((el-1)*ndofs+j) = topo(j)
! neighbouring elements share a node, so the rhs accumulation must be atomic
!$omp atomic update
rhs(topo(j)) = rhs(topo(j)) + rhs_el(j)
end do
end do
!$omp end parallel do
eltptr(nelt+1) = nelt*ndofs+1
! Apply BCs
indices = pack([(i,i=1,size(A_rows))], A_rows==bcdof)
rhs(bcdof) = 0._dp
rhs(nelt+1) = Fx3
A_elt(indices) = 0._dp
A_elt(indices(1)) = 1._dp
! Solve
call solve_elemental(MPI_COMM_WORLD%MPI_VAL, n, nelt, eltptr, eltvar, A_elt, rhs) ! MPI_COMM_SELF
end do
! Assert
rslt = assert_equals_within_absolute(A_elt_ref, A_elt, 1.e-13_dp, "A_els") .and. &
assert_equals_within_absolute(rhs_ref, rhs, 1.e-12_dp, "rhs") .and. &
assert_equals(eltvar_ref, eltvar, "eltvar") .and. &
assert_equals(eltptr_ref, eltptr,"eltptr")
print *, 'bye bye'
end function check_openmp_mumps_assembly_bc
end module debug_test
mumpsSolver.f90
module mumpsSolver
use stdlib_linalg_constants, only: dp
use iso_fortran_env, only: stdout=>output_unit
implicit none
private
public :: solve_elemental
contains
subroutine solve_elemental(comm, n, nelt, eltptr, eltvar, A_elt, rhs)
! use mpi_f08, only: MPI_COMM_WORLD
IMPLICIT NONE
INCLUDE 'dmumps_struc.h'
type(dmumps_struc) :: mumps_par
integer :: i
integer, intent(in) :: comm ! MPI_COMM_WORLD%MPI_VAL or MPI_COMM_SELF%MPI_VAL
integer, intent(in) :: n
integer, intent(in) :: nelt
integer, intent(in) :: eltptr(:) ! index of each element's 1st dof in eltvar
integer, intent(in) :: eltvar(:) ! concatenated list of each element's global dofs
real(dp), intent(in) :: a_elt(:)
real(dp), intent(inout) :: rhs(:)
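! for example, with two 2-dof bars sharing node 2 the elemental input would be
! eltptr = [1, 3, 5] (element el's dofs are eltvar(eltptr(el):eltptr(el+1)-1)),
! eltvar = [1, 2, 2, 3], and a_elt the two dense 2x2 element matrices stored
! back to back in column-major order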
mumps_par%COMM = comm ! MPI_COMM_WORLD%MPI_VAL or MPI_COMM_SELF%MPI_VAL
mumps_par%JOB = -1 ! initialize mumps instance
mumps_par%SYM = 0 ! A is unsymmetric
mumps_par%PAR = 1 ! host process also involved in factorization and solve phases
CALL DMUMPS(mumps_par)
IF (mumps_par%INFOG(1) < 0) THEN ! if init failed
WRITE(stdout,'(A,A,I6,A,I9)') " ERROR RETURN: ",&
" mumps_par%INFOG(1)= ", mumps_par%INFOG(1),&
" mumps_par%INFOG(2)= ", mumps_par%INFOG(2)
else ! if init succeeded
IF (mumps_par%MYID == 0) THEN
mumps_par%n = n
mumps_par%nelt = nelt
allocate(mumps_par%eltptr, source=eltptr)
allocate(mumps_par%eltvar, source=eltvar)
allocate(mumps_par%a_elt, source=a_elt)
allocate(mumps_par%rhs, source=rhs)
END IF
mumps_par%icntl(1:4) = -1 ! suppress solver output
mumps_par%ICNTL(5) = 1 ! Elemental format
mumps_par%JOB = 6 ! perform analysis (JOB=1), factorization (JOB=2) and solve (JOB=3) in one call
CALL DMUMPS(mumps_par)
IF (mumps_par%INFOG(1) < 0) THEN ! if solve failed
WRITE(stdout,'(A,A,I6,A,I9)') " ERROR RETURN: ",&
" mumps_par%INFOG(1)= ", mumps_par%INFOG(1),&
" mumps_par%INFOG(2)= ", mumps_par%INFOG(2)
else ! if solve succeeded
IF (mumps_par%MYID == 0) THEN
WRITE(stdout,*) 'Solution is ',(mumps_par%RHS(i),i=1,mumps_par%N)
rhs = mumps_par%rhs
END IF
mumps_par%JOB = -2 ! terminate mumps instance
CALL DMUMPS(mumps_par)
IF (mumps_par%INFOG(1) < 0) THEN ! if terminate failed
WRITE(stdout,'(A,A,I6,A,I9)') " ERROR RETURN: ",&
" mumps_par%INFOG(1)= ", mumps_par%INFOG(1),&
" mumps_par%INFOG(2)= ", mumps_par%INFOG(2)
END IF
IF (mumps_par%MYID == 0) THEN ! release the input arrays allocated above so repeated calls do not leak
deallocate(mumps_par%eltptr, mumps_par%eltvar, mumps_par%a_elt, mumps_par%rhs)
END IF
END IF
END IF
end subroutine solve_elemental
end module mumpsSolver
fpm.toml
name = "mumps2"
version = "0.1.0"
license = "license"
author = "Jane Doe"
maintainer = "jane.doe@example.com"
copyright = "Copyright 2025, Jane Doe"
[build]
auto-executables = true
auto-tests = true
auto-examples = true
module-naming = false
link = ["dmumps"]
[install]
library = false
test = false
[fortran]
implicit-typing = true
implicit-external = true
source-form = "free"
[dependencies]
mpi = "*"
stdlib = "*"
rojff = { git = "https://gitlab.com/everythingfunctional/rojff.git", tag = "v1.0.1" }
[dev-dependencies]
veggies = { git = "https://gitlab.com/everythingfunctional/veggies.git", tag = "v1.0.0" }
I installed the MUMPS solver from Ubuntu's repositories:
sudo apt install libmumps-headers-dev libmumps-dev mumps-test
The command I use to run it:
fpm run --flag '-I/usr/include' --runner-args=' -np 3'
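As the comments in debug_test.f90 indicate, a serial run through 'fpm test' only needs the communicator swapped; roughly (a fragment, reusing the variables from check_openmp_mumps_assembly_bc):
use mpi_f08, only: MPI_COMM_SELF
! serial path: the single rank owns the whole system
call solve_elemental(MPI_COMM_SELF%MPI_VAL, n, nelt, eltptr, eltvar, A_elt, rhs)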