Perhaps using aligned_alloc
is slightly easier, since it comes with the C standard library:
! test_aligned_alloc.f90
!
program test_aligned_alloc
use, intrinsic :: iso_c_binding
implicit none
interface
! size must be a multiple of alignment
function c_aligned_alloc(alignment,size) bind(c,name="aligned_alloc")
import c_size_t, c_ptr
integer(c_size_t), value :: alignment, size
type(c_ptr) :: c_aligned_alloc
end function
subroutine c_free(ptr) bind(c,name="free")
import c_ptr
type(c_ptr), value :: ptr
end subroutine
end interface
integer, parameter :: wp = c_float
type(c_ptr) :: a_p = c_null_ptr
real(wp), pointer :: a(:)
integer :: n, npad
integer, parameter :: balign = 16
n = 5
print *, "n = ", n
npad = n + (balign - mod(n * c_sizeof(1.0_wp), balign))/c_sizeof(1.0_wp)
print *, "npad = ", npad
! size of allocated array must be an integer multiple of the alignment
a_p = c_aligned_alloc( int(balign,c_size_t), npad * c_sizeof(1.0_wp))
call c_f_pointer(a_p,a,[n])
print *, "mod(loc(a),balign) = ", mod(loc(a),balign)
call random_number(a)
call square(a)
print '(F10.3,2X)', a
nullify(a)
call c_free(a_p)
a_p = c_null_ptr
contains
subroutine square(a)
real(wp), intent(inout) :: a(:)
a = a**2
end subroutine
end program
The omp_alloc
function also works with gfortran v13, unfortunately, the allocators
directive for allocatable arrays is not supported yet:
! test_omp_alloc.f90
!
program test_aligned_alloc
use, intrinsic :: iso_c_binding
use omp_lib
implicit none
integer, parameter :: wp = c_float
integer(omp_memspace_handle_kind) :: a_memspace = omp_default_mem_space
type(omp_alloctrait) :: a_traits(1) = &
[omp_alloctrait(omp_atk_alignment,16)]
integer(omp_allocator_handle_kind) :: a_alloc
real(wp), pointer :: a(:)
type(c_ptr) :: a_p = c_null_ptr
integer :: i, n
a_alloc = omp_init_allocator( a_memspace, 1, a_traits)
n = 5
a_p = omp_alloc(n * c_sizeof(1.0_wp), a_alloc)
call c_f_pointer(a_p, a, [n])
print *, "mod(loc(a),16) = ", mod(loc(a),16)
call random_number(a)
call square(a)
print '(F10.3,2X)', a
! Edit 2: inserted lines to free memory
nullify(a)
call omp_free(a_p, a_alloc)
call omp_destroy_allocator(a_alloc)
contains
subroutine square(a)
real(wp), intent(inout) :: a(:)
a = a**2
end subroutine
end program
Even if you obtain aligned memory, GNU Fortran doesn’t have a directive to force aligned loads and stores. The !$omp simd aligned(...)
is there, but its unclear what type of variables are allowed to appear in it.
Currently, gfortran appears to follow the OpenMP 4.0 standard, which lists the restriction:
- The type of list items appearing in the aligned clause must be C_PTR or Cray Fortran pointer, or the list item must have the POINTER or ALLOCATABLE attribute.
In OpenMP 5.0, this is changed to:
- If a list item on the aligned clause has the ALLOCATABLE attribute, the allocation status must be allocated.
- If a list item on the aligned clause has the POINTER attribute, the association status must be associated.
- If the type of a list item on the aligned clause is either C_PTR or Cray pointer, the list item must be defined
Note there is no restriction regarding assumed-size or other arrays (on the stack).
In OpenMP 5.2 the aligned clause is changed again:
- Each list item must have C_PTR or Cray pointer type or have the POINTER or ALLOCATABLE attribute. Cray pointer support has been deprecated.
- If a list item has the ALLOCATABLE attribute, the allocation status must be allocated.
- If a list item has the POINTER attribute, the association status must be associated.
- If the type of a list item is either C_PTR or Cray pointer, it must be defined. Cray pointer support has been deprecated.
gfortran and the Intel Fortran compilers appears to interpret these differently: Compiler Explorer
Edit: In the upcoming OpenMP 6.0 standard (currently just a technical preview), the restrictions on aligned
items are changed again:
Each list item must be an array.