Here’s my attempt at a solution using C-Fortran array descriptors:

```
// f18_saxpy.c
#include <ISO_Fortran_binding.h>
#include "mkl.h" // Intel MKL
void f18_saxpy(CFI_cdesc_t *x, CFI_cdesc_t *y, float *a) {
int n, incx, incy;
float a_;
a_ = 1.0;
if (a)
a_ = *a;
n = y->dim[0].extent;
incx = 1;
incy = 1;
// Ideally, cast to CFI_index_t instead of int,
// but the standard BLAS interface uses int's for indexing
if (n > 1) {
incx = (int) ( x->dim[0].sm / x->elem_len );
incy = (int) ( y->dim[0].sm / y->elem_len );
}
cblas_saxpy(n, a_, (float *) x->base_addr, incx, (float *) y->base_addr, incy);
}
```

And here’s a driver program to test the routines:

```
! test_axpy.F90
module blas95
implicit none
private
public :: axpy
integer, parameter :: sp = kind(1.0e0)
interface axpy
#ifdef CFI
subroutine f18_saxpy( x, y, a ) bind(c,name='f18_saxpy')
import sp
real(sp), intent(in) :: x(:)
real(sp), intent(inout) :: y(:)
real(sp), intent(in), optional :: a
end subroutine
#else
module procedure blas95_saxpy
#endif
end interface
contains
subroutine blas95_saxpy( x, y, alpha )
real(sp), intent(in) :: x(:)
real(sp), intent(inout) :: y(:)
real(sp), intent(in), optional :: alpha
integer :: n, incx, incy
real(sp) :: alpha_
external :: saxpy
alpha_ = 1.0
if (present(alpha)) alpha_ = alpha
incx = 1
incy = 1
n = size(y)
if (n > 1) then
incx = (loc(x(2)) - loc(x(1))) / sizeof(x(1))
incy = (loc(y(2)) - loc(y(1))) / sizeof(y(1))
end if
call saxpy(n, alpha_, x(1), incx, y(1), incy)
end subroutine
end module
program test_axpy
use blas95, only: axpy
implicit none (type, external)
integer, parameter :: sp = kind(1.0e0)
real(sp) :: x(6), y(6), alpha
integer :: i
y = 0.0
x = [1.0,2.0,3.0,4.0,5.0,6.0]
call axpy( x, y )
write(*,'(6(F11.6,4X))') y
y = 0
alpha = 3.0
call axpy( x(1::2), y(::2), alpha )
write(*,'(6(F11.6,4X))') y
y = 0
alpha = 2.0
call axpy( x(2::2), y(2::2), alpha )
write(*,'(6(F11.6,4X))') y
y = 0
call axpy( x(2::3), y(2::3), 1.0 )
write(*,'(6(F11.6,4X))') y
y = 0
call axpy_second_element( x, y, 4.0 )
write(*,'(6(F11.6,4X))') y
y = 0
call axpy_second_element( x(::2), y(::2), 4.0 )
write(*,'(6(F11.6,4X))') y
contains
! Subroutine used to test sub-slicing
subroutine axpy_second_element( x, y, alpha )
real(sp), intent(in) :: x(:)
real(sp), intent(inout) :: y(:)
real(sp), intent(in), optional :: alpha
call axpy( x(::2), y(::2), alpha )
end subroutine
end program
```

I’ve only tested this with the Intel Fortran compiler (due to the convenient `-qmkl`

flag), and following command:

```
$ icc -qmkl -c -Wall f18_saxpy.c
$ ifort -DCFI test_axpy.F90 f18_saxpy.o -qmkl -o test_axpy
```

Omit the `-DCFI`

flag to test the Fortran implementation.

Here are instructions for the GNU compilers:

- Download
`cblas.tgz`

from the BLAS page at Netlib
- Extract the files to a folder, and locate
`cblas.h`

and `cblas_f77.h`

from the `include/`

folder, and `cblas_saxpy.c`

in the `src/`

folder. Copy the files into the folder containing the `test_axpy.F90`

driver.
- Replace
`mkl.h`

with `cblas.h`

in the include section of `f18_saxpy.c`

- Add interoperable type declarations in the Fortran interface to silence
`gfortran`

warnings:```
subroutine f18_saxpy( x, y, a ) bind(c,name='f18_saxpy')
use, intrinsic :: iso_c_binding, only: c_float
real(c_float), intent(in) :: x(:)
real(c_float), intent(inout) :: y(:)
real(c_float), intent(in), optional :: a
end subroutine
```

- Compile the CBLAS wrapper of the F77 routine, the
`f18_saxpy.c`

wrapper of the CBLAS wrapper, and finally the F95+ wrapper of the wrapper wrapper. Don’t forget to link with the system BLAS library.```
$ gcc -c -Wall cblas_saxpy.c
$ gcc -c -Wall f18_saxpy.c
$ gfortran -DCFI -Wall test_axpy.F90 f18_saxpy.o -o test_axpy -lblas
```

Amusingly, we have gone through 3 layers of indirection to achieve something which can be done easily in Fortran:

```
subroutine mf_saxpy( x, y, a )
real(sp), intent(in) :: x(:)
real(sp), intent(inout) :: y(:)
real(sp), intent(in), optional :: a
real(sp) :: a_
a_ = 1.0
if (present(a)) a_ = a
y = y + a_ * x
end subroutine
```