Derived type initialized with array sections giving unexpected results

Beliavsky · March 25, 2025, 2:53am

For the code

module kind_mod
  implicit none
  private
  public :: dp
  integer, parameter :: dp = kind(1.0d0)
end module kind_mod

module util_mod
  use kind_mod, only: dp
  implicit none
  private
  public :: default
  interface default
     module procedure default_int, default_real, default_logical, default_character
  end interface default

contains

  elemental function default_int(x, xopt) result(y)
    integer, intent(in) :: x
    integer, intent(in), optional :: xopt
    integer :: y
    if (present(xopt)) then
       y = xopt
    else
       y = x
    end if
  end function default_int

  elemental function default_real(x, xopt) result(y)
    real(kind=dp), intent(in) :: x
    real(kind=dp), intent(in), optional :: xopt
    real(kind=dp) :: y
    if (present(xopt)) then
       y = xopt
    else
       y = x
    end if
  end function default_real

  elemental function default_logical(x, xopt) result(y)
    logical, intent(in) :: x
    logical, intent(in), optional :: xopt
    logical :: y
    if (present(xopt)) then
       y = xopt
    else
       y = x
    end if
  end function default_logical

  elemental function default_character(x, xopt) result(y)
    character(len=*), intent(in) :: x
    character(len=*), intent(in), optional :: xopt
    character(len=100) :: y
    if (present(xopt)) then
       y = xopt
    else
       y = x
    end if
  end function default_character

end module util_mod

module dataframe_mod
  use kind_mod, only: dp
  use util_mod, only: default
  use iso_fortran_env, only: output_unit
  implicit none
  private
  public :: DataFrame, nrow, random, display, subset_stride
  integer, parameter :: nlen_columns = 100, nrows_print = 10
  logical, save :: blank_line_before_display = .true.

  interface display
     module procedure display_data
  end interface display

  type :: DataFrame
     integer, allocatable :: index(:)
     character(len=nlen_columns), allocatable :: columns(:)
     real(kind=dp), allocatable :: values(:,:)
     contains
        procedure :: display => display_data
  end type DataFrame

contains

  elemental function nrow(df) result(num_rows)
    type(DataFrame), intent(in) :: df
    integer :: num_rows
    if (allocated(df%values)) then
       num_rows = size(df%values, 1)
    else
       num_rows = -1
    end if
  end function nrow

  subroutine alloc(self, nr, nc)
    type(DataFrame), intent(out) :: self
    integer, intent(in) :: nr, nc
    allocate(self%index(nr), self%values(nr, nc))
  end subroutine alloc

  subroutine random(self, nr, nc)
    type(DataFrame), intent(out) :: self
    integer, intent(in) :: nr, nc
    integer :: i
    call alloc(self, nr, nc)
    call random_number(self%values)
    do i = 1, nr
       self%index(i) = i
    end do
    allocate(self%columns(nc))
    do i = 1, nc
       write(self%columns(i), "('C',i0)") i
    end do
  end subroutine random

  subroutine display_data(self, print_all, fmt_ir, fmt_header, fmt_trailer, title)
    class(DataFrame), intent(in) :: self
    logical, intent(in), optional :: print_all
    character(len=*), intent(in), optional :: fmt_ir, fmt_header, fmt_trailer, title
    integer :: total, i, n_top, n_bottom
    logical :: print_all_
    character(len=100) :: fmt_ir_, fmt_header_
    fmt_ir_ = default("(i10,*(1x,f10.4))", fmt_ir)
    fmt_header_ = default("(a10,*(1x,a10))", fmt_header)
    print_all_ = default(.false., print_all)
    total = size(self%index)
    if (blank_line_before_display) write(*,*)
    if (present(title)) write(*,"(a)") title
    write(*,fmt_header_) "index", (trim(self%columns(i)), i=1,size(self%columns))
    if (print_all_) then
       do i = 1, total
          write(*,fmt_ir_) self%index(i), self%values(i,:)
       end do
    else
       if (total <= nrows_print) then
          do i = 1, total
             write(*,fmt_ir_) self%index(i), self%values(i,:)
          end do
       else
          n_top = nrows_print / 2
          n_bottom = nrows_print - n_top
          do i = 1, n_top
             write(*,fmt_ir_) self%index(i), self%values(i,:)
          end do
          write(*,*) "   ... (", total - nrows_print, " rows omitted) ..."
          do i = total - n_bottom + 1, total
             write(*,fmt_ir_) self%index(i), self%values(i,:)
          end do
       end if
    end if
    if (present(fmt_trailer)) write(*,fmt_trailer)
  end subroutine display_data

  function subset_stride(df, stride) result(df_new)
    ! return section of df with specified stride of rows
    type(DataFrame), intent(in) :: df
    integer, intent(in) :: stride
    type(DataFrame) :: df_new
    print*
    print*,"df%index(1:nrow(df):stride)", df%index(1:nrow(df):stride)
    print*,"df%values(1:nrow(df):stride, :)", df%values(1:nrow(df):stride, :)
    df_new = DataFrame(index = df%index(1:nrow(df):stride), &
                       columns = df%columns, &
                       values  = df%values(1:nrow(df):stride, :))
  end function subset_stride

end module dataframe_mod

program xxdataframe
  use dataframe_mod, only: DataFrame, random, display, subset_stride
  use kind_mod, only: dp
  implicit none
  type(DataFrame) :: df, df_new
  call random(df, 8, 1)
  call display(df)
  df_new = subset_stride(df, 3)
  call display(df_new)
end program xxdataframe

compiled with gfortran 15.0.0 20241215, sample output is

     index         C1
         1     0.0000
         2     0.0255
         3     0.3525
         4     0.6669
         5     0.9631
         6     0.8383
         7     0.3354
         8     0.9153
 
 df%index(1:nrow(df):stride)           1           4           7
 df%values(1:nrow(df):stride, :)  3.920868194323862E-007  0.666914481524251     
  0.335355043646496     
 
     index         C1
         1     0.0000
         2     0.0255
         3     0.3525

I don’t understand why the first stride rows of the input dataframe are being returned instead of a section with that stride.

When I add parentheses in the last line of the function being tested, giving

    df_new = DataFrame(index = (df%index(1:nrow(df):stride)), &
                       columns = df%columns, &
                       values  = (df%values(1:nrow(df):stride, :)))

the results are what I expect, for example

     index         C1
         1     0.3943
         2     0.0404
         3     0.4847
         4     0.8717
         5     0.7385
         6     0.1128
         7     0.6277
         8     0.0979

 df%index(1:nrow(df):stride)           1           4           7
 df%values(1:nrow(df):stride, :)  0.39428190635368476       0.87171493770953412       0.62765023762467897     

     index         C1
         1     0.3943
         4     0.8717
         7     0.6277

FedericoPerini · March 25, 2025, 9:29am

Looking with the Compiler Explorer it would look like this is a gfortran bug (all recent versions have it). It’s weird because if you print *, df_new%index you get the right results, but then from inside the TBP, you get the wrong values of both index and values

Beliavsky · March 25, 2025, 7:49pm

Thanks. The following simplified code, in which the index and column components of DataFrame have been removed, exhibits the same problem with gfortran.

module kind_mod
  implicit none
  private
  public :: dp
  integer, parameter :: dp = kind(1.0d0)
end module kind_mod

module util_mod
  use kind_mod, only: dp
  implicit none
  private
  public :: default
  interface default
     module procedure default_int, default_real, default_logical, default_character
  end interface default

contains

  elemental function default_int(x, xopt) result(y)
    integer, intent(in) :: x
    integer, intent(in), optional :: xopt
    integer :: y
    if (present(xopt)) then
       y = xopt
    else
       y = x
    end if
  end function default_int

  elemental function default_real(x, xopt) result(y)
    real(kind=dp), intent(in) :: x
    real(kind=dp), intent(in), optional :: xopt
    real(kind=dp) :: y
    if (present(xopt)) then
       y = xopt
    else
       y = x
    end if
  end function default_real

  elemental function default_logical(x, xopt) result(y)
    logical, intent(in) :: x
    logical, intent(in), optional :: xopt
    logical :: y
    if (present(xopt)) then
       y = xopt
    else
       y = x
    end if
  end function default_logical

  elemental function default_character(x, xopt) result(y)
    character(len=*), intent(in) :: x
    character(len=*), intent(in), optional :: xopt
    character(len=100) :: y
    if (present(xopt)) then
       y = xopt
    else
       y = x
    end if
  end function default_character

end module util_mod

module dataframe_mod
  use kind_mod, only: dp
  use util_mod, only: default
  use iso_fortran_env, only: output_unit
  implicit none
  private
  public :: DataFrame, nrow, random, display, subset_stride
  integer, parameter :: nrows_print = 10
  logical, save :: blank_line_before_display = .true.

  interface display
     module procedure display_data
  end interface display

  type :: DataFrame
     real(kind=dp), allocatable :: values(:,:)
     contains
        procedure :: display => display_data
  end type DataFrame

contains

  elemental function nrow(df) result(num_rows)
    type(DataFrame), intent(in) :: df
    integer :: num_rows
    if (allocated(df%values)) then
       num_rows = size(df%values, 1)
    else
       num_rows = -1
    end if
  end function nrow

  subroutine alloc(self, nr, nc)
    type(DataFrame), intent(out) :: self
    integer, intent(in) :: nr, nc
    allocate(self%values(nr, nc))
  end subroutine alloc

  subroutine random(self, nr, nc)
    type(DataFrame), intent(out) :: self
    integer, intent(in) :: nr, nc
    call alloc(self, nr, nc)
    call random_number(self%values)
  end subroutine random

  subroutine display_data(self, print_all, fmt_ir, fmt_header, fmt_trailer, title)
    class(DataFrame), intent(in) :: self
    logical, intent(in), optional :: print_all
    character(len=*), intent(in), optional :: fmt_ir, fmt_header, fmt_trailer, title
    integer :: total, i, n_top, n_bottom
    logical :: print_all_
    character(len=100) :: fmt_ir_, fmt_header_
    fmt_ir_ = default("(1x,*(1x,f10.4))", fmt_ir)
    fmt_header_ = default("(*(1x,a10))", fmt_header)  ! Adjusted spacing
    print_all_ = default(.false., print_all)
    total = nrow(self)
    if (blank_line_before_display) write(*,*)
    if (present(title)) write(*,"(a)") title
    if (print_all_) then
       do i = 1, total
          write(*,fmt_ir_) self%values(i,:)
       end do
    else
       if (total <= nrows_print) then
          do i = 1, total
             write(*,fmt_ir_) self%values(i,:)
          end do
       else
          n_top = nrows_print / 2
          n_bottom = nrows_print - n_top
          do i = 1, n_top
             write(*,fmt_ir_) self%values(i,:)
          end do
          write(*,*) "   ... (", total - nrows_print, " rows omitted) ..."
          do i = total - n_bottom + 1, total
             write(*,fmt_ir_) self%values(i,:)
          end do
       end if
    end if
    if (present(fmt_trailer)) write(*,fmt_trailer)
  end subroutine display_data

  function subset_stride(df, stride) result(df_new)
    type(DataFrame), intent(in) :: df
    integer, intent(in) :: stride
    type(DataFrame) :: df_new
    df_new = DataFrame(values  = df%values(1:nrow(df):stride, :))
  end function subset_stride

end module dataframe_mod

program xxdataframe
  use dataframe_mod, only: DataFrame, random, display, subset_stride
  implicit none
  type(DataFrame) :: df, df_new
  call random(df, 8, 2)
  call display(df, title="df")
  df_new = subset_stride(df, 3)
  call display(df_new, title="subset_stride(df, 3)")
end program xxdataframe

giving sample output

df
      0.7699     0.6680
      0.4850     0.7904
      0.7379     0.5767
      0.3679     0.8049
      0.2329     0.4196
      0.7478     0.1467
      0.7671     0.6163
      0.4676     0.9425

subset_stride(df, 3)
      0.7699     0.6680
      0.4850     0.7904
      0.7379     0.5767

Ifx 2025.0.4 Build 20241205 also stumbles (@greenrongreen) , giving sample output

df
      0.0000     0.7959
      0.0255     0.8327
      0.3525     0.3450
      0.6669     0.8712
      0.9631     0.0899
      0.8383     0.8883
      0.3354     0.7010
      0.9153     0.7346
 
subset_stride(df, 3)
      0.0000     0.0000
      0.0255     0.0000
      0.3525     0.0000

The first column of subset_stride(df, 3) has the same bug as gfortran does, and the second column is set to zero.

FedericoPerini · March 26, 2025, 7:49am

If you override the default constructor with an equivalent function, the problem goes away. I wonder if there is any subtle Standard rule for initialization of allocatable components in derived types that is being applied here?

greenrongreen · March 26, 2025, 3:49pm

I will get a bug report for IFX.
It is the type constructor that is tripping up ifx. I reduced this to the following. You will see that if you replace the type constructor with an allocate and explicit initialization of df_new%values you will get correct results with IFX. I have a note below, this is a regression in ifx with 2025.0 and will also be in 2025.1 when it releases in the coming days

module dataframe_mod
  implicit none
  private
  public :: DataFrame, display, subset_stride

  type :: DataFrame
     real(kind=4), allocatable :: values(:,:)
  end type DataFrame

contains

  subroutine display(self)
    class(DataFrame) :: self
    integer :: i
    do i = 1, UBOUND(self%values,dim=1)
       write(*,"(1x,A,I1,*(1x,f10.4))") "row: ", i, self%values(i,:)
    end do
  end subroutine display

  function subset_stride(df) result(df_new)
    type(DataFrame) :: df
    type(DataFrame) :: df_new
    !...suspect below ...
    df_new = DataFrame(values  = df%values(1:8:3, :))
    !...can fix this by explicitly setting result df_new
! uncomment_for_working_testcase 
    !allocate(df_new%values(3,2))
    !df_new%values(1,:) = [ 1.1, 1.2 ]
    !df_new%values(2,:) = [ 4.1, 4.2 ]
    !df_new%values(3,:) = [ 7.1, 7.2 ]
  end function subset_stride

end module dataframe_mod

program xxdataframe
  use dataframe_mod
  implicit none
  type(DataFrame) :: df, df_new
  allocate(df%values(8,2) )
   df%values(1,:) = [ 1.1, 1.2 ]
   df%values(2,:) = [ 2.1, 2.2 ]
   df%values(3,:) = [ 3.1, 3.2 ]
   df%values(4,:) = [ 4.1, 4.2 ]
   df%values(5,:) = [ 5.1, 5.2 ]
   df%values(6,:) = [ 6.1, 6.2 ]
   df%values(7,:) = [ 7.1, 7.2 ]
   df%values(8,:) = [ 8.1, 8.2 ]
  write(*,*) "display initial values of df"
   call display(df)
  write(*,*)

  df_new = subset_stride(df)
  write(*,*) "print df_new, should have rows 1, 4, 7 "
  write(*,*) "shape(df_new%values)", shape(df_new%values)
  write(*,*) "bounds of df_new%values, dim 1: ", LBOUND(df_new%values,1), UBOUND(df_new%values,1)
  write(*,*) "bounds of df_new%values, dim 2: ", LBOUND(df_new%values,2), UBOUND(df_new%values,2)
  call display(df_new)
end program xxdataframe

A few interesting points in this triage: this is a regression in IFX. the last good version was 2024.2.0. It regressed in 2025.0.0. gfortran 11.4 does not error with this reproducer, so perhaps the gfortran issue is different than the one shown in ifx. Here is the erroneous output of ifx 2025.0 and the upcoming 2025.1 (too late to fix this one in v2025.1.0, its almost out of the door)

rm a.out ; ifx -O0  repro.f90 ; ./a.out

 display initial values of df
 row: 1     1.1000     1.2000
 row: 2     2.1000     2.2000
 row: 3     3.1000     3.2000
 row: 4     4.1000     4.2000
 row: 5     5.1000     5.2000
 row: 6     6.1000     6.2000
 row: 7     7.1000     7.2000
 row: 8     8.1000     8.2000
 
 print df_new, should have rows 1, 4, 7 
 shape(df_new%values)           3           2
 bounds of df_new%values, dim 1:            1           3
 bounds of df_new%values, dim 2:            1           2
 row: 1     1.1000     0.0000
 row: 2     2.1000     0.0000
 row: 3     3.1000     0.0000

another interesting point - this is a case where the new LLVM Address Sanitizer will find memory corruption caused by this bug. The option for this is -fsanitize=address and run the code.

rm a.out ; ifx -O0 -fsanitize=address repro.f90 ; ./a.out
 display initial values of df
 row: 1     1.1000     1.2000
 row: 2     2.1000     2.2000
 row: 3     3.1000     3.2000
 row: 4     4.1000     4.2000
 row: 5     5.1000     5.2000
 row: 6     6.1000     6.2000
 row: 7     7.1000     7.2000
 row: 8     8.1000     8.2000
 
 print df_new, should have rows 1, 4, 7 
 shape(df_new%values)           3           2
 bounds of df_new%values, dim 1:            1           3
 bounds of df_new%values, dim 2:            1           2
=================================================================
==212239==ERROR: AddressSanitizer: heap-buffer-overflow on address 0x506000000120 at pc 0x00000051a32f bp 0x7fff9fe97030 sp 0x7fff9fe97028
READ of size 4 at 0x506000000120 thread T0
    #0 0x51a32e in dataframe_mod::display_ (/nfs/pdx/disks/cts1/quad/rwgreen/triage/f-discourse/a.out+0x51a32e) (BuildId: 3ab273d6e76114ff1028e32bec14e8ed21fcdfb3)
    #1 0x51e06b in MAIN (/nfs/pdx/disks/cts1/quad/rwgreen/triage/f-discourse/a.out+0x51e06b) (BuildId: 3ab273d6e76114ff1028e32bec14e8ed21fcdfb3)
    #2 0x43114c in main (/nfs/pdx/disks/cts1/quad/rwgreen/triage/f-discourse/a.out+0x43114c) (BuildId: 3ab273d6e76114ff1028e32bec14e8ed21fcdfb3)
    #3 0x7f3c7fee8d8f  (/lib/x86_64-linux-gnu/libc.so.6+0x29d8f) (BuildId: cd410b710f0f094c6832edd95931006d883af48e)
    #4 0x7f3c7fee8e3f in __libc_start_main (/lib/x86_64-linux-gnu/libc.so.6+0x29e3f) (BuildId: cd410b710f0f094c6832edd95931006d883af48e)
    #5 0x431064 in _start (/nfs/pdx/disks/cts1/quad/rwgreen/triage/f-discourse/a.out+0x431064) (BuildId: 3ab273d6e76114ff1028e32bec14e8ed21fcdfb3)

0x506000000120 is located 0 bytes after 64-byte region [0x5060000000e0,0x506000000120)
allocated by thread T0 here:
    #0 0x4d8af3 in malloc (/nfs/pdx/disks/cts1/quad/rwgreen/triage/f-discourse/a.out+0x4d8af3) (BuildId: 3ab273d6e76114ff1028e32bec14e8ed21fcdfb3)
    #1 0x579944 in _mm_malloc (/nfs/pdx/disks/cts1/quad/rwgreen/triage/f-discourse/a.out+0x579944) (BuildId: 3ab273d6e76114ff1028e32bec14e8ed21fcdfb3)
    #2 0x51da45 in MAIN (/nfs/pdx/disks/cts1/quad/rwgreen/triage/f-discourse/a.out+0x51da45) (BuildId: 3ab273d6e76114ff1028e32bec14e8ed21fcdfb3)
    #3 0x43114c in main (/nfs/pdx/disks/cts1/quad/rwgreen/triage/f-discourse/a.out+0x43114c) (BuildId: 3ab273d6e76114ff1028e32bec14e8ed21fcdfb3)
    #4 0x7f3c7fee8d8f  (/lib/x86_64-linux-gnu/libc.so.6+0x29d8f) (BuildId: cd410b710f0f094c6832edd95931006d883af48e)

SUMMARY: AddressSanitizer: heap-buffer-overflow (/nfs/pdx/disks/cts1/quad/rwgreen/triage/f-discourse/a.out+0x51a32e) (BuildId: 3ab273d6e76114ff1028e32bec14e8ed21fcdfb3) in dataframe_mod::display_
Shadow bytes around the buggy address:
  0x505ffffffe80: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
  0x505fffffff00: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
  0x505fffffff80: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
  0x506000000000: fa fa fa fa 00 00 00 00 00 00 00 fa fa fa fa fa
  0x506000000080: fd fd fd fd fd fd fd fd fa fa fa fa 00 00 00 00
=>0x506000000100: 00 00 00 00[fa]fa fa fa fa fa fa fa fa fa fa fa
  0x506000000180: fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa
  0x506000000200: fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa
  0x506000000280: fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa
  0x506000000300: fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa
  0x506000000380: fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa
Shadow byte legend (one shadow byte represents 8 application bytes):
  Addressable:           00
  Partially addressable: 01 02 03 04 05 06 07 
  Heap left redzone:       fa
  Freed heap region:       fd
  Stack left redzone:      f1
  Stack mid redzone:       f2
  Stack right redzone:     f3
  Stack after return:      f5
  Stack use after scope:   f8
  Global redzone:          f9
  Global init order:       f6
  Poisoned by user:        f7
  Container overflow:      fc
  Array cookie:            ac
  Intra object redzone:    bb
  ASan internal:           fe
  Left alloca redzone:     ca
  Right alloca redzone:    cb
==212239==ABORTING

greenrongreen · March 26, 2025, 6:45pm

Bug ID is CMPLRLLVM-66457

Beliavsky · April 2, 2025, 6:37pm

For gfortran the bug report is

greenrongreen · April 29, 2025, 1:09am

We have a fix for this. We are close to code freeze for the 2025.2 Update but we got this into our 2025.2 code branch, as I know it is important. This Update is due to release mid-summer - with a strong caveat: Mid summer is our current schedule.
I have good news.
The root cause is when the rhs is a record constructor. We can work around this by disabling the inline assignment optimization phase. There is an internal undocumented and UNSUPPORTED compiler option that can disable inline assignment optimization. I want to emphasize UNSUPPORTED - this compiler option can disappear without notice at any update or patch. It will never show up in documentation or the -help output. So you can use this for validation but I would not make this a part of your production build options. Good for testing. It could possibly impact performance (though in this case I rather doubt it will hurt performance ). That said, here is the option:
Linux -switch disable_opt_inline_assign
Windows: /switch:disable_opt_inline_assign
I verifed this with the 2025.1 compiler.

I want to thank you for sending this issue to Intel. Regressions like this are regrettable but happen. To quote the title of a great book “To Engineer is Human”.

Topic		Replies	Views
Strange behaviour of an array of types constituted by two or more integers	5	325	November 16, 2021
Type component with len attribute must be default integer kind Help	17	1149	October 25, 2021
Doubts about polymorphism in Fortran	3	295	February 28, 2025
Automatic array of derived type with allocatable component GNU	2	225	August 15, 2024
Fortran Polymorphism Assignment Issue Help	4	388	January 10, 2024

Derived type initialized with array sections giving unexpected results

Related topics