Mercury Coder is a new LLM that uses diffusion (used in image generation) rather than autoregression to generate code, as explained in this video. It is extremely fast since it generates the whole code at once, but it did get stuck on the prompt about simulating and fitting a mixture of normals. For the prompt
Define a Fortran derived type for a dataframe with character variable column names and integer indices. Simulate such a dataframe with random uniform data and print statistics on the columns. Declare real variables as real(kind=dp), where dp is a module parameter set to kind(1.0d0).
it generated working code once I told it to use fixed-length rather than deferred-length character variables for the column names.
!> A minimal dataframe: a rank-2 array of double-precision values whose
!> columns carry fixed-length character names, plus per-column statistics.
module DataFrameModule
  implicit none

  integer, parameter :: dp = kind(1.0d0)  ! kind used for all real data
  integer, parameter :: nlen = 10         ! length of every column name

  !> Table of real data stored as data(row, column).
  type :: DataFrame
    character(len=nlen), allocatable :: column_names(:)
    real(kind=dp), allocatable :: data(:, :)
    ! Default to an empty frame so an uninitialised object has defined
    ! counts instead of garbage loop bounds.
    integer :: n_rows = 0, n_cols = 0
  contains
    procedure :: initialize => initialize_dataframe
    procedure :: simulate_data => simulate_dataframe_data
    procedure :: print_stats => print_dataframe_stats
  end type DataFrame

contains

  !> Size the dataframe and store its column names.
  !>
  !> this         : dataframe to (re)initialise; previous contents are discarded.
  !> n_rows       : number of rows; negative values are treated as zero.
  !> n_cols       : number of columns; negative values are treated as zero.
  !> column_names : one name per column; names are blank-padded or truncated
  !>                to nlen characters on assignment.
  subroutine initialize_dataframe(this, n_rows, n_cols, column_names)
    class(DataFrame), intent(inout) :: this
    integer, intent(in) :: n_rows, n_cols
    character(len=*), intent(in) :: column_names(n_cols)

    ! ALLOCATE on an already allocated component is a run-time error, so
    ! release any storage left over from a previous initialisation first.
    if (allocated(this%column_names)) deallocate(this%column_names)
    if (allocated(this%data)) deallocate(this%data)

    ! Keep the stored counts consistent with the (zero-sized) allocation
    ! that a negative extent produces.
    this%n_rows = max(n_rows, 0)
    this%n_cols = max(n_cols, 0)

    allocate(this%column_names(this%n_cols))
    this%column_names = column_names
    ! Zero-fill so statistics requested before simulate_data are defined.
    allocate(this%data(this%n_rows, this%n_cols), source=0.0_dp)
  end subroutine initialize_dataframe

  !> Overwrite every element of the data array with values from the
  !> intrinsic random_number (uniform on [0, 1)).
  !> Does nothing if the dataframe has not been initialised.
  subroutine simulate_dataframe_data(this)
    class(DataFrame), intent(inout) :: this

    if (.not. allocated(this%data)) return
    call random_number(this%data)
  end subroutine simulate_dataframe_data

  !> Print the name, mean and standard deviation of each column to
  !> standard output using list-directed formatting.
  !> Does nothing if the dataframe has not been initialised.
  subroutine print_dataframe_stats(this)
    class(DataFrame), intent(in) :: this
    real(kind=dp) :: mean, std_dev
    integer :: j

    if (.not. allocated(this%data)) return
    if (.not. allocated(this%column_names)) return

    do j = 1, this%n_cols
      call calculate_mean_std(this%data(:, j), mean, std_dev)
      print *, "Column: ", trim(this%column_names(j))
      print *, "Mean: ", mean
      print *, "Standard Deviation: ", std_dev
      print *
    end do
  end subroutine print_dataframe_stats

  !> Mean and population standard deviation (divisor n) of a rank-1 array.
  !>
  !> data    : values to summarise.
  !> mean    : arithmetic mean of data; quiet NaN when data is empty.
  !> std_dev : sqrt(sum((data - mean)**2) / n); quiet NaN when data is empty.
  pure subroutine calculate_mean_std(data, mean, std_dev)
    use, intrinsic :: ieee_arithmetic, only: ieee_value, ieee_quiet_nan
    real(kind=dp), intent(in) :: data(:)
    real(kind=dp), intent(out) :: mean, std_dev
    integer :: n

    n = size(data)
    if (n == 0) then
      ! Statistics of an empty sample are undefined; report NaN explicitly
      ! rather than relying on a 0/0 division.
      mean = ieee_value(0.0_dp, ieee_quiet_nan)
      std_dev = mean
      return
    end if

    mean = sum(data) / real(n, kind=dp)
    ! Two-pass formula: the sum of squared deviations is never negative, so
    ! sqrt cannot receive a negative argument, and it avoids the cancellation
    ! of E[x**2] - mean**2 when the spread is small relative to the mean.
    std_dev = sqrt(sum((data - mean)**2) / real(n, kind=dp))
  end subroutine calculate_mean_std

end module DataFrameModule
!> Driver: build a 100-row, 3-column dataframe with columns A, B and C,
!> fill it with uniform random numbers and print per-column statistics.
program Main
  use DataFrameModule, only: DataFrame, nlen
  implicit none

  integer, parameter :: row_count = 100
  ! Column labels; the array size also determines the number of columns.
  character(len=nlen), parameter :: labels(3) = &
      [character(len=nlen) :: "A", "B", "C"]
  type(DataFrame) :: frame

  call frame%initialize(row_count, size(labels), labels)
  call frame%simulate_data()
  call frame%print_stats()
end program Main
giving sample output
Column: A
Mean: 0.47517106404222736
Standard Deviation: 0.31178191704286989
Column: B
Mean: 0.49751105740387586
Standard Deviation: 0.30548776910706921
Column: C
Mean: 0.50007198678270182
Standard Deviation: 0.29607418178862621
It is fun to experiment with given its speed, but I think other LLMs are a bit better for more involved coding problems.