Hello all,
Not sure this is ready, but it is working. This took a LOT of time to figure out because almost every tutorial was just that little bit off. The whole idea was to use multiple threads to shuffle multiple sorted lists into a very long fully sorted list. At first it really didn’t work and took WAY longer than a single threaded sort. Now it works and suspiciously takes far less time… Curious…
! Parallel Sort Test by Frank Meyer
! Created Dec 2021
! Version 0.1a
! Updates:
program ParallelSort
use iso_fortran_env , only: atomic_int_kind, lock_type
implicit none
! Global type and variable declarations
! Structure to house the data to be sorted
type :: t_sortelement
integer :: i_data
type(t_sortelement), pointer :: p_next, p_prev
end type t_sortelement
! Structure to organize a list of lists which will be used to perform the merge sorting.
type :: t_tagelement
integer :: i_debug
type(t_sortelement), pointer :: p_list, p_ilist, p_outlist
type(t_tagelement), pointer :: p_next, p_prev
end type t_tagelement
! Structure to help with moving data.
type :: t_syncmessage
integer :: i_small
integer :: i_large
integer :: i_iterations
end type t_syncmessage
! Normal variables for each threads to operate.
integer :: i_elements, i_all, i_me, i_need, i_doing
integer :: i_loop1, i_loop2, i_rand, i_state, i_left, i_right, i_iterations
real :: r_rand
logical :: l_notdone
integer :: i_ourdebug, i_ourout
character(50) :: c_filename, c_threadnm, c_temp
integer :: int_reging
integer :: i_neighbours, i_neighbourl
! Coarrays for the number of elements, this image's smallest/largest number and happiness.
integer(atomic_int_kind) :: i_small[*], i_large[*], i_unhappy[*]
type(t_syncmessage) :: t_messages[*]
! Pointers to hold our lists.
type(t_sortelement), pointer :: p_listroot, p_listnow, p_listnew, p_listtemp
type(t_tagelement), pointer :: p_tagroot, p_tagnow, p_tagnew
! Pointers to help with merging.
type(t_sortelement), pointer :: p_righthand, p_lefthand, p_centerhand, p_temphand
type(t_syncmessage), pointer :: t_localcopy
! Assign starting values to variables.
i_elements = 19
i_me = this_image()
i_all = num_images()
i_need = i_all
int_reging = i_all
i_state = 0
l_notdone = .true.
i_unhappy = 2
i_left = i_me - 1
i_right = i_me + 1
i_neighbours = 0
i_neighbourl = 99999999
! Be sure of null() in our lists.
p_listroot => null()
p_listnow => null()
p_listnew => null()
p_listtemp => null()
! Deny entry if only 1 image!
if (i_all .eq. 1) then
print *, "Please restart with more than 1 image!"
call exit(0)
end if
! Program code goes here!
! Starting with using thread 1 to do some startup tasks.
if (i_me .eq. 1) then
print *, "************************************************************************"
print *, "Welcome to the Parallel Sort Program by Frank Meyer"
print *, "************************************************************************"
print "(3/,a)", "We hope this works...."
print "(a,$)", "How many elements? "
read *, i_elements
end if
call co_broadcast(i_elements, 1)
sync all ! Make sure everyone knows the total elements we are to deal with.
! If total number of elements does not divide equally by our thread count, then add to the last threads quota.
i_doing = i_elements / i_all
if (i_me .eq. i_all) then
i_doing = i_doing + mod(i_elements, i_all)
end if
! Open our debug file for writing. Use a write statement to create the file name
write(c_threadnm, "(i4.4)") i_me
c_threadnm = trim(c_threadnm)
write(c_filename, "(a5,a4,a4)") "debug", c_threadnm, ".txt"
print *, "Our debug file will be: ", c_filename
open(newunit=i_ourdebug, file=c_filename, status="replace")
! Ready list for first creation.
allocate(p_listroot)
p_listroot%p_next => null()
p_listroot%p_prev => null()
p_listnow => p_listroot
! Use each thread to create some of the total list elements.
! Add each new list item to the bottom of the current list.
do i_loop1 = 1, i_doing
call random_number(r_rand) ! Get random number,
r_rand = r_rand * 1000000 ! make it worthwhile,
i_rand = int(r_rand)
p_listnow%i_data = i_rand ! Put data into element.
p_listnew => null() ! Create new element.
allocate(p_listnew)
p_listnow%p_next => p_listnew
p_listnew%p_prev => p_listnow
p_listnew%p_next => null()
p_listnow => p_listnew
end do
! Burn last because we don't actually need it.
p_listnow => p_listnow%p_prev
p_listnow%p_next => null()
deallocate(p_listnew)
! Because I couldn't think of how to do this in a multiple processor way, I've just decided to sort my list
! and then shuffle data up and down until 1<2<3<4<x. It's probably not the right way to do this, but
! it's what my brain came up with. The end will be very atomic and probably very slow!
! Edit: This might be the slowest way to do this! My single threaded version of this can easily beat it!
! Make each thread perform it's own import sort. I'm not worried at this time to check for already
! sorted list as that should be very improbable! When done we should have lists of lists.
p_listnow => p_listroot
allocate(p_tagroot)
p_tagnow => p_tagroot
do i_loop1 = 1, i_doing
if (i_state .eq. 0) then ! This should create the next link in the chain of lists
allocate(p_tagnew) ! and put the current list item into it.
p_tagnow%p_next => p_tagnew
p_tagnew%p_prev => p_tagnow
p_tagnew%p_next => null()
p_tagnow => p_tagnew
p_tagnow%p_list => p_listnow
p_tagnow%p_ilist => p_listnow
p_listnew => p_listnow%p_next
p_listnow%p_prev => null() ! Remove it from the list it was in.
p_listnow%p_next => null() ! Hint next operation of the loop.
if (.not. associated(p_listnew)) then
exit
end if
if (p_listnew%i_data .lt. p_listnow%i_data) then
i_state = 1 ! Next is smaller than first item.
else
i_state = 2 ! Next is larger than first item.
end if
else
if (i_state .eq. 1) then ! Insert the next item in the list to the front of this list.
p_listtemp =>p_listnew%p_next ! Save current next!
p_listnow%p_prev => p_listnew
p_listnew%p_next => p_listnow
p_listnew%p_prev => null()
p_tagnow%p_list => p_listnew ! Hint next operation of the loop.
p_listnow => p_listnew
p_listnew => p_listtemp
if (.not. associated(p_listnew)) then
exit
end if
if (p_listnew%i_data .gt. p_listnow%i_data) then
i_state = 0 ! Next item reverses current flow; create new list.
p_listnow => p_listnew
end if
end if
if (i_state .eq. 2) then ! Insert the next item in the list to the last of this list.
p_listtemp =>p_listnew%p_next ! Save current next!
p_listnow%p_next => p_listnew
p_listnew%p_prev => p_listnow
p_listnew%p_next => null()
p_tagnow%p_ilist => p_listnew ! Hint next operation of the loop
p_listnow => p_listnew
p_listnew => p_listtemp
if (.not. associated(p_listnew)) then
exit
end if
if (p_listnew%i_data .lt. p_listnow%i_data) then
i_state = 0 ! Next item reverses current flow; create new list.
p_listnow => p_listnew
end if
end if
end if
end do
! Lets see what is about to sort. Printing full list.
p_tagnew => p_tagroot%p_next
i_loop1 = 1
do while(associated(p_tagnew))
i_loop2 = 1
p_tagnew%i_debug = i_loop1
p_lefthand => p_tagnew%p_list
do while(associated(p_lefthand))
write(i_ourdebug, "(i3, i3, i10)") i_loop1, i_loop2, p_lefthand%i_data
i_loop2 = i_loop2 + 1
p_lefthand => p_lefthand%p_next
end do
i_loop1 = i_loop1 + 1
p_tagnew => p_tagnew%p_next
end do
! We should now have a list of lists for each image. We do not need to synchronize the images now, just start
! the merging and wait for the synchronization until all the images have only one list left. Then, just before
! the call to sync all, use the coarray to set my top and bottom numbers.
! Cute trick here; I create a circle of lists and take turns steping forward and deallocating
! each second list header. Then when we have only one list left it will be linked to itself!
p_tagnow => p_tagroot%p_next ! Start at the beginning and work forward.
p_tagnew => p_tagroot%p_next
deallocate(p_tagroot) ! Don't need this pointer right now.
do while(associated(p_tagnew%p_next)) ! Where's the end?
p_tagnew => p_tagnew%p_next
end do
p_tagnew%p_next => p_tagnow ! Found! Now create the ring.
p_tagnow%p_prev => p_tagnew
p_tagnew => p_tagnow%p_next ! Set the pointers for the start of the sort.
do while (.not. associated(p_tagnew, p_tagnow)) ! Start the sort!
p_lefthand => p_tagnow%p_list ! Left hand and
p_righthand => p_tagnew%p_list ! Right hand get added one by one
if (p_lefthand%i_data < p_righthand%i_data) then
p_centerhand => p_lefthand ! to the center hand.
p_lefthand => p_lefthand%p_next
else
p_centerhand => p_righthand ! to the center hand.
p_righthand => p_righthand%p_next
end if
p_tagnow%p_outlist => p_centerhand ! And set the center hand as our outlist.
l_notdone = .true.
do while(l_notdone) ! None, one, or any left in each hand needs to be outlisted.
if (associated(p_lefthand) .and. associated(p_righthand)) then
if (p_lefthand%i_data < p_righthand%i_data) then
p_centerhand%p_next => p_lefthand ! Left hand adds to center hand.
p_lefthand%p_prev => p_centerhand
p_temphand => p_lefthand%p_next
p_lefthand => p_temphand
else
p_centerhand%p_next => p_righthand ! Right hand adds to center hand.
p_righthand%p_prev => p_centerhand
p_temphand => p_righthand%p_next
p_righthand => p_temphand
end if
p_centerhand => p_centerhand%p_next
p_centerhand%p_next => null()
else
! One hand or the other is done. Add the other to center and finish out.
If (associated(p_lefthand)) then
p_centerhand%p_next => p_lefthand ! Left hand is remaining.
p_lefthand%p_prev => p_centerhand
else
p_centerhand%p_next => p_righthand ! Right hand is remaining.
p_righthand%p_prev => p_centerhand
end if
l_notdone = .false.
do while (associated(p_centerhand%p_next)) ! Either way get the inverse
p_centerhand => p_centerhand%p_next ! list ready.
end do
p_tagnow%p_ilist => p_centerhand
end if
end do
p_tagnow%p_list => p_tagnow%p_outlist ! Move out to in,
p_tagnow%p_next => p_tagnew%p_next ! move right hand out,
deallocate(p_tagnew) ! remove the each second list,
p_tagnow => p_tagnow%p_next ! and finish on the next next.
p_tagnew => p_tagnow%p_next
end do
p_tagroot => p_tagnow ! We can only be here when sorted, so make the sorted list our root.
! Print the entire sorted list to debug.
write(i_ourdebug, *) "Our entire sorted list is;", i_me
p_lefthand => p_tagroot%p_list
do while(associated(p_lefthand))
write(i_ourdebug, *) p_lefthand%i_data
p_lefthand => p_lefthand%p_next
end do
! Before we hit the sync all we should post our up and down numbers. This was the
! most rediculous set of issues in the whole build! Errors thrown for no reason
! and NO good instructions anywhere! Finally realized it was the lack of good error
! reporting or manuals and started asking the community for help... But after
! trial and error a _possible_ answer was found. I still don't trust this, but...
! It works...
! Using the root's forward and inverse lists allows finding the small and large easy!
! We should all be ready to swap items that do not make us happy.
i_iterations = 0
l_notdone = .true.
do while (l_notdone)
! Post our new numbers to the local copy then sync.
i_iterations = i_iterations + 1 ! Added to watch the number of iterations.
p_lefthand => p_tagroot%p_list
p_righthand => p_tagroot%p_ilist
i_small = p_lefthand%i_data
i_large = p_righthand%i_data
allocate(t_localcopy)
t_localcopy%i_small = i_small
t_localcopy%i_large = i_large
t_localcopy%i_iterations = i_iterations
t_messages = t_localcopy
deallocate(t_localcopy)
! Sync! Sync! It's bound to help keep everything in line!
call execute_command_line('') ! And be sure to cause I/O to dump out!
sync all
! Pushing didn't work, how about pulling?
! Depending who we are, we should check our neighbours numbers.
allocate(t_localcopy)
if (i_me .gt. 1) then ! All but first has a left!
write(i_ourdebug, *) "Pulling left."
t_localcopy = t_messages[i_left]
do while(t_localcopy%i_iterations .ne. i_iterations)
call sleep(1)
t_localcopy = t_messages[i_left]
write(i_ourdebug, *) "again."
end do
i_neighbourl = t_messages[i_left]%i_large
if (i_small .ge. i_neighbourl) then
i_unhappy = 0
else
! Unhappy? Then delete from our list and add the remote data.
i_unhappy = 2
call SubNumberLeft(i_neighbourl, p_tagroot, i_ourdebug)
end if
end if
deallocate(t_localcopy)
allocate(t_localcopy)
if (i_me .lt. i_all) then ! All but last has a right!
write(i_ourdebug, *) "Pulling right."
t_localcopy = t_messages[i_right]
do while(t_localcopy%i_iterations .ne. i_iterations)
call sleep(1)
t_localcopy = t_messages[i_right]
write(i_ourdebug, *) "again."
end do
i_neighbours = t_messages[i_right]%i_small
if (i_large .le. i_neighbours) then
i_unhappy = 0
else
! Do the same here, but from the right.
i_unhappy = i_unhappy + 1
call SubNumberRight(i_neighbours, p_tagroot, i_ourdebug)
end if
end if
deallocate(t_localcopy)
write(i_ourdebug, *) "Iterations; ", i_iterations, i_neighbours, i_neighbourl
sync all ! Be aware of the race condition to these numbers!
! Debugging text...
write(i_ourdebug, *) "Happinesses: ", i_unhappy
! Everything needs to be synced again, but before we do that we should output some bugcheck
! data and check for happiness. If the max unhappy was zero, then we must be done!
call co_max(i_unhappy)
if (i_unhappy .eq. 0) then
l_notdone = .false.
else
l_notdone = .true.
end if
! Post a new list to our debug either way.
write(i_ourdebug, *) "Our new sorted list is;", i_me
p_lefthand => p_tagroot%p_list
p_righthand => p_lefthand%p_next
do while(associated(p_lefthand))
write(i_ourdebug, *) i_me, p_lefthand%i_data
p_lefthand => p_lefthand%p_next
end do
! Sync! Sync! It's bound to help keep everything in line!
call execute_command_line('') ! And be sure to cause I/O to dump out!
sync all
end do
! Close our debug and open our final answer!
close(i_ourdebug)
write(c_filename, "(a6,a4,a4)") "output", c_threadnm, ".txt"
print *, "Our output file will be: ", c_filename
open(newunit=i_ourout, file=c_filename, status="replace")
! Ok! All done! Chances are there are a LOT of numbers to display here so if we just left the threads
! to throw their numbers down, this would look like number salad. An orderly print of all this will
! take time. I could make each thread wait their turn and then print... Sounds like a good idea!
! Edit: That was silly! Lets print all this to a set of files.
write(i_ourout, "(a, i8, a)") "Reporting from image ", i_me, ":"
write(i_ourout, *) "Final report after ", i_iterations, " iterations!"
p_lefthand => p_tagroot%p_list
i_loop2 = 1
do while (associated(p_lefthand))
write(i_ourdebug, *) i_loop2, p_lefthand%i_data
i_loop2 = i_loop2 + 1
p_lefthand => p_lefthand%p_next
end do
! Close our last file
close(i_ourout)
! That's all folkes! Have a great life!
! Now on to the subroutines:
! I tried very hard not to separate any work out of the main program, but this just kept causing me
! issues so I have separated it out to make it more understandable. What these two almost identical subroutines
! do is take something from either the left or the right and add it to our current sorted list in replacement
! for what ever we popped off either end.
contains
subroutine SubNumberLeft(i_newdata, p_tagroot, i_ourdebug)
implicit none
! Declare incoming variables and scope.
integer, intent(in) :: i_newdata, i_ourdebug
type(t_tagelement), pointer, intent(inout) :: p_tagroot
! Declare local variables
type(t_sortelement), pointer :: p_list, p_ilist, p_listnew, p_listtemp
integer :: i_olddata
logical :: l_working, l_unsure
! Initialize local variables
l_working = .true.
! We need to sub the item to the left so remove the current left from the list.
p_list => p_tagroot%p_list
p_listtemp => p_list%p_next
write(i_ourdebug, *) i_me, " is deleting: ", p_list%i_data, " for ", i_newdata
deallocate(p_list)
p_tagroot%p_list => p_listtemp
p_listtemp%p_prev => null()
! Now create a new element and try to put it in place in the list.
! Try the easy answer first and work backwards using the double linked list.
allocate(p_listnew)
p_listnew%i_data = i_newdata
p_ilist => p_tagroot%p_ilist
i_olddata = p_ilist%i_data
! Is this the new right hand number? Check and link in if so.
if (i_olddata .lt. i_newdata) then
p_ilist%p_next => p_listnew
p_listnew%p_prev => p_ilist
p_listnew%p_next => null()
p_tagroot%p_ilist => p_listnew
else
! If not the right most number, then a number before that. Start moving up the list to see
! where this goes.
do while (l_working)
p_ilist => p_ilist%p_prev
if (associated(p_ilist)) then ! p_prev will be null when we reach the first element.
i_olddata = p_ilist%i_data ! Otherwise get data for testing.
if (i_olddata .lt. i_newdata) then ! If data fits, link into the list.
p_listtemp => p_ilist%p_next
p_ilist%p_next => p_listnew
p_listtemp%p_prev => p_listnew
p_listnew%p_next => p_listtemp
p_listnew%p_prev => p_ilist
l_working = .false.
end if
else
! We have reached the start of the list, so link it as the new first element.
p_listtemp => p_tagroot%p_list
p_tagroot%p_list => p_listnew
p_listnew%p_prev => null()
p_listnew%p_next => p_listtemp
p_listtemp%p_prev => p_listnew
l_working = .false.
end if
end do
end if
end subroutine SubNumberLeft
subroutine SubNumberRight(i_newdata, p_tagroot, i_ourdebug)
implicit none
! Declare incoming variables and scope.
integer, intent(in) :: i_newdata, i_ourdebug
type(t_tagelement), pointer, intent(inout) :: p_tagroot
! Declare local variables
type(t_sortelement), pointer :: p_list, p_ilist, p_listnew, p_listtemp
integer :: i_olddata
logical :: l_working
! Initialize local variables
l_working = .true.
! We need to sub the number to the right so remove the current right from the list.
p_ilist => p_tagroot%p_ilist
p_listtemp => p_ilist%p_prev
write(i_ourdebug, *) i_me, " is deleting: ", p_ilist%i_data, " for ", i_newdata
deallocate(p_ilist)
p_tagroot%p_ilist => p_listtemp
p_listtemp%p_next => null()
! Now create a new element and try to put it in place in the list.
! Try the easy answer first and work forwards using the double linked list.
allocate(p_listnew)
p_listnew%i_data = i_newdata
p_list => p_tagroot%p_list
i_olddata = p_list%i_data
! Is this the new left hand number? Check and link in if so.
if (i_olddata .gt. i_newdata) then
p_list%p_prev => p_listnew
p_listnew%p_next => p_list
p_listnew%p_prev => null()
p_tagroot%p_list => p_listnew
else
! If not the left most number, then a number after that. Start moving down the list to see
! where this goes.
do while (l_working)
p_list => p_list%p_next
if (associated(p_list)) then ! p_next will be null when we reach the last element.
i_olddata = p_list%i_data ! Otherwise get data for testing.
if (i_olddata .gt. i_newdata) then ! If data fits, link into the list.
p_listtemp => p_list%p_prev
p_list%p_prev => p_listnew
p_listtemp%p_next => p_listnew
p_listnew%p_prev => p_listtemp
p_listnew%p_next => p_list
l_working = .false.
end if
else
! We have reached the end of the list, so link it as the new last element.
p_listtemp => p_tagroot%p_ilist
p_tagroot%p_ilist => p_listnew
p_listnew%p_next => null()
p_listnew%p_prev => p_listtemp
p_listtemp%p_next => p_listnew
l_working = .false.
end if
end do
end if
end subroutine SubNumberRight
end program
Link to download the file: ParallelSort.f90 - Google Drive
Knarfnarf