It is quite useful but it doesn’t seem to do timing. Its interesting to the branch coverage and generally how often things are called. For example check out this line:
72605614: 100: h = mod(h+1, l)
In the context of the code it means the hash I’m using generated about 72,605,614 conflicts.
Timings would be much better though…
-: 0:Source:1brc_hash.f90
-: 0:Graph:1brc_hash.gcno
-: 0:Data:1brc_hash.gcda
-: 0:Runs:1
-: 1:module row_types
-: 2: implicit none
-: 3:
-: 4: type :: row_ptr
-: 5: type(row), pointer :: p => null()
-: 6: end type row_ptr
-: 7:
-: 8: type :: row
-: 9: character(len=:), allocatable :: key
-: 10: real :: min
-: 11: real :: max
-: 12: real :: sum
-: 13: integer :: count
-: 14: end type row
#####: 15:end module row_types
------------------
__row_types_MOD___copy_row_types_Row:
#####: 15:end module row_types
------------------
__row_types_MOD___final_row_types_Row:
#####: 15:end module row_types
------------------
__row_types_MOD___copy_row_types_Row_ptr:
#####: 15:end module row_types
------------------
-: 16:
-: 17:module builder
-: 18: use row_types
-: 19: implicit none
-: 20:
-: 21:contains
-: 22:
1000000000: 23: pure function str2real (str) result(f)
-: 24: character(len=*), intent(in) :: str
-: 25: integer :: i, off
-: 26: integer, parameter :: zero = ichar('0')
-: 27: real :: f
-: 28:
1000000000: 29: off = merge (2, 1, str(1:1)=='-')
1000000000: 30: i = index(str(off:),'.')
1000000000: 31: if (i == 2) then
99579868: 32: f = ichar(str(off:off)) - zero
-: 33: else
900420132: 34: f = (ichar(str(off:off)) - zero)*10 + ichar(str(off+1:off+1)) - zero
-: 35: end if
1000000000: 36: f = f + (ichar(str(off+i:off+i)) - zero) / 10.0
1000000000: 37: if (off == 2) f = -f
2000000000: 38: end function str2real
-: 39:
154784*: 40: subroutine update(buffer, hash_tbl)
-: 41: character(len=*), intent(in) :: buffer
-: 42: type(row_ptr), intent(inout) :: hash_tbl(:)
-: 43: character(len=1), parameter :: cr = achar(10)
-: 44: integer :: i, j, k
-: 45: real :: f
-: 46:
154784: 47: i = 1
1000154784: 48: do while (i <= len(buffer))
1000000000: 49: j = index(buffer(i:), ';')
1000000000: 50: k = index(buffer(i+j:), cr)
1000000000: 51: f = str2real (buffer(i+j:i+j+k-2))
1000000000: 52: call update_hash_tbl(buffer(i:i+j-2), f, hash_tbl)
1000000000: 53: i = i+j+k
-: 54: end do
154784: 55: end subroutine update
-: 56:
1000000000: 57: pure function hash(key,m) result(h)
-: 58: use, intrinsic :: iso_fortran_env, only: int64
-: 59: integer(int64), parameter :: prime = 16777619_int64
-: 60: integer(int64), parameter :: basis = 2166136261_int64
-: 61: integer(int64) :: h
-: 62: character(len=*), intent(in) :: key
-: 63: integer, intent(in) :: m
-: 64: integer :: i
1000000000: 65: h = basis
10435166426: 66: do i = 1, len(key)
9435166426: 67: h = ieor(h, iachar(key(i:i), int64))
10435166426: 68: h = mod(h * prime, 2_int64**32)
-: 69: end do
1000000000: 70: h = mod (h,m)
2000000000: 71: end function hash
-: 72:
1000000000*: 73: subroutine update_hash_tbl(key, val, hash_tbl)
-: 74: character(len=*), intent(in) :: key
-: 75: type(row_ptr), intent(inout) :: hash_tbl(:)
-: 76: type(row), pointer :: vals
-: 77: integer :: h, l
-: 78: real, intent(in) :: val
1000000000: 79: l = size(hash_tbl)
1000000000: 80: h = hash(key, l)
72605614: 81: do
1072605614: 82: vals => hash_tbl(h)%p
1072605614: 83: if (.not. associated(vals)) then
8875*: 84: allocate (hash_tbl(h)%p)
8875: 85: vals => hash_tbl(h)%p
8875*: 86: allocate(character(len=len(key)) :: vals%key)
8875*: 87: vals%key = key
8875: 88: vals%min = val
8875: 89: vals%max = val
8875: 90: vals%sum = val
8875: 91: vals%count = 1
8875: 92: exit
1072596739: 93: else if (vals%key == key) then
999991125: 94: if (val < vals%min) vals%min = val
999991125: 95: if (val > vals%max) vals%max = val
999991125: 96: vals%sum = vals%sum + val
999991125: 97: vals%count = vals%count + 1
999991125: 98: exit
-: 99: else
72605614: 100: h = mod(h+1, l)
-: 101: end if
-: 102: end do
1000000000: 103: end subroutine update_hash_tbl
-: 104:
1*: 105: recursive subroutine display(hash_tbl)
-: 106: type(row_ptr), intent(in) :: hash_tbl(:)
-: 107: type(row), pointer :: vals
-: 108: integer :: i
65536: 109: do i = 1, size(hash_tbl)
65536: 110: if (associated(hash_tbl(i)%p)) then
8875: 111: vals => hash_tbl(i)%p
8875: 112: print '(A,F5.1,F5.1,F5.1)', vals%key, &
17750: 113: vals%min, vals%max, vals%sum / vals%count
-: 114: end if
-: 115: end do
1: 116: end subroutine display
-: 117:
-: 118:end module builder
-: 119:
1: 120:program one_brc
1: 121: use row_types
-: 122: use builder
-: 123: implicit none
-: 124:
-: 125: integer, parameter :: buffer_size = 102400, tail_len=100
-: 126: integer, parameter :: hash_tbl_size = 65535
1: 127: character(len=:), allocatable :: buffer
-: 128: integer(kind=8) :: fd, read_size, off, start
65536: 129: type(row_ptr) :: hash_tbl(hash_tbl_size)
-: 130:
-: 131: open(newunit=fd, file='measurements.txt', access='stream', &
1: 132: form='unformatted', status='old')
1*: 133: inquire(unit=fd, size=read_size)
-: 134:
154784*: 135: do while (read_size > 0)
154784*: 136: allocate(character(len=min(read_size, buffer_size)) :: buffer)
154784*: 137: read(fd) buffer
154784: 138: if (read_size <= buffer_size) then
1: 139: call update(buffer, hash_tbl)
1: 140: exit
-: 141: end if
154783: 142: start = len(buffer)-tail_len
154783: 143: off = start + index(buffer(start:), achar(10))-1
154783: 144: call update(buffer(1:off), hash_tbl)
154783: 145: call fseek(fd, off - len(buffer), 1)
154783: 146: read_size = read_size - off
154783*: 147: deallocate(buffer)
-: 148: end do
1: 149: call display(hash_tbl)
1: 150:end program one_brc