@@ -9,12 +9,13 @@ using Base.Order
9
9
import Base. Sort: sort!
10
10
import DataStructures: heapify!, percolate_down!
11
11
12
- export HeapSort, TimSort, RadixSort, CombSort
12
+ export HeapSort, TimSort, RadixSort, CombSort, PagedMergeSort
13
13
14
14
# Field-less marker types, one per sorting algorithm, all subtypes of `Algorithm`.
# They exist for dispatch: e.g. `sort!(v, lo, hi, ::PagedMergeSortAlg, o)` selects
# the paged merge sort implementation. The exported constants (e.g. `CombSort`,
# `PagedMergeSort`) are built from instances of these types via `maybe_optimize`.
struct HeapSortAlg <: Algorithm end
struct TimSortAlg <: Algorithm end
struct RadixSortAlg <: Algorithm end
struct CombSortAlg <: Algorithm end
struct PagedMergeSortAlg <: Algorithm end
18
19
19
20
function maybe_optimize (x:: Algorithm )
20
21
isdefined (Base. Sort, :InitialOptimizations ) ? Base. Sort. InitialOptimizations (x) : x
@@ -51,6 +52,34 @@ Characteristics:
51
52
"""
52
53
const CombSort = maybe_optimize (CombSortAlg ())
53
54
55
"""
    PagedMergeSort

Indicates that a sorting function should use the paged merge sort
algorithm. Paged merge sort is a merge sort that uses different
merge routines to achieve stable sorting with a scratch space of size O(√n).
The merge routine for merging large subarrays merges
pages of size O(√n) almost in place, before reordering them using a page table.
At deeper recursion levels, where the scratch space is big enough,
normal merging is used, where one input is copied into the scratch space.
When the scratch space is large enough to hold the complete subarray,
the input is merged interleaved from both sides, which increases performance
for random data.

Characteristics:
  - *stable*: does preserve the ordering of elements which
    compare equal (e.g. "a" and "A" in a sort of letters which
    ignores case).
  - *`O(√n)`* auxiliary memory usage.
  - *`O(n log n)`* guaranteed runtime.

## References
 - Dvořák, S., Ďurian, B. (1986). Towards an efficient merging. In: Gruska, J., Rovan, B., Wiedermann,
   J. (eds) Mathematical Foundations of Computer Science 1986. MFCS 1986. Lecture Notes in Computer Science, vol 233.
   Springer, Berlin, Heidelberg. https://doi.org/10.1007/BFb0016253
 - https://max-arbuzov.blogspot.com/2021/10/merge-sort-with-osqrtn-auxiliary-memory.html
"""
const PagedMergeSort = maybe_optimize(PagedMergeSortAlg())
54
83
55
84
# # Heap sort
56
85
@@ -652,4 +681,252 @@ else
652
681
end
653
682
end
654
683
684
+ # ##
685
+ # PagedMergeSort
686
+ # ##
687
+
688
# Unchecked element-wise replacement for copyto!, used as a workaround for
# https://github.com/JuliaLang/julia/issues/50900
#
# Copies `n` elements from `src` (starting at index `soffs`) to `dest`
# (starting at index `doffs`), walking both ranges front to back.
# Bounds checks are disabled, so the caller must guarantee that both index
# ranges are valid. Returns `dest`.
function _unsafe_copyto!(dest, doffs, src, soffs, n)
    step = 0
    @inbounds while step < n
        dest[doffs + step] = src[soffs + step]
        step += 1
    end
    return dest
end
696
+
697
# Specialization for plain `Array`s: delegate directly to Base's `unsafe_copyto!`
# instead of looping element by element. Returns `dest`.
_unsafe_copyto!(dest::Array, doffs, src::Array, soffs, n) =
    unsafe_copyto!(dest, doffs, src, soffs, n)
700
+
701
# Merge the adjacent runs v[lo:m] (A) and v[m+1:hi] (B) through scratch[1:1+hi-lo].
# This is faster than merge! but requires twice as much auxiliary memory.
#
# The output is filled from both ends at once: the front cursor takes the smaller
# head of A/B and the back cursor takes the larger tail of A/B, so every loop
# iteration places two elements. Ties go to A at the front and to B at the back.
# Only balanced merges (run lengths differing by at most one) are supported.
function twoended_merge!(v::AbstractVector{T}, lo::Integer, m::Integer, hi::Integer, o::Ordering, scratch::AbstractVector{T}) where T
    @assert lo ≤ m ≤ hi
    @assert abs((m-lo) - (hi-(m+1))) ≤ 1 " twoended_merge! only supports balanced merges"
    len = 1 + hi - lo
    # read cursors into v
    front_a, back_a = lo, m
    front_b, back_b = m + 1, hi
    # write cursors into scratch
    front_out, back_out = 1, len
    @inbounds begin
        for _ in 1:(len ÷ 2)
            # smallest remaining element goes to the front of the output
            if !lt(o, v[front_b], v[front_a])
                scratch[front_out] = v[front_a]
                front_a += 1
            else
                scratch[front_out] = v[front_b]
                front_b += 1
            end
            front_out += 1
            # largest remaining element goes to the back of the output
            if lt(o, v[back_b], v[back_a])
                scratch[back_out] = v[back_a]
                back_a -= 1
            else
                scratch[back_out] = v[back_b]
                back_b -= 1
            end
            back_out -= 1
        end
        # an odd total length leaves exactly one element for the middle slot
        if front_a <= back_a
            scratch[front_out] = v[front_a]
        elseif front_b <= back_b
            scratch[front_out] = v[front_b]
        end
        # move the merged result from scratch back into v[lo:hi]
        for i in 1:len
            v[lo - 1 + i] = scratch[i]
        end
    end
    return nothing
end
749
+
750
# Core merging loop used throughout PagedMergeSort.
#
# While the predicate `f(a, b, k)` holds, write the smaller of `source_a[a]` and
# `source_b[b]` (according to `o`) to `target[k]` and advance the cursor it was
# taken from together with `k`. `source_b` is only preferred when it is strictly
# less, so ties are taken from `source_a`.
# Returns the updated cursors `(a, b, k)`.
Base.@propagate_inbounds function merge!(f::Function,
        target::AbstractVector{T}, source_a::AbstractVector{T}, source_b::AbstractVector{T},
        o::Ordering, a::Integer, b::Integer, k::Integer) where T
    @inbounds while true
        f(a, b, k) || break
        if !lt(o, source_b[b], source_a[a])
            target[k] = source_a[a]
            a += 1
        else
            target[k] = source_b[b]
            b += 1
        end
        k += 1
    end
    return (a, b, k)
end
766
+
767
# Merge v[lo:m] and v[m+1:hi] using scratch[1:1+m-lo]
# (based on Base.Sort MergeSort).
#
# The left run is first moved into `scratch`; the merge then writes straight
# into `v`, starting at `lo`. The loop stops as soon as the write cursor catches
# up with the read cursor of the right run (`k == b`) or the right run is used
# up (`b > hi`); the `b - k` elements still waiting in `scratch` are then copied
# into the remaining gap. Returns the result of that final copy.
function merge!(v::AbstractVector{T}, lo::Integer, m::Integer, hi::Integer, o::Ordering, scratch::AbstractVector{T}) where {T}
    left_len = m - lo + 1
    _unsafe_copyto!(scratch, 1, v, lo, left_len)
    keep_merging = (_, b, k) -> k < b <= hi
    a, b, k = merge!(keep_merging, v, scratch, v, o, 1, m + 1, lo)
    return _unsafe_copyto!(v, k, scratch, a, b - k)
end
775
+
776
# Immutable bookkeeping record used while merging into pages.
# It is never mutated in place: `next_page_A` / `next_page_B` build a new
# `Pages` value each time the merge moves on to another output page.
struct Pages
    current::Int # current page being merged into
    currentNumber::Int # number of current page (=index in pageLocations)
    nextA::Int # next possible page in A
    nextB::Int # next possible page in B
end
782
+
783
# Continue merging into the next candidate page of A: that page becomes the
# current one, the page counter grows by one and `nextA` moves on by one page.
function next_page_A(pages::Pages)
    return Pages(pages.nextA, pages.currentNumber + 1, pages.nextA + 1, pages.nextB)
end

# Continue merging into the next candidate page of B: that page becomes the
# current one, the page counter grows by one and `nextB` moves on by one page.
function next_page_B(pages::Pages)
    return Pages(pages.nextB, pages.currentNumber + 1, pages.nextA, pages.nextB + 1)
end
785
+
786
# Choose the page that receives the next block of merged output and record the
# choice in the page table.
#
# When the read cursor `a` is already beyond `pages.nextA * pagesize + lo`, every
# element of page `nextA` has been read, so that page of A is used; otherwise the
# next page of B is taken. The chosen page is stored in
# `pageLocations[currentNumber]`. Returns the updated `Pages` value.
Base.@propagate_inbounds function next_page!(pageLocations, pages, pagesize, lo, a)
    a_page_consumed = a > pages.nextA * pagesize + lo
    advanced = a_page_consumed ? next_page_A(pages) : next_page_B(pages)
    pageLocations[advanced.currentNumber] = advanced.current
    return advanced
end
795
+
796
# Walk one chain of the page permutation, moving pages to their final position.
#
# As long as `f(page)` holds: look up which page currently holds the data that
# belongs to `page` (page table entry `page - 3`), mark `page` as being in place,
# copy the `pagesize` elements over and continue with the page that was just
# vacated. Returns the page at which the walk stopped.
Base.@propagate_inbounds function permute_pages!(f, v, pageLocations, page_offset, pagesize, page)
    while f(page)
        slot = page - 3
        holder = pageLocations[slot] # holder has data belonging to page
        pageLocations[slot] = page
        _unsafe_copyto!(v, page_offset(page) + 1, v, page_offset(holder) + 1, pagesize)
        page = holder
    end
    return page
end
805
+
806
# merge v[lo:m] (A) and v[m+1:hi] (B) using scratch[] in O(sqrt(n)) space
#
# Arguments:
# - `v`: vector holding the two adjacent runs to merge
# - `lo`, `m`, `hi`: run boundaries; requires lo < m < hi and length(A) <= length(B)
# - `o`: ordering passed on to the merge loops
# - `scratch`: buffer that must hold at least 3 pages, where one page has
#   isqrt(length(A) + length(B)) elements
# - `pageLocations`: page table with at least nPages - 3 entries; entry `p - 3`
#   records which page of `v` currently stores the data belonging to page `p`
#
# Outline:
# 1. If A fits into `scratch`, fall back to the regular `merge!`.
# 2. Merge the first 3 pages of output into `scratch`.
# 3. Keep merging page by page into pages of `v` chosen by `next_page!`,
#    recording each choice in `pageLocations`.
# 4. Once one input is exhausted (or the last full page is reached), copy the rest.
# 5. Move every page to its final position by following the permutation
#    stored in `pageLocations`.
#
# The caller in this file (`pagedmergesort!`) ignores the return value.
function paged_merge!(v::AbstractVector{T}, lo::Integer, m::Integer, hi::Integer, o::Ordering, scratch::AbstractVector{T}, pageLocations::AbstractVector{<:Integer}) where {T}
    @assert lo < m < hi
    lenA = 1 + m - lo
    lenB = hi - m

    # this function only supports merges with length(A) <= length(B),
    # which is guaranteed by pagedmergesort!
    @assert lenA <= lenB

    # regular merge if scratch is big enough
    lenA <= length(scratch) && return merge!(v, lo, m, hi, o, scratch)

    len = lenA + lenB
    pagesize = isqrt(len)
    nPages = len ÷ pagesize # a partial page at the end does not count
    @assert length(scratch) >= 3pagesize
    @assert length(pageLocations) >= nPages - 3

    # index in v just before the first element of `page` (pages are numbered from 1 at `lo`)
    @inline page_offset(page) = (page - 1) * pagesize + lo - 1

    @inbounds begin
        ##################
        # merge
        ##################
        # merge the first 3 pages into scratch
        a, b, _ = merge!((_, _, k) -> k <= 3pagesize, scratch, v, v, o, lo, m + 1, 1)
        # initialize variables for merging into pages
        # nextA starts at page 1; (m - lo) ÷ pagesize + 1 is the page containing v[m],
        # so nextB starts at the page after it
        pages = Pages(-17, 0, 1, (m - lo) ÷ pagesize + 2) # first argument is unused
        # more efficient loop while more than pagesize elements of A and B are remaining
        # (the stop condition only has to watch the output index k)
        while_condition1(offset) = (_, _, k) -> k <= offset + pagesize
        while a < m - pagesize && b < hi - pagesize
            pages = next_page!(pageLocations, pages, pagesize, lo, a)
            offset = page_offset(pages.current)
            a, b, _ = merge!(while_condition1(offset), v, v, v, o, a, b, offset + 1)
        end
        # merge until either A or B is empty or the last page is reached
        # k and offset are placeholders here; they are assigned by the loop below
        # (or, for k, by the last-page branch) before being read
        k, offset = nothing, nothing
        while_condition2(offset) = (a, b, k) -> k <= offset + pagesize && a <= m && b <= hi
        while a <= m && b <= hi && pages.currentNumber + 3 < nPages
            pages = next_page!(pageLocations, pages, pagesize, lo, a)
            offset = page_offset(pages.current)
            a, b, k = merge!(while_condition2(offset), v, v, v, o, a, b, offset + 1)
        end
        # if the last page is reached, merge the remaining elements into the final partial page
        if pages.currentNumber + 3 == nPages && a <= m && b <= hi
            a, b, k = merge!((a, b, _) -> a <= m && b <= hi, v, v, v, o, a, b, nPages * pagesize + lo)
            # one input is exhausted now; append what is left of the other one
            _unsafe_copyto!(v, k, v, a <= m ? a : b, hi - k + 1)
        else
            # exactly one input still has elements; remember which one
            use_a = a <= m
            # copy the incomplete page
            partial_page_size = offset + pagesize - k + 1
            _unsafe_copyto!(v, k, v, use_a ? a : b, partial_page_size)
            use_a && (a += partial_page_size)
            use_a || (b += partial_page_size)
            # copy the remaining full pages
            while use_a ? a <= m - pagesize + 1 : b <= hi - pagesize + 1
                pages = next_page!(pageLocations, pages, pagesize, lo, a)
                offset = page_offset(pages.current)
                _unsafe_copyto!(v, offset + 1, v, use_a ? a : b, pagesize)
                use_a && (a += pagesize)
                use_a || (b += pagesize)
            end
            # copy the final partial page only if sourcing from A.
            # If sourcing from B, it is already in place.
            use_a && _unsafe_copyto!(v, hi - m + a, v, a, m - a + 1)
        end

        ##################
        # rearrange pages
        ##################
        # copy pages belonging to the 3 permutation chains ending with a page in the scratch space
        nextA, nextB = pages.nextA, pages.nextB

        for _ in 1:3
            # start each chain at a page that was not used as a merge target
            page = (nextB > nPages ? (nextA += 1) : (nextB += 1)) - 1
            page = permute_pages!(>(3), v, pageLocations, page_offset, pagesize, page)
            # the chain ends at one of pages 1:3, whose data is waiting in scratch
            _unsafe_copyto!(v, page_offset(page) + 1, scratch, (page - 1) * pagesize + 1, pagesize)
        end

        # copy remaining permutation cycles
        for donePageIndex = 5:nPages
            # linear scan through pageLocations to make sure no cycle is missed
            page = pageLocations[donePageIndex-3]
            # page is already in its final position
            page == donePageIndex && continue

            # copy the data belonging to donePageIndex into scratch
            _unsafe_copyto!(scratch, 1, v, page_offset(page) + 1, pagesize)

            # follow the cycle starting with the newly freed page
            permute_pages!(!=(donePageIndex), v, pageLocations, page_offset, pagesize, page)
            # close the cycle with the data saved in scratch
            _unsafe_copyto!(v, page_offset(donePageIndex) + 1, scratch, 1, pagesize)
        end
    end
end
901
+
902
# midpoint was added to Base.sort in version 1.4 and later moved to Base
# -> redefine for compatibility with earlier versions
#
# Returns the middle of lo:hi, rounded down. The half-distance is added to `lo`
# rather than averaging `lo + hi`, so the sum of both bounds is never formed.
function midpoint(lo::Integer, hi::Integer)
    half_span = (hi - lo) >>> 0x01
    return lo + half_span
end
905
+
906
# Recursively sort v[lo:hi] according to `o`.
#
# Ranges of at most Base.SMALL_THRESHOLD elements are handed to insertion sort.
# Larger ranges are split at the rounded-down midpoint, both halves are sorted
# recursively and then merged: with `twoended_merge!` when the whole range fits
# into `scratch`, otherwise with `paged_merge!` (which also uses `pageLocations`).
# Returns `v`, except on the insertion-sort path, which returns whatever
# `Base.Sort.sort!` returns.
function pagedmergesort!(v::AbstractVector{T}, lo::Integer, hi::Integer, o::Ordering, scratch::AbstractVector{T}, pageLocations) where {T}
    len = hi + 1 - lo
    len <= Base.SMALL_THRESHOLD &&
        return Base.Sort.sort!(v, lo, hi, Base.Sort.InsertionSortAlg(), o)
    # hi-1: ensure midpoint is rounded down. OK, because lo < hi is satisfied here
    m = midpoint(lo, hi - 1)
    pagedmergesort!(v, lo, m, o, scratch, pageLocations)
    pagedmergesort!(v, m + 1, hi, o, scratch, pageLocations)
    needs_paging = !(len <= length(scratch))
    if needs_paging
        paged_merge!(v, lo, m, hi, o, scratch, pageLocations)
    else
        twoended_merge!(v, lo, m, hi, o, scratch)
    end
    return v
end
921
+
922
# Entry point for sorting v[lo:hi] with PagedMergeSort.
#
# Ranges with fewer than two elements are returned untouched. Otherwise the
# auxiliary buffers are allocated once for the whole sort: a scratch vector of
# three pages (page size isqrt(n)) and a page table with one entry per full
# page beyond the first three. Returns `v`.
function sort!(v::AbstractVector, lo::Integer, hi::Integer, ::PagedMergeSortAlg, o::Ordering)
    if lo < hi
        n = hi + 1 - lo
        pagesize = isqrt(n)
        nPages = n ÷ pagesize
        scratch = Vector{eltype(v)}(undef, 3pagesize)
        pageLocations = Vector{Int}(undef, max(0, nPages - 3))
        pagedmergesort!(v, lo, hi, o, scratch, pageLocations)
    end
    return v
end
655
932
end # module
0 commit comments