[m-dev.] For review: change array__sort/1 to use samsort

Ralph Becket rbeck at microsoft.com
Tue Feb 13 02:32:05 AEDT 2001


Estimated hours taken: 3

library/array.m:
	Removed mergesort in favour of samsort which has much better
	performance on mostly sorted data and is within a hair as
	good on random data.

Index: array.m
===================================================================
RCS file: /home/mercury1/repository/mercury/library/array.m,v
retrieving revision 1.84
diff -u -r1.84 array.m
--- array.m	2001/02/08 17:20:47	1.84
+++ array.m	2001/02/12 15:26:39
@@ -1187,106 +1187,10 @@
 :- pragma type_spec(array__sort/1, T = int).
 :- pragma type_spec(array__sort/1, T = string).
 
-array__sort(A) = merge_sort(A).
+array__sort(A) = samsort_subarray(A, array__min(A), array__max(A)).
 
 
%---------------------------------------------------------------------------
---%
 
-    % Merge sort an array.
-    %
-:- func merge_sort(array(T)) = array(T).
-:- mode merge_sort(array_di) = array_uo is det.
-
-:- pragma type_spec(merge_sort/1, T = int).
-:- pragma type_spec(merge_sort/1, T = string).
-
-merge_sort(A) =
-    merge_sort_2(A, array__copy(A), 1, array__min(A), array__max(A)).
-
-
-
-    % Keep performing merging passes and doubling the size of the
-    % sorted subarrays until we're done.
-    %
-:- func merge_sort_2(array(T), array(T), int, int, int) = array(T).
-:- mode merge_sort_2(array_ui, array_di, in, in, in) = array_uo is det.
-
-:- pragma type_spec(merge_sort_2/5, T = int).
-:- pragma type_spec(merge_sort_2/5, T = string).
-
-merge_sort_2(A, B, N, Lo, Hi) =
-    ( if N >= Hi
-      then A
-      else merge_sort_2(merge_sort_3(A, B, Lo, N, Hi), A, N + N, Lo, Hi)
-    ).
-
-
-
-    % Perform a merge operation over each successive pair of
-    % sorted subarrays.
-    %
-:- func merge_sort_3(array(T), array(T), int, int, int) = array(T).
-:- mode merge_sort_3(array_ui, array_di, in, in, in) = array_uo is det.
-
-:- pragma type_spec(merge_sort_3/5, T = int).
-:- pragma type_spec(merge_sort_3/5, T = string).
-
-merge_sort_3(A, B0, I, N, Hi) = B :-
-    (      if I + N > Hi then
-                B  = copy_subarray(A, B0, I, Hi, I)
-      else if I + N + N > Hi then
-                B  = merge_subarrays(A, B0, I, I+N-1, I+N, Hi, I)
-      else
-                B1 = merge_subarrays(A, B0, I, I+N-1, I+N, I+N+N-1, I),
-                B  = merge_sort_3(A, B1, I+N+N, N, Hi)
-    ).
-
-%--------------------------------------------------------------------------
----%
-
-    % merges the two sorted consecutive subarrays Lo1 .. Hi1 and
-    % Lo2 .. Hi2 from A into the subarray starting at I in B.
-    % 
-:- func merge_subarrays(array(T), array(T), int, int, int, int, int) =
array(T).
-:- mode merge_subarrays(array_ui, array_di, in, in, in, in, in) = array_uo
-            is det.
-
-:- pragma type_spec(merge_subarrays/7, T = int).
-:- pragma type_spec(merge_subarrays/7, T = string).
-
-merge_subarrays(A, B0, Lo1, Hi1, Lo2, Hi2, I) = B :-
-    (      if Lo1 > Hi1 then B = copy_subarray(A, B0, Lo2, Hi2, I)
-      else if Lo2 > Hi2 then B = copy_subarray(A, B0, Lo1, Hi1, I)
-      else
-        X1 = A ^ elem(Lo1),
-        X2 = A ^ elem(Lo2),
-        compare(R, X1, X2),
-        (
-            R = (<),
-            B = merge_subarrays(A, B0^elem(I) := X1, Lo1+1, Hi1, Lo2, Hi2,
I+1)
-        ;
-            R = (=),
-            B = merge_subarrays(A, B0^elem(I) := X1, Lo1+1, Hi1, Lo2, Hi2,
I+1)
-        ;
-            R = (>),
-            B = merge_subarrays(A, B0^elem(I) := X2, Lo1, Hi1, Lo2+1, Hi2,
I+1)
-        )
-    ).
-
-%--------------------------------------------------------------------------
----%
-
-:- func copy_subarray(array(T), array(T), int, int, int) = array(T).
-:- mode copy_subarray(array_ui, array_di, in, in, in) = array_uo is det.
-
-:- pragma type_spec(copy_subarray/5, T = int).
-:- pragma type_spec(copy_subarray/5, T = string).
-
-copy_subarray(A, B, Lo, Hi, I) =
-    ( if Lo > Hi
-      then B
-      else copy_subarray(A, B ^ elem(I) := A ^ elem(Lo), Lo + 1, Hi, I + 1)
-    ).
-
-%
----------------------------------------------------------------------------
%
-
 array__random_permutation(A0, A, RS0, RS) :-
 	Lo = array__min(A0),
 	Hi = array__max(A0),
@@ -1357,5 +1261,204 @@
 			else foldr_0(Fn, A, Fn(A ^ elem(I), X), Min, I - 1)
 	).
 
-%
----------------------------------------------------------------------------
%
 %
----------------------------------------------------------------------------
%
+
+    % SAMsort (smooth applicative merge) invented by R.A. O'Keefe.
+    %
+    % SAMsort is a mergesort variant that works by identifying contiguous
+    % monotonic sequences and merging them, thereby taking advantage of
+    % any existing order in the input sequence.
+    %
+:- func samsort_subarray(array(T), int, int) = array(T).
+:- mode samsort_subarray(array_di, in, in) = array_uo is det.
+
+:- pragma type_spec(samsort_subarray/3, T = int).
+:- pragma type_spec(samsort_subarray/3, T = string).
+
+samsort_subarray(A0, Lo, Hi) = A :-
+    samsort_up(0, A0, _, array__copy(A0), A, Lo, Hi, Lo).
+
+
+
+:- pred samsort_up(int, array(T), array(T), array(T), array(T), int, int,
int).
+:- mode samsort_up(in, array_di, array_uo, array_di, array_uo, in, in, in)
+            is det.
+
+:- pragma type_spec(samsort_up/8, T = int).
+:- pragma type_spec(samsort_up/8, T = string).
+
+    % Precondition:
+    %   We are N levels from the bottom (leaf nodes) of the tree.
+    %   A0 is sorted from Lo .. I - 1.
+    %   A0 and B0 are identical from I .. Hi.
+    % Postcondition:
+    %   B is sorted from Lo .. Hi.
+    %
+samsort_up(N, A0, A, B0, B, Lo, Hi, I) :-
+
+    ( if I > Hi then
+
+        A = B0,
+        B = A0
+
+      else if N > 0 then
+
+        samsort_down(N - 1, B0, B1, A0, A1, I, Hi, J),
+
+            % A1 is sorted from I .. J - 1.
+            % A1 and B1 are identical from J .. Hi.
+
+        B2 = merge_subarrays(A1, B1, Lo, I - 1, I, J - 1, Lo),
+        A2 = A1,
+
+            % B2 is sorted from Lo .. J - 1.
+
+        samsort_up(N + 1, B2, B, A2, A, Lo, Hi, J)
+
+      else /* N = 0, I = Lo */
+
+        copy_run_ascending(A0, B0, B1, Lo, Hi, J),
+
+            % B1 is sorted from Lo .. J - 1.
+
+        samsort_up(N + 1, B1, B, A0, A, Lo, Hi, J)
+    ).
+
+
+
+:- pred samsort_down(int,array(T),array(T),array(T),array(T),int,int,int).
+:- mode samsort_down(in, array_di, array_uo, array_di, array_uo, in, in,
out)
+            is det.
+
+:- pragma type_spec(samsort_down/8, T = int).
+:- pragma type_spec(samsort_down/8, T = string).
+
+    % Precondition:
+    %   We are N levels from the bottom (leaf nodes) of the tree.
+    %   A0 and B0 are identical from Lo .. Hi.
+    % Postcondition:
+    %   B is sorted from Lo .. I - 1.
+    %   A and B are identical from I .. Hi.
+    %
+samsort_down(N, A0, A, B0, B, Lo, Hi, I) :-
+
+    ( if Lo > Hi then
+
+        A = A0,
+        B = B0,
+        I = Lo
+
+      else if N > 0 then
+
+        samsort_down(N - 1, B0, B1, A0, A1, Lo, Hi, J),
+        samsort_down(N - 1, B1, B2, A1, A2, J,  Hi, I),
+
+            % A2 is sorted from Lo .. J - 1.
+            % A2 is sorted from J  .. I - 1.
+
+        A = A2,
+        B = merge_subarrays(A2, B2, Lo, J - 1, J, I - 1, Lo)
+
+            % B is sorted from Lo .. I - 1.
+
+      else
+
+        A = A0,
+        copy_run_ascending(A0, B0, B, Lo, Hi, I)
+
+            % B is sorted from Lo .. I - 1.
+    ).
+
+%--------------------------------------------------------------------------
----%
+
+:- pred copy_run_ascending(array(T), array(T), array(T), int, int, int).
+:- mode copy_run_ascending(array_ui, array_di, array_uo, in, in, out) is
det.
+
+:- pragma type_spec(copy_run_ascending/6, T = int).
+:- pragma type_spec(copy_run_ascending/6, T = string).
+
+copy_run_ascending(A, B0, B, Lo, Hi, I) :-
+    ( if Lo < Hi, compare((>), A ^ elem(Lo), A ^ elem(Lo + 1)) then
+        I = search_until((<), A, Lo, Hi),
+        B = copy_subarray_reverse(A, B0, Lo, I - 1, I - 1)
+      else
+        I = search_until((>), A, Lo, Hi),
+        B = copy_subarray(A, B0, Lo, I - 1, Lo)
+    ).
+
+%--------------------------------------------------------------------------
----%
+
+:- func search_until(comparison_result, array(T), int, int) = int.
+:- mode search_until(in, array_ui, in, in) = out is det.
+
+:- pragma type_spec(search_until/4, T = int).
+:- pragma type_spec(search_until/4, T = string).
+
+search_until(R, A, Lo, Hi) =
+    ( if Lo < Hi, not compare(R, A ^ elem(Lo), A ^ elem(Lo + 1))
+      then search_until(R, A, Lo + 1, Hi)
+      else Lo + 1
+    ).
+
+%--------------------------------------------------------------------------
----%
+
+:- func copy_subarray(array(T), array(T), int, int, int) = array(T).
+:- mode copy_subarray(array_ui, array_di, in, in, in) = array_uo is det.
+
+:- pragma type_spec(copy_subarray/5, T = int).
+:- pragma type_spec(copy_subarray/5, T = string).
+
+copy_subarray(A, B, Lo, Hi, I) =
+    ( if Lo =< Hi
+      then copy_subarray(A, B ^ elem(I) := A ^ elem(Lo), Lo + 1, Hi, I + 1)
+      else B
+    ).
+
+%--------------------------------------------------------------------------
----%
+
+:- func copy_subarray_reverse(array(T), array(T), int, int, int) =
array(T).
+:- mode copy_subarray_reverse(array_ui, array_di, in, in, in) = array_uo is
det.
+
+:- pragma type_spec(copy_subarray_reverse/5, T = int).
+:- pragma type_spec(copy_subarray_reverse/5, T = string).
+
+copy_subarray_reverse(A, B, Lo, Hi, I) =
+    ( if Lo =< Hi
+      then copy_subarray_reverse(A, B ^ elem(I) := A ^ elem(Lo), Lo+1, Hi,
I-1)
+      else B
+    ).
+
+%--------------------------------------------------------------------------
----%
+
+    % merges the two sorted consecutive subarrays Lo1 .. Hi1 and
+    % Lo2 .. Hi2 from A into the subarray starting at I in B.
+    % 
+:- func merge_subarrays(array(T), array(T), int, int, int, int, int) =
array(T).
+:- mode merge_subarrays(array_ui, array_di, in, in, in, in, in) = array_uo
+            is det.
+
+:- pragma type_spec(merge_subarrays/7, T = int).
+:- pragma type_spec(merge_subarrays/7, T = string).
+
+merge_subarrays(A, B0, Lo1, Hi1, Lo2, Hi2, I) = B :-
+    (      if Lo1 > Hi1 then B = copy_subarray(A, B0, Lo2, Hi2, I)
+      else if Lo2 > Hi2 then B = copy_subarray(A, B0, Lo1, Hi1, I)
+      else
+        X1 = A ^ elem(Lo1),
+        X2 = A ^ elem(Lo2),
+        compare(R, X1, X2),
+        (
+            R = (<),
+            B = merge_subarrays(A, B0^elem(I) := X1, Lo1+1, Hi1, Lo2, Hi2,
I+1)
+        ;
+            R = (=),
+            B = merge_subarrays(A, B0^elem(I) := X1, Lo1+1, Hi1, Lo2, Hi2,
I+1)
+        ;
+            R = (>),
+            B = merge_subarrays(A, B0^elem(I) := X2, Lo1, Hi1, Lo2+1, Hi2,
I+1)
+        )
+    ).
+
+%--------------------------------------------------------------------------
----%
+%--------------------------------------------------------------------------
----%

--
Ralph Becket      |      MSR Cambridge      |      rbeck at microsoft.com 
--------------------------------------------------------------------------
mercury-developers mailing list
Post messages to:       mercury-developers at cs.mu.oz.au
Administrative Queries: owner-mercury-developers at cs.mu.oz.au
Subscriptions:          mercury-developers-request at cs.mu.oz.au
--------------------------------------------------------------------------



More information about the developers mailing list