[m-rev.] for review: Avoid intermediate strings in string.replace_all.

Peter Wang novalazy at gmail.com
Thu Nov 7 15:21:19 AEDT 2019


library/string.m:
    Implement string.replace_all using unsafe_append_string_pieces to
    avoid intermediate strings. Use unsafe_sub_string_search_start to
    avoid repeated range checks.
---
 library/string.m | 36 +++++++++++++++++++-----------------
 1 file changed, 19 insertions(+), 17 deletions(-)

diff --git a/library/string.m b/library/string.m
index 95d944926..149047677 100644
--- a/library/string.m
+++ b/library/string.m
@@ -4826,17 +4826,19 @@ replace(Str, Pat, Subst, Result) :-
     ],
     unsafe_append_string_pieces(Pieces, Result).
 
-replace_all(S1, S2, S3) = S4 :-
-    replace_all(S1, S2, S3, S4).
+replace_all(Str, Pat, Subst) = Result :-
+    replace_all(Str, Pat, Subst, Result).
 
 replace_all(Str, Pat, Subst, Result) :-
     ( if Pat = "" then
         replace_all_empty_pat(Str, Subst, Result)
     else
-        PatLength = length(Pat),
-        replace_all_loop(Str, Pat, Subst, PatLength, 0, [], ReversedChunks),
-        Chunks = list.reverse(ReversedChunks),
-        Result = append_list(Chunks)
+        % Using substring instead of string avoids two calls to string.length
+        % every time that SubstPiece appears in Pieces.
+        SubstPiece = substring(Subst, 0, length(Subst)),
+        replace_all_loop(Str, Pat, length(Pat), SubstPiece, 0, [], RevPieces),
+        list.reverse(RevPieces, Pieces),
+        unsafe_append_string_pieces(Pieces, Result)
     ).
 
 :- pred replace_all_empty_pat(string::in, string::in, string::uo) is det.
@@ -4883,19 +4885,19 @@ prepend_code_units(Str, FirstIndex, Index, Codes0, Codes) :-
         prepend_code_units(Str, FirstIndex, Index - 1, Codes1, Codes)
     ).
 
-:- pred replace_all_loop(string::in, string::in, string::in,
-    int::in, int::in, list(string)::in, list(string)::out) is det.
+:- pred replace_all_loop(string::in, string::in, int::in, string_piece::in,
+    int::in, list(string_piece)::in, list(string_piece)::out) is det.
 
-replace_all_loop(Str, Pat, Subst, PatLength, BeginAt,
-        RevChunks0, RevChunks) :-
-    ( if sub_string_search_start(Str, Pat, BeginAt, Index) then
-        Initial = unsafe_between(Str, BeginAt, Index),
-        Start = Index + PatLength,
-        replace_all_loop(Str, Pat, Subst, PatLength, Start,
-            [Subst, Initial | RevChunks0], RevChunks)
+replace_all_loop(Str, Pat, PatLength, SubstPiece, BeginAt,
+        RevPieces0, RevPieces) :-
+    ( if unsafe_sub_string_search_start(Str, Pat, BeginAt, PatStart) then
+        InitialPiece = substring(Str, BeginAt, PatStart),
+        RevPieces1 = [SubstPiece, InitialPiece | RevPieces0],
+        replace_all_loop(Str, Pat, PatLength, SubstPiece, PatStart + PatLength,
+            RevPieces1, RevPieces)
     else
-        EndString = unsafe_between(Str, BeginAt, length(Str)),
-        RevChunks = [EndString | RevChunks0]
+        TailPiece = substring(Str, BeginAt, length(Str)),
+        RevPieces = [TailPiece | RevPieces0]
     ).
 
 %---------------------%
-- 
2.23.0



More information about the reviews mailing list