[m-rev.] for review: Make version_hash_table maintain concurrency (un)safeness.

Peter Wang novalazy at gmail.com
Wed Jul 19 16:34:24 AEST 2023


When a version hash table expanded its underlying version array,
it previously would always create a concurrency-safe version array
even if the hash table itself was originally created with a
version_hash_table.unsafe_init* functions.

This change makes it so that when an "unsafe" version hash table
expands, it will create an "unsafe" version array internally.

library/version_array.m:
    Export a predicate has_lock/1 for use by version_hash_table.m

    Replace occurrences of max(VA) with size(VA) - 1. The latter makes
    it more obvious that an index may start out negative.

    Other minor style changes.

    Fix typos.

library/version_hash_table.m:
    When expanding a hash table, create a new version array with
    version_array.init or version_array.unsafe_init
    depending on whether the old version array had an internal lock.

    Replace field access expressions with predicate calls.
---
 library/version_array.m      | 142 ++++++++++++++++++++++++++---------
 library/version_hash_table.m |  42 ++++++-----
 2 files changed, 132 insertions(+), 52 deletions(-)

diff --git a/library/version_array.m b/library/version_array.m
index c5a88a937..74f419717 100644
--- a/library/version_array.m
+++ b/library/version_array.m
@@ -2,7 +2,7 @@
 % vim: ts=4 sw=4 et ft=mercury
 %---------------------------------------------------------------------------%
 % Copyright (C) 2004-2012 The University of Melbourne.
-% Copyright (C) 2014-2022 The Mercury Team.
+% Copyright (C) 2014-2023 The Mercury Team.
 % This file is distributed under the terms specified in COPYING.LIB.
 %---------------------------------------------------------------------------%
 %
@@ -70,7 +70,7 @@
 
 %---------------------------------------------------------------------------%
 
-    % empty_array returns the empty array.
+    % empty returns the empty array.
     %
 :- func empty = version_array(T).
 
@@ -137,7 +137,7 @@
     %
 :- func size(version_array(T)) = int.
 
-    % max(Z) = size(A) - 1.
+    % max(A) = size(A) - 1.
     % Returns -1 for an empty array.
     %
 :- func max(version_array(T)) = int.
@@ -263,12 +263,29 @@
 %---------------------------------------------------------------------------%
 %---------------------------------------------------------------------------%
 
+:- implementation.
+
+% Everything below here is not intended to be part of the public interface,
+% and will not be included in the Mercury library reference manual.
+
+%---------------------------------------------------------------------------%
+
+:- interface.
+
+    % Succeeds if the version array has an internal lock to protect against
+    % concurrent updates. This predicate is privately exported so that
+    % version_hash_table does not need an extra flag to record whether the
+    % version_array was created with a lock or not.
+    %
+:- pred has_lock(version_array(T)::in) is semidet.
+
+%---------------------------------------------------------------------------%
+%---------------------------------------------------------------------------%
+
 % The first implementation of version arrays used nb_references.
 % This incurred three memory allocations for every update. This version
 % works at a lower level, but only performs one allocation per update.
 
-%---------------------------------------------------------------------------%
-
 :- implementation.
 
 :- import_module int.
@@ -553,11 +570,13 @@ resize(N, X, VA, resize(VA, N, X)).
 
 %---------------------------------------------------------------------------%
 
-copy(VA) =
-    ( if size(VA) = 0 then
-        VA
+copy(VA0) = VA :-
+    Size = size(VA0),
+    ( if Size = 0 then
+        VA = VA0
     else
-        resize(VA, size(VA), lookup(VA, 0))
+        lookup(VA0, 0, Init),
+        resize(Size, Init, VA0, VA)
     ).
 
 %---------------------------------------------------------------------------%
@@ -574,10 +593,11 @@ foldl(F, VA, Acc0) = Acc :-
 :- pred do_foldl_func((func(T1, T2) = T2)::in,
     version_array(T1)::in, int::in, int::in, T2::in, T2::out) is det.
 
-do_foldl_func(F, VA, Lo, Hi, !Acc) :-
-    ( if Lo < Hi then
-        !:Acc = F(lookup(VA, Lo), !.Acc),
-        do_foldl_func(F, VA, Lo + 1, Hi, !Acc)
+do_foldl_func(F, VA, I, Size, !Acc) :-
+    ( if I < Size then
+        lookup(VA, I, X),
+        !:Acc = F(X, !.Acc),
+        do_foldl_func(F, VA, I + 1, Size, !Acc)
     else
         true
     ).
@@ -598,10 +618,11 @@ foldl(P, VA, !Acc) :-
 :- mode do_foldl_pred(pred(in, di, uo) is semidet, in, in, in, di, uo)
     is semidet.
 
-do_foldl_pred(P, VA, Lo, Hi, !Acc) :-
-    ( if Lo < Hi then
-        P(lookup(VA, Lo), !Acc),
-        do_foldl_pred(P, VA, Lo + 1, Hi, !Acc)
+do_foldl_pred(P, VA, I, Size, !Acc) :-
+    ( if I < Size then
+        lookup(VA, I, X),
+        P(X, !Acc),
+        do_foldl_pred(P, VA, I + 1, Size, !Acc)
     else
         true
     ).
@@ -626,10 +647,11 @@ foldl2(P, VA, !Acc1, !Acc2) :-
 :- mode do_foldl2(pred(in, in, out, di, uo) is semidet, in, in, in,
     in, out, di, uo) is semidet.
 
-do_foldl2(P, VA, Lo, Hi, !Acc1, !Acc2) :-
-    ( if Lo < Hi then
-        P(lookup(VA, Lo), !Acc1, !Acc2),
-        do_foldl2(P, VA, Lo + 1, Hi, !Acc1, !Acc2)
+do_foldl2(P, VA, I, Size, !Acc1, !Acc2) :-
+    ( if I < Size then
+        lookup(VA, I, X),
+        P(X, !Acc1, !Acc2),
+        do_foldl2(P, VA, I + 1, Size, !Acc1, !Acc2)
     else
         true
     ).
@@ -637,15 +659,16 @@ do_foldl2(P, VA, Lo, Hi, !Acc1, !Acc2) :-
 %---------------------------------------------------------------------------%
 
 foldr(F, VA, Acc0) = Acc :-
-    do_foldr_func(F, VA, max(VA), Acc0, Acc).
+    do_foldr_func(F, VA, size(VA) - 1, Acc0, Acc).
 
 :- pred do_foldr_func((func(T1, T2) = T2)::in, version_array(T1)::in,
     int::in, T2::in, T2::out) is det.
 
-do_foldr_func(F, VA, Hi, !Acc) :-
-    ( if 0 =< Hi then
-        !:Acc = F(lookup(VA, Hi), !.Acc),
-        do_foldr_func(F, VA, Hi - 1, !Acc)
+do_foldr_func(F, VA, I, !Acc) :-
+    ( if I >= 0 then
+        lookup(VA, I, X),
+        !:Acc = F(X, !.Acc),
+        do_foldr_func(F, VA, I - 1, !Acc)
     else
         true
     ).
@@ -653,7 +676,7 @@ do_foldr_func(F, VA, Hi, !Acc) :-
 %---------------------------------------------------------------------------%
 
 foldr(P, VA, !Acc) :-
-    do_foldr_pred(P, VA, max(VA), !Acc).
+    do_foldr_pred(P, VA, size(VA) - 1, !Acc).
 
 :- pred do_foldr_pred(pred(T1, T2, T2), version_array(T1), int, T2, T2).
 :- mode do_foldr_pred(pred(in, in, out) is det, in, in, in, out) is det.
@@ -668,7 +691,8 @@ foldr(P, VA, !Acc) :-
 
 do_foldr_pred(P, VA, I, !Acc) :-
     ( if I >= 0 then
-        P(lookup(VA, I), !Acc),
+        lookup(VA, I, X),
+        P(X, !Acc),
         do_foldr_pred(P, VA, I - 1, !Acc)
     else
         true
@@ -677,7 +701,7 @@ do_foldr_pred(P, VA, I, !Acc) :-
 %---------------------------------------------------------------------------%
 
 foldr2(P, VA, !Acc1, !Acc2) :-
-    do_foldr2(P, VA, max(VA), !Acc1, !Acc2).
+    do_foldr2(P, VA, size(VA) - 1, !Acc1, !Acc2).
 
 :- pred do_foldr2(pred(T1, T2, T2, T3, T3), version_array(T1), int,
     T2, T2, T3, T3).
@@ -696,7 +720,8 @@ foldr2(P, VA, !Acc1, !Acc2) :-
 
 do_foldr2(P, VA, I, !Acc1, !Acc2) :-
     ( if I >= 0 then
-        P(lookup(VA, I), !Acc1, !Acc2),
+        lookup(VA, I, X),
+        P(X, !Acc1, !Acc2),
         do_foldr2(P, VA, I - 1, !Acc1, !Acc2)
     else
         true
@@ -772,9 +797,9 @@ unsafe_rewind(VA, unsafe_rewind(VA)).
 :- pragma terminates(pred(eq_version_array/2)).
 
 eq_version_array(VAa, VAb) :-
-    N = max(VAa),
-    N = max(VAb),
-    eq_version_array_2(N, VAa, VAb).
+    N = size(VAa),
+    N = size(VAb),
+    eq_version_array_2(N - 1, VAa, VAb).
 
 :- pred eq_version_array_2(int::in,
     version_array(T)::in, version_array(T)::in) is semidet.
@@ -1372,6 +1397,7 @@ using System;
 :- pragma foreign_code("C#", "
 
 public interface ML_va {
+    bool has_lock();
     object get(int I);
     ML_va set(int I, object X);
     ML_va resize(int N, object X);
@@ -1396,6 +1422,10 @@ public class ML_sva : ML_va {
 
     private ML_sva() {}
 
+    public bool has_lock() {
+        return true;
+    }
+
     public object get(int I) {
         lock (va_lock) {
             return version_array.get(I);
@@ -1479,6 +1509,10 @@ public class ML_uva : ML_va {
         return va;
     }
 
+    public bool has_lock() {
+        return false;
+    }
+
     public ML_va resize(int N, object X) {
         return resize_uva(N, X);
     }
@@ -1691,6 +1725,7 @@ import jmercury.runtime.MercuryBitmap;
 :- pragma foreign_code("Java", "
 
 public interface ML_va {
+    public boolean has_lock();
     public Object get(int I) throws ArrayIndexOutOfBoundsException;
     public ML_va set(int I, Object X);
     public ML_va resize(int N, Object X);
@@ -1716,7 +1751,11 @@ public static class ML_sva implements ML_va, java.io.Serializable {
         lock = new Lock();
     }
 
-    private ML_sva() {};
+    private ML_sva() {}
+
+    public boolean has_lock() {
+        return true;
+    }
 
     public Object get(int I) throws ArrayIndexOutOfBoundsException {
         synchronized (lock) {
@@ -1797,6 +1836,10 @@ public static class ML_uva implements ML_va, java.io.Serializable {
         return va;
     }
 
+    public boolean has_lock() {
+        return false;
+    }
+
     public ML_uva resize(int N, Object X) {
         ML_uva  VA0 = this;
         ML_uva  latest;
@@ -2007,6 +2050,37 @@ out_of_bounds_error(Index, Max, PredName) :-
 
 version_array_to_doc(A) = pretty_printer.version_array_to_doc(A).
 
+%---------------------------------------------------------------------------%
+
+:- pragma foreign_proc("C",
+    has_lock(VA::in),
+    [will_not_call_mercury, promise_pure, thread_safe, tabled_for_io],
+"
+#ifdef MR_THREAD_SAFE
+    SUCCESS_INDICATOR = (VA->lock != NULL) ? MR_TRUE : MR_FALSE;
+#else
+    // The following means has_lock(VA) will fail in non-.par C grades even if
+    // VA was created with a 'safe' init function. That is acceptable for the
+    // use in version_hash_table.m, but if we wanted to publicly export
+    // has_lock, it is something we might want to change.
+    SUCCESS_INDICATOR = MR_FALSE;
+#endif
+").
+
+:- pragma foreign_proc("C#",
+    has_lock(VA::in),
+    [will_not_call_mercury, promise_pure, thread_safe, tabled_for_io],
+"
+    SUCCESS_INDICATOR = VA.has_lock();
+").
+
+:- pragma foreign_proc("Java",
+    has_lock(VA::in),
+    [will_not_call_mercury, promise_pure, thread_safe, tabled_for_io],
+"
+    SUCCESS_INDICATOR = VA.has_lock();
+").
+
 %---------------------------------------------------------------------------%
 :- end_module version_array.
 %---------------------------------------------------------------------------%
diff --git a/library/version_hash_table.m b/library/version_hash_table.m
index d4c7a823c..ba0e1d75f 100644
--- a/library/version_hash_table.m
+++ b/library/version_hash_table.m
@@ -2,7 +2,7 @@
 % vim: ft=mercury ts=4 sw=4 et
 %---------------------------------------------------------------------------%
 % Copyright (C) 2004-2006, 2010-2012 The University of Melbourne.
-% Copyright (C) 2013-2015, 2017-2022 The Mercury team.
+% Copyright (C) 2013-2015, 2017-2023 The Mercury team.
 % This file is distributed under the terms specified in COPYING.LIB.
 %---------------------------------------------------------------------------%
 %
@@ -324,7 +324,7 @@ search(HT, K, V) :-
     promise_equivalent_solutions [Buckets] (
         Buckets = HT ^ ht_buckets
     ),
-    AL = Buckets ^ elem(H),
+    version_array.lookup(Buckets, H, AL),
     alist_search(AL, K, V).
 
 :- pred alist_search(hash_table_alist(K, V)::in, K::in, V::out) is semidet.
@@ -367,7 +367,7 @@ set(HT0, K, V) = HT :-
             Buckets0] (
         HT0 = ht(NumOccupants0, MaxOccupants, HashPred, Buckets0)
     ),
-    AL0 = Buckets0 ^ elem(H),
+    version_array.lookup(Buckets0, H, AL0),
     (
         AL0 = ht_nil,
         AL = ht_single(K, V),
@@ -391,7 +391,7 @@ set(HT0, K, V) = HT :-
             MayExpand = yes
         )
     ),
-    Buckets = Buckets0 ^ elem(H) := AL,
+    version_array.set(H, AL, Buckets0, Buckets),
     (
         MayExpand = no,
         HT = ht(NumOccupants0, MaxOccupants, HashPred, Buckets)
@@ -439,7 +439,7 @@ det_insert(HT0, K, V) = HT :-
     (
         HT0 = ht(NumOccupants0, MaxOccupants, HashPred, Buckets0)
     ),
-    AL0 = Buckets0 ^ elem(H),
+    version_array.lookup(Buckets0, H, AL0),
     (
         AL0 = ht_nil,
         AL = ht_single(K, V)
@@ -454,7 +454,7 @@ det_insert(HT0, K, V) = HT :-
             AL = ht_cons(K, V, AL0)
         )
     ),
-    Buckets = Buckets0 ^ elem(H) := AL,
+    version_array.set(H, AL, Buckets0, Buckets),
     NumOccupants = NumOccupants0 + 1,
     ( if NumOccupants > MaxOccupants then
         HT = expand(NumOccupants, MaxOccupants, HashPred, Buckets)
@@ -471,13 +471,13 @@ det_update(HT0, K, V) = HT :-
     promise_equivalent_solutions [Buckets0] (
         Buckets0 = HT0 ^ ht_buckets
     ),
-    AL0 = Buckets0 ^ elem(H),
+    version_array.lookup(Buckets0, H, AL0),
     ( if alist_replace(AL0, K, V, AL1) then
         AL = AL1
     else
         error($pred, "key not found")
     ),
-    Buckets = Buckets0 ^ elem(H) := AL,
+    version_array.set(H, AL, Buckets0, Buckets),
     promise_equivalent_solutions [HT] (
         HT = HT0 ^ ht_buckets := Buckets
     ).
@@ -493,9 +493,9 @@ delete(HT0, K) = HT :-
     (
         HT0 = ht(NumOccupants0, MaxOccupants, HashPred, Buckets0)
     ),
-    AL0 = Buckets0 ^ elem(H),
+    version_array.lookup(Buckets0, H, AL0),
     ( if alist_remove(AL0, K, AL) then
-        Buckets = Buckets0 ^ elem(H) := AL,
+        version_array.set(H, AL, Buckets0, Buckets),
         NumOccupants = NumOccupants0 - 1,
         HT = ht(NumOccupants, MaxOccupants, HashPred, Buckets)
     else
@@ -551,22 +551,24 @@ to_assoc_list_2(X, AL0, AL) :-
     ).
 
 from_assoc_list(HashPred, N, MaxOccupants, AList) = HT :-
-    from_assoc_list_2(AList, init(HashPred, N, MaxOccupants), HT).
+    HT0 = init(HashPred, N, MaxOccupants),
+    from_assoc_list_2(AList, HT0, HT).
 
 from_assoc_list(HashPred, AList) = HT :-
-    from_assoc_list_2(AList, init_default(HashPred), HT).
+    HT0 = init_default(HashPred),
+    from_assoc_list_2(AList, HT0, HT).
 
 :- pred from_assoc_list_2(assoc_list(K, V)::in,
     version_hash_table(K, V)::in, version_hash_table(K, V)::out) is det.
 
 from_assoc_list_2([], !HT).
 from_assoc_list_2([K - V | T], !HT) :-
-    !HT ^ elem(K) := V,
+    set(K, V, !HT),
     from_assoc_list_2(T, !HT).
 
 %---------------------------------------------------------------------------%
 
-    % Hash tables expand by doubling in size.
+    % Hash tables expand by doubling the number of buckets.
     %
     % Ensuring expand/4 is _not_ inlined into version_hash_table.det_insert,
     % etc. actually makes those predicates more efficient.
@@ -584,7 +586,11 @@ expand(NumOccupants, MaxOccupants0, HashPred, Buckets0) = HT :-
     NumBuckets0 = size(Buckets0),
     NumBuckets = NumBuckets0 + NumBuckets0,
     MaxOccupants = MaxOccupants0 + MaxOccupants0,
-    Buckets1 = version_array.init(NumBuckets, ht_nil),
+    ( if version_array.has_lock(Buckets0) then
+        Buckets1 = version_array.init(NumBuckets, ht_nil)
+    else
+        Buckets1 = version_array.unsafe_init(NumBuckets, ht_nil)
+    ),
     reinsert_bindings(0, Buckets0, HashPred, NumBuckets, Buckets1, Buckets),
     HT = ht(NumOccupants, MaxOccupants, HashPred, Buckets).
 
@@ -596,7 +602,7 @@ reinsert_bindings(I, OldBuckets, HashPred, NumBuckets, !Buckets) :-
     ( if I >= size(OldBuckets) then
         true
     else
-        AL = OldBuckets ^ elem(I),
+        version_array.lookup(OldBuckets, I, AL),
         reinsert_alist(AL, HashPred, NumBuckets, !Buckets),
         reinsert_bindings(I + 1, OldBuckets, HashPred, NumBuckets, !Buckets)
     ).
@@ -621,7 +627,7 @@ reinsert_alist(AL, HashPred, NumBuckets, !Buckets) :-
 
 unsafe_insert(K, V, HashPred, NumBuckets, Buckets0, Buckets) :-
     find_slot_2(HashPred, K, NumBuckets, H),
-    AL0 = Buckets0 ^ elem(H),
+    version_array.lookup(Buckets0, H, AL0),
     (
         AL0 = ht_nil,
         AL = ht_single(K, V)
@@ -631,7 +637,7 @@ unsafe_insert(K, V, HashPred, NumBuckets, Buckets0, Buckets) :-
         ),
         AL = ht_cons(K, V, AL0)
     ),
-    Buckets = Buckets0 ^ elem(H) := AL.
+    version_array.set(H, AL, Buckets0, Buckets).
 
 %---------------------------------------------------------------------------%
 
-- 
2.39.0



More information about the reviews mailing list