[m-rev.] for review: Make version_hash_table maintain concurrency (un)safeness.
Peter Wang
novalazy at gmail.com
Wed Jul 19 16:34:24 AEST 2023
When a version hash table expanded its underlying version array,
it previously would always create a concurrency-safe version array
even if the hash table itself was originally created with a
version_hash_table.unsafe_init* functions.
This change makes it so that when an "unsafe" version hash table
expands, it will create an "unsafe" version array internally.
library/version_array.m:
Export a predicate has_lock/1 for use by version_hash_table.m
Replace occurrences of max(VA) with size(VA) - 1. The latter makes
it more obvious that an index may start out negative.
Other minor style changes.
Fix typos.
library/version_hash_table.m:
When expanding a hash table, create a new version array with
version_array.init or version_array.unsafe_init
depending on whether the old version array had an internal lock.
Replace field access expressions with predicate calls.
---
library/version_array.m | 142 ++++++++++++++++++++++++++---------
library/version_hash_table.m | 42 ++++++-----
2 files changed, 132 insertions(+), 52 deletions(-)
diff --git a/library/version_array.m b/library/version_array.m
index c5a88a937..74f419717 100644
--- a/library/version_array.m
+++ b/library/version_array.m
@@ -2,7 +2,7 @@
% vim: ts=4 sw=4 et ft=mercury
%---------------------------------------------------------------------------%
% Copyright (C) 2004-2012 The University of Melbourne.
-% Copyright (C) 2014-2022 The Mercury Team.
+% Copyright (C) 2014-2023 The Mercury Team.
% This file is distributed under the terms specified in COPYING.LIB.
%---------------------------------------------------------------------------%
%
@@ -70,7 +70,7 @@
%---------------------------------------------------------------------------%
- % empty_array returns the empty array.
+ % empty returns the empty array.
%
:- func empty = version_array(T).
@@ -137,7 +137,7 @@
%
:- func size(version_array(T)) = int.
- % max(Z) = size(A) - 1.
+ % max(A) = size(A) - 1.
% Returns -1 for an empty array.
%
:- func max(version_array(T)) = int.
@@ -263,12 +263,29 @@
%---------------------------------------------------------------------------%
%---------------------------------------------------------------------------%
+:- implementation.
+
+% Everything below here is not intended to be part of the public interface,
+% and will not be included in the Mercury library reference manual.
+
+%---------------------------------------------------------------------------%
+
+:- interface.
+
+ % Succeeds if the version array has an internal lock to protect against
+ % concurrent updates. This predicate is privately exported so that
+ % version_hash_table does not need an extra flag to record whether the
+ % version_array was created with a lock or not.
+ %
+:- pred has_lock(version_array(T)::in) is semidet.
+
+%---------------------------------------------------------------------------%
+%---------------------------------------------------------------------------%
+
% The first implementation of version arrays used nb_references.
% This incurred three memory allocations for every update. This version
% works at a lower level, but only performs one allocation per update.
-%---------------------------------------------------------------------------%
-
:- implementation.
:- import_module int.
@@ -553,11 +570,13 @@ resize(N, X, VA, resize(VA, N, X)).
%---------------------------------------------------------------------------%
-copy(VA) =
- ( if size(VA) = 0 then
- VA
+copy(VA0) = VA :-
+ Size = size(VA0),
+ ( if Size = 0 then
+ VA = VA0
else
- resize(VA, size(VA), lookup(VA, 0))
+ lookup(VA0, 0, Init),
+ resize(Size, Init, VA0, VA)
).
%---------------------------------------------------------------------------%
@@ -574,10 +593,11 @@ foldl(F, VA, Acc0) = Acc :-
:- pred do_foldl_func((func(T1, T2) = T2)::in,
version_array(T1)::in, int::in, int::in, T2::in, T2::out) is det.
-do_foldl_func(F, VA, Lo, Hi, !Acc) :-
- ( if Lo < Hi then
- !:Acc = F(lookup(VA, Lo), !.Acc),
- do_foldl_func(F, VA, Lo + 1, Hi, !Acc)
+do_foldl_func(F, VA, I, Size, !Acc) :-
+ ( if I < Size then
+ lookup(VA, I, X),
+ !:Acc = F(X, !.Acc),
+ do_foldl_func(F, VA, I + 1, Size, !Acc)
else
true
).
@@ -598,10 +618,11 @@ foldl(P, VA, !Acc) :-
:- mode do_foldl_pred(pred(in, di, uo) is semidet, in, in, in, di, uo)
is semidet.
-do_foldl_pred(P, VA, Lo, Hi, !Acc) :-
- ( if Lo < Hi then
- P(lookup(VA, Lo), !Acc),
- do_foldl_pred(P, VA, Lo + 1, Hi, !Acc)
+do_foldl_pred(P, VA, I, Size, !Acc) :-
+ ( if I < Size then
+ lookup(VA, I, X),
+ P(X, !Acc),
+ do_foldl_pred(P, VA, I + 1, Size, !Acc)
else
true
).
@@ -626,10 +647,11 @@ foldl2(P, VA, !Acc1, !Acc2) :-
:- mode do_foldl2(pred(in, in, out, di, uo) is semidet, in, in, in,
in, out, di, uo) is semidet.
-do_foldl2(P, VA, Lo, Hi, !Acc1, !Acc2) :-
- ( if Lo < Hi then
- P(lookup(VA, Lo), !Acc1, !Acc2),
- do_foldl2(P, VA, Lo + 1, Hi, !Acc1, !Acc2)
+do_foldl2(P, VA, I, Size, !Acc1, !Acc2) :-
+ ( if I < Size then
+ lookup(VA, I, X),
+ P(X, !Acc1, !Acc2),
+ do_foldl2(P, VA, I + 1, Size, !Acc1, !Acc2)
else
true
).
@@ -637,15 +659,16 @@ do_foldl2(P, VA, Lo, Hi, !Acc1, !Acc2) :-
%---------------------------------------------------------------------------%
foldr(F, VA, Acc0) = Acc :-
- do_foldr_func(F, VA, max(VA), Acc0, Acc).
+ do_foldr_func(F, VA, size(VA) - 1, Acc0, Acc).
:- pred do_foldr_func((func(T1, T2) = T2)::in, version_array(T1)::in,
int::in, T2::in, T2::out) is det.
-do_foldr_func(F, VA, Hi, !Acc) :-
- ( if 0 =< Hi then
- !:Acc = F(lookup(VA, Hi), !.Acc),
- do_foldr_func(F, VA, Hi - 1, !Acc)
+do_foldr_func(F, VA, I, !Acc) :-
+ ( if I >= 0 then
+ lookup(VA, I, X),
+ !:Acc = F(X, !.Acc),
+ do_foldr_func(F, VA, I - 1, !Acc)
else
true
).
@@ -653,7 +676,7 @@ do_foldr_func(F, VA, Hi, !Acc) :-
%---------------------------------------------------------------------------%
foldr(P, VA, !Acc) :-
- do_foldr_pred(P, VA, max(VA), !Acc).
+ do_foldr_pred(P, VA, size(VA) - 1, !Acc).
:- pred do_foldr_pred(pred(T1, T2, T2), version_array(T1), int, T2, T2).
:- mode do_foldr_pred(pred(in, in, out) is det, in, in, in, out) is det.
@@ -668,7 +691,8 @@ foldr(P, VA, !Acc) :-
do_foldr_pred(P, VA, I, !Acc) :-
( if I >= 0 then
- P(lookup(VA, I), !Acc),
+ lookup(VA, I, X),
+ P(X, !Acc),
do_foldr_pred(P, VA, I - 1, !Acc)
else
true
@@ -677,7 +701,7 @@ do_foldr_pred(P, VA, I, !Acc) :-
%---------------------------------------------------------------------------%
foldr2(P, VA, !Acc1, !Acc2) :-
- do_foldr2(P, VA, max(VA), !Acc1, !Acc2).
+ do_foldr2(P, VA, size(VA) - 1, !Acc1, !Acc2).
:- pred do_foldr2(pred(T1, T2, T2, T3, T3), version_array(T1), int,
T2, T2, T3, T3).
@@ -696,7 +720,8 @@ foldr2(P, VA, !Acc1, !Acc2) :-
do_foldr2(P, VA, I, !Acc1, !Acc2) :-
( if I >= 0 then
- P(lookup(VA, I), !Acc1, !Acc2),
+ lookup(VA, I, X),
+ P(X, !Acc1, !Acc2),
do_foldr2(P, VA, I - 1, !Acc1, !Acc2)
else
true
@@ -772,9 +797,9 @@ unsafe_rewind(VA, unsafe_rewind(VA)).
:- pragma terminates(pred(eq_version_array/2)).
eq_version_array(VAa, VAb) :-
- N = max(VAa),
- N = max(VAb),
- eq_version_array_2(N, VAa, VAb).
+ N = size(VAa),
+ N = size(VAb),
+ eq_version_array_2(N - 1, VAa, VAb).
:- pred eq_version_array_2(int::in,
version_array(T)::in, version_array(T)::in) is semidet.
@@ -1372,6 +1397,7 @@ using System;
:- pragma foreign_code("C#", "
public interface ML_va {
+ bool has_lock();
object get(int I);
ML_va set(int I, object X);
ML_va resize(int N, object X);
@@ -1396,6 +1422,10 @@ public class ML_sva : ML_va {
private ML_sva() {}
+ public bool has_lock() {
+ return true;
+ }
+
public object get(int I) {
lock (va_lock) {
return version_array.get(I);
@@ -1479,6 +1509,10 @@ public class ML_uva : ML_va {
return va;
}
+ public bool has_lock() {
+ return false;
+ }
+
public ML_va resize(int N, object X) {
return resize_uva(N, X);
}
@@ -1691,6 +1725,7 @@ import jmercury.runtime.MercuryBitmap;
:- pragma foreign_code("Java", "
public interface ML_va {
+ public boolean has_lock();
public Object get(int I) throws ArrayIndexOutOfBoundsException;
public ML_va set(int I, Object X);
public ML_va resize(int N, Object X);
@@ -1716,7 +1751,11 @@ public static class ML_sva implements ML_va, java.io.Serializable {
lock = new Lock();
}
- private ML_sva() {};
+ private ML_sva() {}
+
+ public boolean has_lock() {
+ return true;
+ }
public Object get(int I) throws ArrayIndexOutOfBoundsException {
synchronized (lock) {
@@ -1797,6 +1836,10 @@ public static class ML_uva implements ML_va, java.io.Serializable {
return va;
}
+ public boolean has_lock() {
+ return false;
+ }
+
public ML_uva resize(int N, Object X) {
ML_uva VA0 = this;
ML_uva latest;
@@ -2007,6 +2050,37 @@ out_of_bounds_error(Index, Max, PredName) :-
version_array_to_doc(A) = pretty_printer.version_array_to_doc(A).
+%---------------------------------------------------------------------------%
+
+:- pragma foreign_proc("C",
+ has_lock(VA::in),
+ [will_not_call_mercury, promise_pure, thread_safe, tabled_for_io],
+"
+#ifdef MR_THREAD_SAFE
+ SUCCESS_INDICATOR = (VA->lock != NULL) ? MR_TRUE : MR_FALSE;
+#else
+ // The following means has_lock(VA) will fail in non-.par C grades even if
+ // VA was created with a 'safe' init function. That is acceptable for the
+ // use in version_hash_table.m, but if we wanted to publicly export
+ // has_lock, it is something we might want to change.
+ SUCCESS_INDICATOR = MR_FALSE;
+#endif
+").
+
+:- pragma foreign_proc("C#",
+ has_lock(VA::in),
+ [will_not_call_mercury, promise_pure, thread_safe, tabled_for_io],
+"
+ SUCCESS_INDICATOR = VA.has_lock();
+").
+
+:- pragma foreign_proc("Java",
+ has_lock(VA::in),
+ [will_not_call_mercury, promise_pure, thread_safe, tabled_for_io],
+"
+ SUCCESS_INDICATOR = VA.has_lock();
+").
+
%---------------------------------------------------------------------------%
:- end_module version_array.
%---------------------------------------------------------------------------%
diff --git a/library/version_hash_table.m b/library/version_hash_table.m
index d4c7a823c..ba0e1d75f 100644
--- a/library/version_hash_table.m
+++ b/library/version_hash_table.m
@@ -2,7 +2,7 @@
% vim: ft=mercury ts=4 sw=4 et
%---------------------------------------------------------------------------%
% Copyright (C) 2004-2006, 2010-2012 The University of Melbourne.
-% Copyright (C) 2013-2015, 2017-2022 The Mercury team.
+% Copyright (C) 2013-2015, 2017-2023 The Mercury team.
% This file is distributed under the terms specified in COPYING.LIB.
%---------------------------------------------------------------------------%
%
@@ -324,7 +324,7 @@ search(HT, K, V) :-
promise_equivalent_solutions [Buckets] (
Buckets = HT ^ ht_buckets
),
- AL = Buckets ^ elem(H),
+ version_array.lookup(Buckets, H, AL),
alist_search(AL, K, V).
:- pred alist_search(hash_table_alist(K, V)::in, K::in, V::out) is semidet.
@@ -367,7 +367,7 @@ set(HT0, K, V) = HT :-
Buckets0] (
HT0 = ht(NumOccupants0, MaxOccupants, HashPred, Buckets0)
),
- AL0 = Buckets0 ^ elem(H),
+ version_array.lookup(Buckets0, H, AL0),
(
AL0 = ht_nil,
AL = ht_single(K, V),
@@ -391,7 +391,7 @@ set(HT0, K, V) = HT :-
MayExpand = yes
)
),
- Buckets = Buckets0 ^ elem(H) := AL,
+ version_array.set(H, AL, Buckets0, Buckets),
(
MayExpand = no,
HT = ht(NumOccupants0, MaxOccupants, HashPred, Buckets)
@@ -439,7 +439,7 @@ det_insert(HT0, K, V) = HT :-
(
HT0 = ht(NumOccupants0, MaxOccupants, HashPred, Buckets0)
),
- AL0 = Buckets0 ^ elem(H),
+ version_array.lookup(Buckets0, H, AL0),
(
AL0 = ht_nil,
AL = ht_single(K, V)
@@ -454,7 +454,7 @@ det_insert(HT0, K, V) = HT :-
AL = ht_cons(K, V, AL0)
)
),
- Buckets = Buckets0 ^ elem(H) := AL,
+ version_array.set(H, AL, Buckets0, Buckets),
NumOccupants = NumOccupants0 + 1,
( if NumOccupants > MaxOccupants then
HT = expand(NumOccupants, MaxOccupants, HashPred, Buckets)
@@ -471,13 +471,13 @@ det_update(HT0, K, V) = HT :-
promise_equivalent_solutions [Buckets0] (
Buckets0 = HT0 ^ ht_buckets
),
- AL0 = Buckets0 ^ elem(H),
+ version_array.lookup(Buckets0, H, AL0),
( if alist_replace(AL0, K, V, AL1) then
AL = AL1
else
error($pred, "key not found")
),
- Buckets = Buckets0 ^ elem(H) := AL,
+ version_array.set(H, AL, Buckets0, Buckets),
promise_equivalent_solutions [HT] (
HT = HT0 ^ ht_buckets := Buckets
).
@@ -493,9 +493,9 @@ delete(HT0, K) = HT :-
(
HT0 = ht(NumOccupants0, MaxOccupants, HashPred, Buckets0)
),
- AL0 = Buckets0 ^ elem(H),
+ version_array.lookup(Buckets0, H, AL0),
( if alist_remove(AL0, K, AL) then
- Buckets = Buckets0 ^ elem(H) := AL,
+ version_array.set(H, AL, Buckets0, Buckets),
NumOccupants = NumOccupants0 - 1,
HT = ht(NumOccupants, MaxOccupants, HashPred, Buckets)
else
@@ -551,22 +551,24 @@ to_assoc_list_2(X, AL0, AL) :-
).
from_assoc_list(HashPred, N, MaxOccupants, AList) = HT :-
- from_assoc_list_2(AList, init(HashPred, N, MaxOccupants), HT).
+ HT0 = init(HashPred, N, MaxOccupants),
+ from_assoc_list_2(AList, HT0, HT).
from_assoc_list(HashPred, AList) = HT :-
- from_assoc_list_2(AList, init_default(HashPred), HT).
+ HT0 = init_default(HashPred),
+ from_assoc_list_2(AList, HT0, HT).
:- pred from_assoc_list_2(assoc_list(K, V)::in,
version_hash_table(K, V)::in, version_hash_table(K, V)::out) is det.
from_assoc_list_2([], !HT).
from_assoc_list_2([K - V | T], !HT) :-
- !HT ^ elem(K) := V,
+ set(K, V, !HT),
from_assoc_list_2(T, !HT).
%---------------------------------------------------------------------------%
- % Hash tables expand by doubling in size.
+ % Hash tables expand by doubling the number of buckets.
%
% Ensuring expand/4 is _not_ inlined into version_hash_table.det_insert,
% etc. actually makes those predicates more efficient.
@@ -584,7 +586,11 @@ expand(NumOccupants, MaxOccupants0, HashPred, Buckets0) = HT :-
NumBuckets0 = size(Buckets0),
NumBuckets = NumBuckets0 + NumBuckets0,
MaxOccupants = MaxOccupants0 + MaxOccupants0,
- Buckets1 = version_array.init(NumBuckets, ht_nil),
+ ( if version_array.has_lock(Buckets0) then
+ Buckets1 = version_array.init(NumBuckets, ht_nil)
+ else
+ Buckets1 = version_array.unsafe_init(NumBuckets, ht_nil)
+ ),
reinsert_bindings(0, Buckets0, HashPred, NumBuckets, Buckets1, Buckets),
HT = ht(NumOccupants, MaxOccupants, HashPred, Buckets).
@@ -596,7 +602,7 @@ reinsert_bindings(I, OldBuckets, HashPred, NumBuckets, !Buckets) :-
( if I >= size(OldBuckets) then
true
else
- AL = OldBuckets ^ elem(I),
+ version_array.lookup(OldBuckets, I, AL),
reinsert_alist(AL, HashPred, NumBuckets, !Buckets),
reinsert_bindings(I + 1, OldBuckets, HashPred, NumBuckets, !Buckets)
).
@@ -621,7 +627,7 @@ reinsert_alist(AL, HashPred, NumBuckets, !Buckets) :-
unsafe_insert(K, V, HashPred, NumBuckets, Buckets0, Buckets) :-
find_slot_2(HashPred, K, NumBuckets, H),
- AL0 = Buckets0 ^ elem(H),
+ version_array.lookup(Buckets0, H, AL0),
(
AL0 = ht_nil,
AL = ht_single(K, V)
@@ -631,7 +637,7 @@ unsafe_insert(K, V, HashPred, NumBuckets, Buckets0, Buckets) :-
),
AL = ht_cons(K, V, AL0)
),
- Buckets = Buckets0 ^ elem(H) := AL.
+ version_array.set(H, AL, Buckets0, Buckets).
%---------------------------------------------------------------------------%
--
2.39.0
More information about the reviews
mailing list