[m-rev.] for review: improve hash tables

Peter Wang novalazy at gmail.com
Mon Jul 19 13:17:59 AEST 2010


[This should be safe for 10.04 as well]

Branches: main

Speed up hash tables using a "fat" list representation for bucket chains,
instead of assoc_list.  The benefits are less type_info constructions (an
assoc_list requires a type_info for list and a type_info for pair) and,
likely, better locality.  This change speeds up hash_table_test.m by 25%,
on an x86-64 machine.

For non-version hash tables, also use unsafe array operations to avoid
bounds checks and the remaining type_info constructions.  This improves on
the previous change by another 20%.

library/hash_table.m:
        Use fat list representation.

        Use unsafe array operations.

library/version_hash_table.m:
        Use fat list representation.

diff --git a/library/hash_table.m b/library/hash_table.m
index 7989291..e535123 100644
--- a/library/hash_table.m
+++ b/library/hash_table.m
@@ -223,11 +223,17 @@
                 num_occupants           :: int,
                 max_occupants           :: int,
                 hash_pred               :: hash_pred(K),
-                buckets                 :: array(assoc_list(K, V))
+                buckets                 :: array(hash_table_alist(K, V))
             ).
 
 :- implementation.
 
+:- type buckets(K, V) == array(hash_table_alist(K, V)).
+
+:- type hash_table_alist(K, V)
+    --->    ht_nil
+    ;       ht_cons(K, V, hash_table_alist(K, V)).
+
 %-----------------------------------------------------------------------------%
 
 new(HashPred, N, MaxOccupancy) = HT :-
@@ -242,7 +248,7 @@ new(HashPred, N, MaxOccupancy) = HT :-
       else
             NumBuckets   = 1 << N,
             MaxOccupants = ceiling_to_int(float(NumBuckets) * MaxOccupancy),
-            Buckets      = init(NumBuckets, []),
+            Buckets      = init(NumBuckets, ht_nil),
             HT           = ht(0, MaxOccupants, HashPred, Buckets)
     ).
 
@@ -277,15 +283,17 @@ find_slot_2(HashPred, K, NumBuckets, H) :-
 
 set(!.HT, K, V) = !:HT :-
     H = find_slot(!.HT, K),
-    AL0 = !.HT ^ buckets ^ elem(H),
-    ( if assoc_list.remove(AL0, K, _, AL1) then
-        AL = [K - V | AL1],
+    Buckets0 = !.HT ^ buckets,
+    array.unsafe_lookup(Buckets0, H, AL0),
+    ( if alist_replace(AL0, K, V, AL1) then
+        AL = AL1,
         MayExpand = no
       else
-        AL = [K - V | AL0],
+        AL = ht_cons(K, V, AL0),
         MayExpand = yes
     ),
-    !HT ^ buckets ^ elem(H) := AL,
+    array.unsafe_set(Buckets0, H, AL, Buckets),
+    !HT ^ buckets := Buckets,
     (
         MayExpand = no
     ;
@@ -297,26 +305,48 @@ set(!.HT, K, V) = !:HT :-
 
 set(K, V, HT, set(HT, K, V)).
 
+:- pred alist_replace(hash_table_alist(K, V)::in, K::in, V::in,
+    hash_table_alist(K, V)::out) is semidet.
+
+alist_replace(ht_cons(HK, HV, T), K, V, AList) :-
+    ( if HK = K then
+        AList = ht_cons(K, V, T)
+      else
+        alist_replace(T, K, V, AList0),
+        AList = ht_cons(HK, HV, AList0)
+    ).
+
 %-----------------------------------------------------------------------------%
 
 search(HT, K, search(HT, K)).
 
 search(HT, K) = V :-
     H = find_slot(HT, K),
-    AL = HT ^ buckets ^ elem(H),
-    assoc_list.search(AL, K, V).
+    array.unsafe_lookup(HT ^ buckets, H, AL),
+    alist_search(AL, K, V).
+
+:- pred alist_search(hash_table_alist(K, V)::in, K::in, V::out) is semidet.
+
+alist_search(ht_cons(HK, HV, T), K, V) :-
+    ( if HK = K then
+        HV = V
+      else
+        alist_search(T, K, V)
+    ).
 
 %-----------------------------------------------------------------------------%
 
 det_insert(!.HT, K, V) = !:HT :-
     H = find_slot(!.HT, K),
-    AL0 = !.HT ^ buckets ^ elem(H),
-    ( if assoc_list.search(AL0, K, _) then
+    Buckets0 = !.HT ^ buckets,
+    array.unsafe_lookup(Buckets0, H, AL0),
+    ( if alist_search(AL0, K, _) then
         throw(software_error("hash_table.det_insert: key already present"))
       else
-        AL = [K - V | AL0]
+        AL = ht_cons(K, V, AL0)
     ),
-    !HT ^ buckets ^ elem(H) := AL,
+    array.unsafe_set(Buckets0, H, AL, Buckets),
+    !HT ^ buckets := Buckets,
     increase_occupants(!HT).
 
 det_insert(K, V, HT, det_insert(HT, K, V)).
@@ -325,13 +355,15 @@ det_insert(K, V, HT, det_insert(HT, K, V)).
 
 det_update(HT0, K, V) = HT :-
     H = find_slot(HT0, K),
-    AL0 = HT0 ^ buckets ^ elem(H),
-    ( if assoc_list.remove(AL0, K, _, AL1) then
-        AL = [K - V | AL1]
+    Buckets0 = HT0 ^ buckets,
+    array.unsafe_lookup(Buckets0, H, AL0),
+    ( if alist_replace(AL0, K, V, AL1) then
+        AL = AL1
       else
         throw(software_error("hash_table.det_update: key not found"))
     ),
-    HT = HT0 ^ buckets ^ elem(H) := AL.
+    array.unsafe_set(Buckets0, H, AL, Buckets),
+    HT = HT0 ^ buckets := Buckets.
 
 det_update(K, V, HT, det_update(HT, K, V)).
 
@@ -349,10 +381,10 @@ elem(K, HT) = lookup(HT, K).
 
 delete(HT0, K) = HT :-
     H = find_slot(HT0, K),
-    AL0 = HT0 ^ buckets ^ elem(H),
-    ( if assoc_list.remove(AL0, K, _, AL) then
+    array.unsafe_lookup(HT0 ^ buckets, H, AL0),
+    ( if alist_remove(AL0, K, AL) then
         HT0 = ht(NumOccupants0, MaxOccupants, HashPred, Buckets0),
-        Buckets = Buckets0 ^ elem(H) := AL,
+        array.unsafe_set(Buckets0, H, AL, Buckets),
         NumOccupants = NumOccupants0 - 1,
         HT = ht(NumOccupants, MaxOccupants, HashPred, Buckets)
       else
@@ -361,10 +393,29 @@ delete(HT0, K) = HT :-
 
 delete(K, HT, delete(HT, K)).
 
+:- pred alist_remove(hash_table_alist(K, V)::in, K::in,
+    hash_table_alist(K, V)::out) is semidet.
+
+alist_remove(ht_cons(HK, HV, T), K, AList) :-
+    ( if HK = K then
+        AList = T
+      else
+        alist_remove(T, K, AList0),
+        AList = ht_cons(HK, HV, AList0)
+    ).
+
 %-----------------------------------------------------------------------------%
 
 to_assoc_list(HT) =
-    foldl(list.append, HT ^ buckets, []).
+    foldl(to_assoc_list_2, HT ^ buckets, []).
+
+:- func to_assoc_list_2(hash_table_alist(K, V), assoc_list(K, V))
+    = assoc_list(K, V).
+
+to_assoc_list_2(ht_nil, AList) = AList.
+
+to_assoc_list_2(ht_cons(K, V, T), AList) =
+    to_assoc_list_2(T, [K - V | AList]).
 
 
 from_assoc_list(HP, AList) = from_assoc_list_2(AList, new_default(HP)).
@@ -404,45 +455,43 @@ expand(HT0, HT) :-
     NumBuckets = NumBuckets0 + NumBuckets0,
     MaxOccupants = MaxOccupants0 + MaxOccupants0,
 
-    Buckets1 = init(NumBuckets, []),
+    Buckets1 = init(NumBuckets, ht_nil),
     reinsert_bindings(0, Buckets0, HashPred, NumBuckets, Buckets1, Buckets),
 
     HT = ht(NumOccupants0 + 1, MaxOccupants, HashPred, Buckets).
 
-:- pred reinsert_bindings(int, array(assoc_list(K, V)), hash_pred(K),
-    int, array(assoc_list(K, V)), array(assoc_list(K,V))).
-:- mode reinsert_bindings(in, array_ui, in(hash_pred),
-    in, array_di, array_uo) is det.
+:- pred reinsert_bindings(int::in, buckets(K, V)::array_ui,
+    hash_pred(K)::in(hash_pred), int::in,
+    buckets(K, V)::array_di, buckets(K, V)::array_uo) is det.
 
 reinsert_bindings(I, OldBuckets, HashPred, NumBuckets, !Buckets) :-
     ( if I >= size(OldBuckets) then
         true
       else
-        AL = OldBuckets ^ elem(I),
-        reinsert_assoc_list(AL, HashPred, NumBuckets, !Buckets),
+        array.unsafe_lookup(OldBuckets, I, AL),
+        reinsert_alist(AL, HashPred, NumBuckets, !Buckets),
         reinsert_bindings(I + 1, OldBuckets, HashPred, NumBuckets, !Buckets)
     ).
 
-:- pred reinsert_assoc_list(assoc_list(K, V), hash_pred(K),
-    int, array(assoc_list(K, V)), array(assoc_list(K, V))).
-:- mode reinsert_assoc_list(in, in(hash_pred),
-    in, array_di, array_uo) is det.
+:- pred reinsert_alist(hash_table_alist(K, V)::in, hash_pred(K)::in(hash_pred),
+    int::in, buckets(K, V)::array_di, buckets(K, V)::array_uo) is det.
 
-reinsert_assoc_list([], _, _, !Buckets).
-reinsert_assoc_list([KV | KVs], HashPred, NumBuckets, !Buckets) :-
-    unsafe_insert(KV, HashPred, NumBuckets, !Buckets),
-    reinsert_assoc_list(KVs, HashPred, NumBuckets, !Buckets).
+reinsert_alist(AL, HashPred, NumBuckets, !Buckets) :-
+    (
+        AL = ht_nil
+    ;
+        AL = ht_cons(K, V, T),
+        unsafe_insert(K, V, HashPred, NumBuckets, !Buckets),
+        reinsert_alist(T, HashPred, NumBuckets, !Buckets)
+    ).
 
-:- pred unsafe_insert(pair(K, V), hash_pred(K),
-    int, array(assoc_list(K, V)), array(assoc_list(K, V))).
-:- mode unsafe_insert(in, in(hash_pred),
-    in, array_di, array_uo) is det.
+:- pred unsafe_insert(K::in, V::in, hash_pred(K)::in(hash_pred), int::in,
+    buckets(K, V)::array_di, buckets(K, V)::array_uo) is det.
 
-unsafe_insert(KV, HashPred, NumBuckets, Buckets0, Buckets) :-
-    KV = K - _,
+unsafe_insert(K, V, HashPred, NumBuckets, Buckets0, Buckets) :-
     find_slot_2(HashPred, K, NumBuckets, H),
-    AL0 = Buckets0 ^ elem(H),
-    Buckets = Buckets0 ^ elem(H) := [KV | AL0].
+    array.unsafe_lookup(Buckets0, H, AL0),
+    array.unsafe_set(Buckets0, H, ht_cons(K, V, AL0), Buckets).
 
 %-----------------------------------------------------------------------------%
 
@@ -565,12 +614,12 @@ munge(N, X) =
 fold(F, HT, X0) = X :-
     foldl(fold_f(F), HT ^ buckets, X0, X).
 
-:- pred fold_f(func(K, V, T) = T, assoc_list(K, V), T, T).
+:- pred fold_f(func(K, V, T) = T, hash_table_alist(K, V), T, T).
 :- mode fold_f(func(in, in, in) = out is det, in, in, out) is det.
 :- mode fold_f(func(in, in, di) = uo is det, in, di, uo) is det.
 
-fold_f(_F, [], !A).
-fold_f(F, [K - V | KVs], !A) :-
+fold_f(_F, ht_nil, !A).
+fold_f(F, ht_cons(K, V, KVs), !A) :-
     F(K, V, !.A) = !:A,
     fold_f(F, KVs, !A).
 
@@ -578,12 +627,12 @@ fold_f(F, [K - V | KVs], !A) :-
 fold(P, HT, !A) :-
     foldl(fold_p(P), HT ^ buckets, !A).
 
-:- pred fold_p(pred(K, V, T, T), assoc_list(K, V), T, T).
+:- pred fold_p(pred(K, V, T, T), hash_table_alist(K, V), T, T).
 :- mode fold_p(pred(in, in, in, out) is det, in, in, out) is det.
 :- mode fold_p(pred(in, in, di, uo) is det, in, di, uo) is det.
 
-fold_p(_P, [], !A).
-fold_p(P, [K - V | KVs], !A) :-
+fold_p(_P, ht_nil, !A).
+fold_p(P, ht_cons(K, V, KVs), !A) :-
     P(K, V, !A),
     fold_p(P, KVs, !A).
 
diff --git a/library/version_hash_table.m b/library/version_hash_table.m
index c965282..c77b869 100644
--- a/library/version_hash_table.m
+++ b/library/version_hash_table.m
@@ -196,9 +196,15 @@
                 num_occupants           :: int,
                 max_occupants           :: int,
                 hash_pred               :: hash_pred(K),
-                buckets                 :: version_array(assoc_list(K, V))
+                buckets                 :: buckets(K, V)
             ).
 
+:- type buckets(K, V) == version_array(hash_table_alist(K, V)).
+
+:- type hash_table_alist(K, V)
+    --->    ht_nil
+    ;       ht_cons(K, V, hash_table_alist(K, V)).
+
 %-----------------------------------------------------------------------------%
 
 new(HashPred, N, MaxOccupancy) = new_2(HashPred, N, MaxOccupancy, yes).
@@ -222,10 +228,10 @@ new_2(HashPred, N, MaxOccupancy, NeedSafety) = HT :-
             MaxOccupants = ceiling_to_int(float(NumBuckets) * MaxOccupancy),
             (
                 NeedSafety = yes,
-                Buckets = version_array.new(NumBuckets, [])
+                Buckets = version_array.new(NumBuckets, ht_nil)
             ;
                 NeedSafety = no,
-                Buckets = version_array.unsafe_new(NumBuckets, [])
+                Buckets = version_array.unsafe_new(NumBuckets, ht_nil)
             ),
             HT = ht(0, MaxOccupants, HashPred, Buckets)
     ).
@@ -280,11 +286,11 @@ find_slot_2(HashPred, K, NumBuckets, H) :-
 set(!.HT, K, V) = !:HT :-
     H = find_slot(!.HT, K),
     AL0 = !.HT ^ buckets ^ elem(H),
-    ( if assoc_list.remove(AL0, K, _, AL1) then
-        AL = [K - V | AL1],
+    ( if alist_replace(AL0, K, V, AL1) then
+        AL = AL1,
         MayExpand = no
       else
-        AL = [K - V | AL0],
+        AL = ht_cons(K, V, AL0),
         MayExpand = yes
     ),
     !HT ^ buckets ^ elem(H) := AL,
@@ -299,6 +305,17 @@ set(!.HT, K, V) = !:HT :-
 
 set(K, V, HT, set(HT, K, V)).
 
+:- pred alist_replace(hash_table_alist(K, V)::in, K::in, V::in,
+    hash_table_alist(K, V)::out) is semidet.
+
+alist_replace(ht_cons(HK, HV, T), K, V, AList) :-
+    ( if HK = K then
+        AList = ht_cons(K, V, T)
+      else
+        alist_replace(T, K, V, AList0),
+        AList = ht_cons(HK, HV, AList0)
+    ).
+
 %-----------------------------------------------------------------------------%
 
 search(HT, K, search(HT, K)).
@@ -306,18 +323,27 @@ search(HT, K, search(HT, K)).
 search(HT, K) = V :-
     H = find_slot(HT, K),
     AL = HT ^ buckets ^ elem(H),
-    assoc_list.search(AL, K, V).
+    alist_search(AL, K, V).
+
+:- pred alist_search(hash_table_alist(K, V)::in, K::in, V::out) is semidet.
+
+alist_search(ht_cons(HK, HV, T), K, V) :-
+    ( if HK = K then
+        HV = V
+      else
+        alist_search(T, K, V)
+    ).
 
 %-----------------------------------------------------------------------------%
 
 det_insert(!.HT, K, V) = !:HT :-
     H = find_slot(!.HT, K),
     AL0 = !.HT ^ buckets ^ elem(H),
-    ( if assoc_list.search(AL0, K, _) then
+    ( if alist_search(AL0, K, _) then
         throw(software_error(
             "version_hash_table.det_insert: key already present"))
       else
-        AL = [K - V | AL0]
+        AL = ht_cons(K, V, AL0)
     ),
     !HT ^ buckets ^ elem(H) := AL,
     increase_occupants(!HT).
@@ -329,8 +355,8 @@ det_insert(K, V, HT, det_insert(HT, K, V)).
 det_update(HT0, K, V) = HT :-
     H = find_slot(HT0, K),
     AL0 = HT0 ^ buckets ^ elem(H),
-    ( if assoc_list.remove(AL0, K, _, AL1) then
-        AL = [K - V | AL1]
+    ( if alist_replace(AL0, K, V, AL1) then
+        AL = AL1
       else
         throw(software_error("version_hash_table.det_update: key not found"))
     ),
@@ -353,7 +379,7 @@ elem(K, HT) = lookup(HT, K).
 delete(HT0, K) = HT :-
     H = find_slot(HT0, K),
     AL0 = HT0 ^ buckets ^ elem(H),
-    ( if assoc_list.remove(AL0, K, _, AL) then
+    ( if alist_remove(AL0, K, AL) then
         HT0 = ht(NumOccupants0, MaxOccupants, HashPred, Buckets0),
         Buckets = Buckets0 ^ elem(H) := AL,
         NumOccupants = NumOccupants0 - 1,
@@ -364,10 +390,29 @@ delete(HT0, K) = HT :-
 
 delete(K, HT, delete(HT, K)).
 
+:- pred alist_remove(hash_table_alist(K, V)::in, K::in,
+    hash_table_alist(K, V)::out) is semidet.
+
+alist_remove(ht_cons(HK, HV, T), K, AList) :-
+    ( if HK = K then
+        AList = T
+      else
+        alist_remove(T, K, AList0),
+        AList = ht_cons(HK, HV, AList0)
+    ).
+
 %-----------------------------------------------------------------------------%
 
 to_assoc_list(HT) =
-    foldl(list.append, HT ^ buckets, []).
+    foldl(to_assoc_list_2, HT ^ buckets, []).
+
+:- func to_assoc_list_2(hash_table_alist(K, V), assoc_list(K, V))
+    = assoc_list(K, V).
+
+to_assoc_list_2(ht_nil, AList) = AList.
+
+to_assoc_list_2(ht_cons(K, V, T), AList) =
+    to_assoc_list_2(T, [K - V | AList]).
 
 
 from_assoc_list(HP, AList) = from_assoc_list_2(AList, new_default(HP)).
@@ -407,45 +452,43 @@ expand(HT0, HT) :-
     MaxOccupants = MaxOccupants0 + MaxOccupants0,
 
     unsafe_hash_pred_cast(HashPred0, HashPred),
-    Buckets1 = init(NumBuckets, []),
+    Buckets1 = init(NumBuckets, ht_nil),
     reinsert_bindings(0, Buckets0, HashPred, NumBuckets, Buckets1, Buckets),
 
     HT = ht(NumOccupants0 + 1, MaxOccupants, HashPred, Buckets).
 
-:- pred reinsert_bindings(int, version_array(assoc_list(K, V)), hash_pred(K),
-    int, version_array(assoc_list(K, V)), version_array(assoc_list(K,V))).
-:- mode reinsert_bindings(in, in, in(hash_pred),
-    in, in, out) is det.
+:- pred reinsert_bindings(int::in, buckets(K, V)::in,
+    hash_pred(K)::in(hash_pred), int::in,
+    buckets(K, V)::in, buckets(K, V)::out) is det.
 
 reinsert_bindings(I, OldBuckets, HashPred, NumBuckets, !Buckets) :-
     ( if I >= size(OldBuckets) then
         true
       else
         AL = OldBuckets ^ elem(I),
-        reinsert_assoc_list(AL, HashPred, NumBuckets, !Buckets),
+        reinsert_alist(AL, HashPred, NumBuckets, !Buckets),
         reinsert_bindings(I + 1, OldBuckets, HashPred, NumBuckets, !Buckets)
     ).
 
-:- pred reinsert_assoc_list(assoc_list(K, V), hash_pred(K),
-    int, version_array(assoc_list(K, V)), version_array(assoc_list(K, V))).
-:- mode reinsert_assoc_list(in, in(hash_pred),
-    in, in, out) is det.
+:- pred reinsert_alist(hash_table_alist(K, V)::in, hash_pred(K)::in(hash_pred),
+    int::in, buckets(K, V)::in, buckets(K, V)::out) is det.
 
-reinsert_assoc_list([], _, _, !Buckets).
-reinsert_assoc_list([KV | KVs], HashPred, NumBuckets, !Buckets) :-
-    unsafe_insert(KV, HashPred, NumBuckets, !Buckets),
-    reinsert_assoc_list(KVs, HashPred, NumBuckets, !Buckets).
+reinsert_alist(AL, HashPred, NumBuckets, !Buckets) :-
+    (
+        AL = ht_nil
+    ;
+        AL = ht_cons(K, V, T),
+        unsafe_insert(K, V, HashPred, NumBuckets, !Buckets),
+        reinsert_alist(T, HashPred, NumBuckets, !Buckets)
+    ).
 
-:- pred unsafe_insert(pair(K, V), hash_pred(K),
-    int, version_array(assoc_list(K, V)), version_array(assoc_list(K, V))).
-:- mode unsafe_insert(in, in(hash_pred),
-    in, in, out) is det.
+:- pred unsafe_insert(K::in, V::in, hash_pred(K)::in(hash_pred), int::in,
+    buckets(K, V)::in, buckets(K, V)::out) is det.
 
-unsafe_insert(KV, HashPred, NumBuckets, Buckets0, Buckets) :-
-    KV = K - _,
+unsafe_insert(K, V, HashPred, NumBuckets, Buckets0, Buckets) :-
     find_slot_2(HashPred, K, NumBuckets, H),
     AL0 = Buckets0 ^ elem(H),
-    Buckets = Buckets0 ^ elem(H) := [KV | AL0].
+    Buckets = Buckets0 ^ elem(H) := ht_cons(K, V, AL0).
 
 %-----------------------------------------------------------------------------%
 
@@ -568,12 +611,12 @@ munge(N, X) =
 fold(F, HT, X0) = X :-
     foldl(fold_f(F), HT ^ buckets, X0, X).
 
-:- pred fold_f(func(K, V, T) = T, assoc_list(K, V), T, T).
+:- pred fold_f(func(K, V, T) = T, hash_table_alist(K, V), T, T).
 :- mode fold_f(func(in, in, in) = out is det, in, in, out) is det.
 :- mode fold_f(func(in, in, di) = uo is det, in, di, uo) is det.
 
-fold_f(_F, [], !A).
-fold_f(F, [K - V | KVs], !A) :-
+fold_f(_F, ht_nil, !A).
+fold_f(F, ht_cons(K, V, KVs), !A) :-
     F(K, V, !.A) = !:A,
     fold_f(F, KVs, !A).
 
@@ -581,12 +624,12 @@ fold_f(F, [K - V | KVs], !A) :-
 fold(P, HT, !A) :-
     foldl(fold_p(P), HT ^ buckets, !A).
 
-:- pred fold_p(pred(K, V, T, T), assoc_list(K, V), T, T).
+:- pred fold_p(pred(K, V, T, T), hash_table_alist(K, V), T, T).
 :- mode fold_p(pred(in, in, in, out) is det, in, in, out) is det.
 :- mode fold_p(pred(in, in, di, uo) is det, in, di, uo) is det.
 
-fold_p(_P, [], !A).
-fold_p(P, [K - V | KVs], !A) :-
+fold_p(_P, ht_nil, !A).
+fold_p(P, ht_cons(K, V, KVs), !A) :-
     P(K, V, !A),
     fold_p(P, KVs, !A).
 

--------------------------------------------------------------------------
mercury-reviews mailing list
Post messages to:       mercury-reviews at csse.unimelb.edu.au
Administrative Queries: owner-mercury-reviews at csse.unimelb.edu.au
Subscriptions:          mercury-reviews-request at csse.unimelb.edu.au
--------------------------------------------------------------------------



More information about the reviews mailing list