[m-rev.] for review: rationalise hash functions across the standard library

Julien Fischer jfischer at opturion.com
Tue Feb 11 11:42:34 AEDT 2020


For review by Peter.

We discussed this on the developers list back in August 2019.

--------------------------------------------------------------------

Rationalise hash functions across the standard library.

Currently, the hash functions for (some of) the primitive types are defined in
three places:

    1. The hash_table module.
    2. The version_hash_table module (duplicates of the above).
    3. Some (but not all) of the library modules for the primitive types.

This change makes the library module for a primitive type provide the hash
function for that type and deprecates the versions in the hash table modules.

Additionally, deprecate the "generic" has functions in the hash table modules.

library/hash_table.m:
library/version_hash_table.m:
     As above.

library/char.m:
library/int.m:
library/uint.m:
     Add hash/1 and hash/2.

library/float.m:
     Add hash/2.

library/robdd.m:
     Replace a call to the deprecated function.

NEWS:
     Announce the above additions and deprecations.

tests/hard_coded/hash_table_delete.m:
tests/hard_coded/hash_table_test.m:
tests/hard_coded/version_hash_table_delete.m:
tests/hard_coded/version_hash_table_test.m:
     Conform to the above change.

Julien.

diff --git a/NEWS b/NEWS
index 488d2c3..f056468 100644
--- a/NEWS
+++ b/NEWS
@@ -4,13 +4,43 @@ NEWS since Mercury 20.01.x
  Changes to the Mercury standard library
  ---------------------------------------

+### Changes to the `char` module
+
+* The following function and predicate have been added:
+
+    - func `hash/1`
+    - pred `hash/2`
+
+### Changes to the `float` module
+
+* The following predicate has been added:
+
+    - pred `hash/2`
+
  ### Changes to the `hash_table` module

+* The following predicates have been deprecated and will be removed in a future
+  release:
+
+    - pred `int_hash/2`
+    - pred `uint_hash/2`
+    - pred `float_hash/2`
+    - pred `char_hash/2`
+    - pred `string_hash/2`
+    - pred `generic_hash/2`
+
  * The following obsolete functions have been removed:

      - func `new/3`
      - func `new_default/1`

+### Changes to the `int` module
+
+* The following function and predicate have been added:
+
+    - func `hash/1`
+    - pred `hash/2`
+
  ### Changes to the `map` module

  * The following function and predicate have been added:
@@ -18,12 +48,31 @@ Changes to the Mercury standard library
      - func `keys_as_set/1`
      - pred `keys_as_set/2`

+### Changes to the `uint` module
+
+* The following function and predicate have been added:
+
+    - func `hash/1`
+    - pred `hash/2`
+
  ### Changes to the `version_array` module

  * The following obsolete function has been removed:

      - func `unsafe_new/2`

+### Changes to the `version_hash_table` module
+
+* The following predicates have been deprecated and will be removed in a future
+  release:
+
+    - pred `int_hash/2`
+    - pred `uint_hash/2`
+    - pred `float_hash/2`
+    - pred `char_hash/2`
+    - pred `string_hash/2`
+    - pred `generic_hash/2`
+
  NEWS for Mercury 20.01.1
  ========================

diff --git a/library/char.m b/library/char.m
index 04ecac3..59e2459 100644
--- a/library/char.m
+++ b/library/char.m
@@ -397,6 +397,16 @@
  :- pred det_int_to_digit(int::in, char::out) is det.

  %---------------------------------------------------------------------------%
+%
+% Computing hashes of charrs.
+%
+
+    % Compute a hash value for a char.
+    %
+:- func hash(char) = int.
+:- pred hash(char::in, int::out) is det.
+
+%---------------------------------------------------------------------------%
  %---------------------------------------------------------------------------%

  :- implementation.
@@ -404,6 +414,7 @@
  :- import_module int.
  :- import_module require.
  :- import_module term_io.
+:- import_module uint.

  :- instance enum(character) where [
      (to_int(X) = Y :-
@@ -1162,5 +1173,13 @@ int_to_extended_digit(34, 'Y').
  int_to_extended_digit(35, 'Z').

  %---------------------------------------------------------------------------%
+
+hash(C) = H :-
+    uint.hash(uint.cast_from_int(char.to_int(C)), H).
+
+hash(C, H) :-
+    H = hash(C).
+
+%---------------------------------------------------------------------------%
  :- end_module char.
  %---------------------------------------------------------------------------%
diff --git a/library/float.m b/library/float.m
index 0d246d8..120ef09 100644
--- a/library/float.m
+++ b/library/float.m
@@ -2,7 +2,7 @@
  % vim: ft=mercury ts=4 sw=4 et
  %---------------------------------------------------------------------------%
  % Copyright (C) 1994-1998,2001-2008,2010, 2012 The University of Melbourne.
-% Copyright (C) 2013-2016, 2018 The Mercury team.
+% Copyright (C) 2013-2016, 2018-2020 The Mercury team.
  % This file is distributed under the terms specified in COPYING.LIB.
  %---------------------------------------------------------------------------%
  %
@@ -224,6 +224,7 @@
      % Compute a non-negative integer hash value for a float.
      %
  :- func hash(float) = int.
+:- pred hash(float::in, int::out) is det.

  %---------------------------------------------------------------------------%
  %
@@ -973,6 +974,9 @@ multiply_by_pow(Scale0, Base, Exp) = Result :-
      H = erlang:phash2(F)
  ").

+hash(F, H) :-
+    H = hash(F).
+
  %---------------------------------------------------------------------------%

  is_infinite(F) :-
diff --git a/library/hash_table.m b/library/hash_table.m
index 5b412cd..81a6281 100644
--- a/library/hash_table.m
+++ b/library/hash_table.m
@@ -254,11 +254,17 @@

      % Default hash_preds for ints and strings and everything (buwahahaha!)
      %
+:- pragma obsolete(int_hash/2, [int.hash/2]).
  :- pred int_hash(int::in, int::out) is det.
+:- pragma obsolete(uint_hash/2, [uint.hash/2]).
  :- pred uint_hash(uint::in, int::out) is det.
+:- pragma obsolete(float_hash/2, [float.hash/2]).
  :- pred float_hash(float::in, int::out) is det.
+:- pragma obsolete(char_hash/2, [char.hash/2]).
  :- pred char_hash(char::in, int::out) is det.
+:- pragma obsolete(string_hash/2, [string.hash/2]).
  :- pred string_hash(string::in, int::out) is det.
+:- pragma obsolete(generic_hash/2).
  :- pred generic_hash(T::in, int::out) is det.

  %---------------------------------------------------------------------------%
@@ -774,50 +780,23 @@ fold3_p(P, List, !A, !B, !C) :-

  %---------------------------------------------------------------------------%

-% The integer hash functions below are originally from:
-%
-%  http://www.concentric.net/~Ttwang/tech/inthash.htm
-%
-% The above link is now dead; the last version can be found at:
-%
-%  https://web.archive.org/web/20121102023700/http://www.concentric.net/~Ttwang/tech/inthash.htm
-%
-% The algorithms from that page that we use are:
-%
-%   public int hash32shiftmult(int key)
-%   public long hash64shift(long key)
-
  int_hash(Key, Hash) :-
      UKey = uint.cast_from_int(Key),
      uint_hash(UKey, Hash).

-uint_hash(!.Key, Hash) :-
-    C2 = 0x_27d4_eb2d_u, % A prime or odd constant.
-    ( if bits_per_uint = 32 then
-        !:Key = (!.Key `xor` 61_u) `xor` (!.Key >> 16),
-        !:Key = !.Key + (!.Key << 3),
-        !:Key = !.Key `xor` (!.Key >> 4),
-        !:Key = !.Key * C2,
-        !:Key = !.Key `xor` (!.Key >> 15)
-    else
-        !:Key = (\ !.Key) + (!.Key << 21), % !:Key = (!.Key << 21) - !.Key - 1
-        !:Key = !.Key `xor` (!.Key >> 24),
-        !:Key = (!.Key + (!.Key << 3)) + (!.Key << 8), % !.Key * 265
-        !:Key = !.Key `xor` (!.Key >> 14),
-        !:Key = (!.Key + (!.Key << 2)) + (!.Key << 4), % !.Key * 21
-        !:Key = !.Key `xor` (!.Key >> 28),
-        !:Key = !.Key + (!.Key << 31)
-    ),
-    Hash = uint.cast_to_int(!.Key).
+uint_hash(Key, Hash) :-
+    uint.hash(Key, Hash).

-float_hash(F, float.hash(F)).
+float_hash(F, Hash) :-
+    float.hash(F, Hash).
      % There are almost certainly better ones out there...

  char_hash(C, H) :-
+    char.hash(C, H).
      % There are almost certainly better ones out there...
-    int_hash(char.to_int(C), H).

-string_hash(S, string.hash(S)).
+string_hash(S, H) :-
+    string.hash(S, H).
      % There are almost certainly better ones out there...

  %---------------------------------------------------------------------------%
diff --git a/library/int.m b/library/int.m
index 071c18b..f2e6c9e 100644
--- a/library/int.m
+++ b/library/int.m
@@ -2,7 +2,7 @@
  % vim: ft=mercury ts=4 sw=4 et
  %---------------------------------------------------------------------------%
  % Copyright (C) 1994-2012 The University of Melbourne.
-% Copyright (C) 2013-2018 The Mercury team.
+% Copyright (C) 2013-2018, 2020 The Mercury team.
  % This file is distributed under the terms specified in COPYING.LIB.
  %---------------------------------------------------------------------------%
  %
@@ -473,6 +473,16 @@
  :- func int_to_doc(int) = pretty_printer.doc.

  %---------------------------------------------------------------------------%
+%
+% Computing hashes of ints.
+%
+
+    % Compute a hash value for an int.
+    %
+:- func hash(int) = int.
+:- pred hash(int::in, int::out) is det.
+
+%---------------------------------------------------------------------------%
  %---------------------------------------------------------------------------%

  :- implementation.
@@ -483,13 +493,18 @@

  %---------------------------------------------------------------------------%

+% XXX the module qualification in these promises is necessary because
+% otherwise the compiler gets confused by the import of the uint module
+% in the implementation section of this one.
+
      % commutativity and associativity of +
-:- promise all [A, B, C]        ( C = B + A <=> C = A + B ).
-:- promise all [A, B, C, ABC]   ( ABC = (A + B) + C <=> ABC = A + (B + C) ).
+:- promise all [A, B, C] ( C = int.(B + A) <=> C = int.(A + B) ).
+:- promise all [A, B, C, ABC] ( ABC = int.(A + B) + C <=>
+    ABC = A + int.(B + C) ).

      % commutativity and associativity of *
-:- promise all [A, B, C]        ( C = B * A <=> C = A * B ).
-:- promise all [A, B, C, ABC]   ( ABC = (A * B) * C <=> ABC = A * (B * C) ).
+:- promise all [A, B, C] ( C = int.(B * A) <=> C = A * B ).
+:- promise all [A, B, C, ABC] ( ABC = int.(A * B) * C <=> ABC = A * (B * C) ).

  %---------------------------------------------------------------------------%

@@ -530,6 +545,7 @@
  :- import_module exception.
  :- import_module math.
  :- import_module string.
+:- import_module uint.

  %---------------------------------------------------------------------------%

@@ -1056,6 +1072,15 @@ int_to_doc(X) = str(string.int_to_string(X)).

  %---------------------------------------------------------------------------%

+hash(Int) = Hash :-
+    UInt = uint.cast_from_int(Int),
+    Hash = uint.hash(UInt).
+
+hash(Int, Hash) :-
+    Hash = int.hash(Int).
+
+%---------------------------------------------------------------------------%
+
  :- pragma inline(floor_to_multiple_of_bits_per_int/1).

  floor_to_multiple_of_bits_per_int(X) = Floor :-
diff --git a/library/robdd.m b/library/robdd.m
index b825430..5eded7a 100644
--- a/library/robdd.m
+++ b/library/robdd.m
@@ -407,7 +407,6 @@

  :- import_module assoc_list.
  :- import_module bool.
-:- import_module hash_table.
  :- import_module int.
  :- import_module multi_map.
  :- import_module pair.
@@ -1178,7 +1177,7 @@ restrict_true_false_vars_2(TrueVars0, FalseVars0, R0, R, Seen0, Seen) :-
  :- pred robdd_hash(robdd(T)::in, int::out) is det.

  robdd_hash(R, H) :-
-    int_hash(node_num(R), H).
+    int.hash(node_num(R), H).

  restrict_filter(P, F0) =
      restrict_filter(P, (pred(_::in) is semidet :- true), F0).
diff --git a/library/uint.m b/library/uint.m
index d9172cb..604e90e 100644
--- a/library/uint.m
+++ b/library/uint.m
@@ -1,7 +1,7 @@
  %---------------------------------------------------------------------------%
  % vim: ft=mercury ts=4 sw=4 et
  %---------------------------------------------------------------------------%
-% Copyright (C) 2016-2018 The Mercury team.
+% Copyright (C) 2016-2020 The Mercury team.
  % This file is distributed under the terms specified in COPYING.LIB.
  %---------------------------------------------------------------------------%
  %
@@ -187,6 +187,16 @@
  :- func uint_to_doc(uint) = pretty_printer.doc.

  %---------------------------------------------------------------------------%
+%
+% Computing hashes of uints.
+%
+
+    % Compute a hash value for a uint.
+    %
+:- func hash(uint) = int.
+:- pred hash(uint::in, int::out) is det.
+
+%---------------------------------------------------------------------------%
  %---------------------------------------------------------------------------%

  :- implementation.
@@ -422,5 +432,42 @@ odd(X) :-
  uint_to_doc(X) = str(string.uint_to_string(X)).

  %---------------------------------------------------------------------------%
+
+% The integer hash functions below are originally from:
+%
+%  http://www.concentric.net/~Ttwang/tech/inthash.htm
+%
+% The above link is now dead; the last version can be found at:
+%
+%  https://web.archive.org/web/20121102023700/http://www.concentric.net/~Ttwang/tech/inthash.htm
+%
+% The algorithms from that page that we use are:
+%
+%   public int hash32shiftmult(int key)
+%   public long hash64shift(long key)
+
+hash(!.Key) = Hash :-
+    C2 = 0x_27d4_eb2d_u, % A prime or odd constant.
+    ( if bits_per_uint = 32 then
+        !:Key = (!.Key `xor` 61_u) `xor` (!.Key >> 16),
+        !:Key = !.Key + (!.Key << 3),
+        !:Key = !.Key `xor` (!.Key >> 4),
+        !:Key = !.Key * C2,
+        !:Key = !.Key `xor` (!.Key >> 15)
+    else
+        !:Key = (\ !.Key) + (!.Key << 21), % !:Key = (!.Key << 21) - !.Key - 1
+        !:Key = !.Key `xor` (!.Key >> 24),
+        !:Key = (!.Key + (!.Key << 3)) + (!.Key << 8), % !.Key * 265
+        !:Key = !.Key `xor` (!.Key >> 14),
+        !:Key = (!.Key + (!.Key << 2)) + (!.Key << 4), % !.Key * 21
+        !:Key = !.Key `xor` (!.Key >> 28),
+        !:Key = !.Key + (!.Key << 31)
+    ),
+    Hash = uint.cast_to_int(!.Key).
+
+hash(UInt, Hash) :-
+    Hash = hash(UInt).
+
+%---------------------------------------------------------------------------%
  :- end_module uint.
  %---------------------------------------------------------------------------%
diff --git a/library/version_hash_table.m b/library/version_hash_table.m
index 50bab20..db128f2 100644
--- a/library/version_hash_table.m
+++ b/library/version_hash_table.m
@@ -2,7 +2,7 @@
  % vim: ft=mercury ts=4 sw=4 et
  %---------------------------------------------------------------------------%
  % Copyright (C) 2004-2006, 2010-2012 The University of Melbourne.
-% Copyright (C) 2013-2015, 2017-2018 The Mercury team.
+% Copyright (C) 2013-2015, 2017-2020 The Mercury team.
  % This file is distributed under the terms specified in COPYING.LIB.
  %---------------------------------------------------------------------------%
  %
@@ -93,11 +93,17 @@
      % They are very simple and almost certainly not very good
      % for your purpose, whatever your purpose is.
      %
+:- pragma obsolete(int_hash/2, [int.hash/2]).
  :- pred int_hash(int::in, int::out) is det.
+:- pragma obsolete(uint_hash/2, [uint.hash/2]).
  :- pred uint_hash(uint::in, int::out) is det.
+:- pragma obsolete(char_hash/2, [char.hash/2]).
  :- pred char_hash(char::in, int::out) is det.
+:- pragma obsolete(string_hash/2, [string.hash/2]).
  :- pred string_hash(string::in, int::out) is det.
+:- pragma obsolete(float_hash/2, [float.hash/2]).
  :- pred float_hash(float::in, int::out) is det.
+:- pragma obsolete(generic_hash/2).
  :- pred generic_hash(T::in, int::out) is det.

      % Copy the hash table explicitly.
@@ -341,54 +347,20 @@ find_slot_2(HashPred, K, NumBuckets, H) :-

  %---------------------------------------------------------------------------%

-    % The integer hash functions below are originally from:
-    %
-    %   http://www.concentric.net/~Ttwang/tech/inthash.htm
-    %
-    % The above link is now dead; the last version can be found at:
-    %
-    %   https://web.archive.org/web/20121102023700/http://www.concentric.net/~Ttwang/tech/inthash.htm
-    %
-    % The algorithms from that page that we use are:
-    %
-    %   public int hash32shiftmult(int key)
-    %   public long hash64shift(long key)
-    %
  int_hash(Key, Hash) :-
-    UKey = uint.cast_from_int(Key),
-    uint_hash(UKey, Hash).
-
-uint_hash(!.Key, Hash) :-
-    C2 = 0x_27d4_eb2d_u, % A prime or odd constant.
-    ( if bits_per_uint = 32 then
-        !:Key = (!.Key `xor` 61_u) `xor` (!.Key >> 16),
-        !:Key = !.Key + (!.Key << 3),
-        !:Key = !.Key `xor` (!.Key >> 4),
-        !:Key = !.Key * C2,
-        !:Key = !.Key `xor` (!.Key >> 15)
-    else
-        !:Key = (\ !.Key) + (!.Key << 21), % !:Key = (!.Key << 21) - !.Key - 1
-        !:Key = !.Key `xor` (!.Key >> 24),
-        !:Key = (!.Key + (!.Key << 3)) + (!.Key << 8), % !.Key * 265
-        !:Key = !.Key `xor` (!.Key >> 14),
-        !:Key = (!.Key + (!.Key << 2)) + (!.Key << 4), % !.Key * 21
-        !:Key = !.Key `xor` (!.Key >> 28),
-        !:Key = !.Key + (!.Key << 31)
-    ),
-    Hash = uint.cast_to_int(!.Key).
+    int.hash(Key, Hash).
+
+uint_hash(Key, Hash) :-
+    uint.hash(Key, Hash).

-    % There are almost certainly better ones out there...
-    %
  char_hash(C, H) :-
-    int_hash(char.to_int(C), H).
+    char.hash(C, H).

-    % There are almost certainly better ones out there...
-    %
-string_hash(S, string.hash(S)).
+string_hash(S, H) :-
+    string.hash(S, H).

-    % There are almost certainly better ones out there...
-    %
-float_hash(F, float.hash(F)).
+float_hash(F, H) :-
+    float.hash(F, H).

  generic_hash(T, H) :-
      % This, again, is straight off the top of my head.
diff --git a/tests/hard_coded/hash_table_delete.m b/tests/hard_coded/hash_table_delete.m
index 9e7d80a..37d718b 100644
--- a/tests/hard_coded/hash_table_delete.m
+++ b/tests/hard_coded/hash_table_delete.m
@@ -22,7 +22,7 @@

  main(!IO) :-
      some [!HT] (
-        !:HT = hash_table.init_default(generic_hash),
+        !:HT = hash_table.init_default(string.hash),
          myfoldl(fill, keys, !HT),
          myfoldl(hash_table.delete, keys, !HT),
          Residue = hash_table.to_assoc_list(!.HT),
diff --git a/tests/hard_coded/hash_table_test.m b/tests/hard_coded/hash_table_test.m
index f443217..4c3a398 100644
--- a/tests/hard_coded/hash_table_test.m
+++ b/tests/hard_coded/hash_table_test.m
@@ -38,7 +38,7 @@ main(!IO) :-
      Max = string.det_to_int(A),
      MaxOccupancy = string.det_to_float(B),
      some [!HT] (
-        !:HT = hash_table.init(int_hash, 1, MaxOccupancy),
+        !:HT = hash_table.init(int.hash, 1, MaxOccupancy),

          io.write_string("Inserting elements\n", !IO),
          inst_preserving_fold_up(do_insert, 0, Max - 1, !HT),
diff --git a/tests/hard_coded/version_hash_table_delete.m b/tests/hard_coded/version_hash_table_delete.m
index 0412ab2..bd80f4a 100644
--- a/tests/hard_coded/version_hash_table_delete.m
+++ b/tests/hard_coded/version_hash_table_delete.m
@@ -22,7 +22,7 @@

  main(!IO) :-
      some [!HT] (
-        !:HT = version_hash_table.init_default(generic_hash),
+        !:HT = version_hash_table.init_default(string.hash),
          list.foldl(fill, keys, !HT),
          list.foldl(version_hash_table.delete, keys, !HT),
          Residue = version_hash_table.to_assoc_list(!.HT),
diff --git a/tests/hard_coded/version_hash_table_test.m b/tests/hard_coded/version_hash_table_test.m
index d258203..0f2b22b 100644
--- a/tests/hard_coded/version_hash_table_test.m
+++ b/tests/hard_coded/version_hash_table_test.m
@@ -16,6 +16,7 @@

  :- import_module list.
  :- import_module pair.
+:- import_module string.
  :- import_module version_hash_table.

  %---------------------------------------------------------------------------%
@@ -23,7 +24,7 @@
  main(!IO) :-
      % Test `fold' which had an off-by-one bug.
      some [!HT] (
-        !:HT = version_hash_table.init_default(generic_hash),
+        !:HT = version_hash_table.init_default(string.hash),
          version_hash_table.set("one", 1, !HT),
          version_hash_table.set("two", 2, !HT),
          version_hash_table.set("three", 3, !HT),


More information about the reviews mailing list