hash_map
Fergus Henderson
fjh at cs.mu.oz.au
Fri May 2 00:51:40 AEST 1997
Hi,
I did an implementation of hash_map, i.e. maps implemented using hash tables.
The code (which follows) is type and mode-correct, but not unique-mode-correct.
Making it unique-mode-correct requires fixing the compiler: as well as
Andrew's changes, which will make `ui' work, we also need to solve the
problem with uniq_array__lookup mentioned in my other mail.
%-----------------------------------------------------------------------------%
% Copyright (C) 1997 University of Melbourne.
% This file may only be copied under the terms of the GNU Library General
% Public License - see the file COPYING.LIB in the Mercury distribution.
%-----------------------------------------------------------------------------%
%
% File: hash_map.m.
% Main author: fjh.
% Stability: medium.
%
% This file provides an implementation of maps using hash tables.
% A map (also known as a dictionary or an associative array) is a collection
% of (Key,Data) pairs which allows you to look up any Data item given the Key.
% This implementation gives you O(1) lookup, assuming your hash values
% are evenly distributed, or O(log N) in the worst case.
% Updates are O(1) amortized; unique modes are used to ensure that
% the implementation can use destructive update.
%
%-----------------------------------------------------------------------------%
%-----------------------------------------------------------------------------%
:- module hash_map.
:- interface.
:- import_module set, list, assoc_list.
%-----------------------------------------------------------------------------%
:- type hash_map(Key, Value).
% :- inst uniq_hash_map(K, V).
:- mode di_hash_map(K, V) :: uniq_hash_map(K, V) -> dead.
:- mode di_hash_map :: uniq_hash_map(ground, ground) -> dead.
:- mode uo_hash_map(K, V) :: free -> uniq_hash_map(K, V).
:- mode uo_hash_map :: free -> uniq_hash_map(ground, ground).
%-----------------------------------------------------------------------------%
% Initialize an empty map.
:- pred hash_map__init(hash_map(_,_)).
:- mode hash_map__init(uo) is det.
% Check whether a map is empty.
:- pred hash_map__is_empty(hash_map(_,_)).
% :- mode hash_map__is_empty(ui) is semidet.
:- mode hash_map__is_empty(in) is semidet.
% Check whether map contains key
:- pred hash_map__contains(hash_map(K,_V), K).
% :- mode hash_map__contains(ui, in) is semidet.
:- mode hash_map__contains(in, in) is semidet.
:- pred hash_map__member(hash_map(K,V), K, V).
% :- mode hash_map__member(ui, out, out) is nondet.
:- mode hash_map__member(in, out, out) is nondet.
% Search map for key.
:- pred hash_map__search(hash_map(K,V), K, V).
% :- mode hash_map__search(ui, in, out) is semidet.
:- mode hash_map__search(in, in, out) is semidet.
% Search map for key, but abort if search fails.
:- pred hash_map__lookup(hash_map(K,V), K, V).
% :- mode hash_map__lookup(ui, in, out) is det.
:- mode hash_map__lookup(in, in, out) is det.
% Search map for data.
:- pred hash_map__inverse_search(hash_map(K,V), V, K).
% :- mode hash_map__inverse_search(mui, in, out) is nondet.
:- mode hash_map__inverse_search(in, in, out) is nondet.
% Insert a new key and corresponding value into a map.
% Fail if the key already exists.
:- pred hash_map__insert(hash_map(K,V), K, V, hash_map(K,V)).
:- mode hash_map__insert(di, di, di, uo) is semidet.
% Insert a new key and corresponding value into a map.
% Abort if the key already exists.
:- pred hash_map__det_insert(hash_map(K,V), K, V, hash_map(K,V)).
:- mode hash_map__det_insert(di, di, di, uo) is det.
% Apply hash_map__det_insert to key - value pairs from corresponding
% lists.
:- pred hash_map__det_insert_from_corresponding_lists(hash_map(K,V), list(K),
list(V), hash_map(K,V)).
:- mode hash_map__det_insert_from_corresponding_lists(di, in, in, uo) is det.
% Update the value corresponding to a given key
% Fail if the key doesn't already exist.
:- pred hash_map__update(hash_map(K,V), K, V, hash_map(K,V)).
:- mode hash_map__update(di, in, in, uo) is semidet.
% Update the value corresponding to a given key
% Abort if the key doesn't already exist.
:- pred hash_map__det_update(hash_map(K,V), K, V, hash_map(K,V)).
:- mode hash_map__det_update(di, in, in, uo) is det.
% Update value if the key is already present, otherwise
% insert new key and value.
:- pred hash_map__set(hash_map(K,V), K, V, hash_map(K,V)).
:- mode hash_map__set(di, di, di, uo) is det.
:- mode hash_map__set(in, in, in, out) is det.
% Given a map, return a list of all the keys in the map
:- pred hash_map__unsorted_keys(hash_map(K, _V), list(K)).
:- mode hash_map__unsorted_keys(in, out) is det.
% Given a hash_map, return a list of all the data values in the map
:- pred hash_map__unsorted_values(hash_map(_K, V), list(V)).
:- mode hash_map__unsorted_values(in, out) is det.
% convert a hash_map to an association list
:- pred hash_map__to_unsorted_assoc_list(hash_map(K,V), assoc_list(K,V)).
:- mode hash_map__to_unsorted_assoc_list(in, out) is det.
% convert an association list to a hash_map
:- pred hash_map__from_assoc_list(assoc_list(K,V), hash_map(K,V)).
:- mode hash_map__from_assoc_list(in, out) is det.
/* NOT YET IMPLEMENTED
% delete a key-value pair from a map
% if the key is not present, leave the map unchanged
:- pred hash_map__delete(map(K,V), K, map(K,V)).
:- mode hash_map__delete(di, in, uo) is det.
:- mode hash_map__delete(in, in, out) is det.
% apply hash_map__delete/3 to a list of keys
:- pred hash_map__delete_list(map(K,V), list(K), map(K,V)).
:- mode hash_map__delete_list(di, in, uo) is det.
:- mode hash_map__delete_list(in, in, out) is det.
% delete a key-value pair from a map and return the value.
% fail if the key is not present
:- pred hash_map__remove(map(K,V), K, V, map(K,V)).
:- mode hash_map__remove(in, in, out, out) is semidet.
% delete a key-value pair from a map and return the value.
% abort if the key is not present
:- pred hash_map__det_remove(map(K,V), K, V, map(K,V)).
:- mode hash_map__det_remove(in, in, out, out) is det.
*/
% Count the number of elements in the map.
:- pred hash_map__count(hash_map(K, V), int).
:- mode hash_map__count(in, out) is det.
% Convert a pair of lists (which must be of the same length)
% to a map.
:- pred hash_map__from_corresponding_lists(list(K), list(V), hash_map(K, V)).
:- mode hash_map__from_corresponding_lists(in, in, out) is det.
% For hash_map__merge(MapA, MapB, Map), MapA and MapB must
% not both contain the same key.
:- pred hash_map__merge(hash_map(K, V), hash_map(K, V), hash_map(K, V)).
:- mode hash_map__merge(in, in, out) is det.
% For hash_map__overlay(MapA, MapB, Map), if MapA and MapB both
% contain the same key, then Map will map that key to
% the value from MapB. In otherwords, MapB takes precedence
% over MapA.
:- pred hash_map__overlay(hash_map(K,V), hash_map(K,V), hash_map(K,V)).
:- mode hash_map__overlay(in, in, out) is det.
% hash_map__select takes a map and a set of keys and returns
% a map containing the keys in the set and their corresponding
% values.
:- pred hash_map__select(hash_map(K,V), set(K), hash_map(K,V)).
:- mode hash_map__select(in, in, out) is det.
% Given a list of keys, produce a list of their corresponding
% values in a specified map.
:- pred hash_map__apply_to_list(list(K), hash_map(K, V), list(V)).
:- mode hash_map__apply_to_list(in, in, out) is det.
% Compute a hash value for a term of arbitrary type.
:- func hash(T) = int.
:- mode hash(ui) = out is det.
:- mode hash(in) = out is det.
%-----------------------------------------------------------------------------%
%-----------------------------------------------------------------------------%
:- implementation.
:- import_module uniq_array, tree234.
:- import_module std_util, int, string, float, require.
%-----------------------------------------------------------------------------%
% To implement hashing, we need univ_value/1 from std_util.m.
% But univ_value/1 can't be implemented until we support existential types,
% so we have to use univ_value_as_type_any/1 and unsafe_cast/1 instead.
% (We also use unsafe_cast/1 for efficiency, since it means that
% hash_special_case can be a switch rather than an if-then-else chain
% with lots of calls to type_to_univ.)
:- type any.
:- func univ_value_as_type_any(univ) = any.
:- pragma c_code(univ_value_as_type_any(Univ::in) = (Value::out),
will_not_call_mercury,
"
Value = field(mktag(0), Univ, UNIV_OFFSET_FOR_DATA);
").
:- func unsafe_cast(T1) = T2.
:- pragma c_code(unsafe_cast(In::in) = (Out::out), will_not_call_mercury, "
Out = In;
").
%-----------------------------------------------------------------------------%
hash(Key) = hash_type(Key, type_of(Key)).
:- func hash_univ(univ) = int.
:- mode hash_univ(ui) = out is det.
:- mode hash_univ(in) = out is det.
hash_univ(Key) = hash_type(univ_value_as_type_any(Key), univ_type(Key)).
:- func hash_type(T, type_info) = int.
% :- mode hash_type(ui, in) = out is det.
:- mode hash_type(in, in) = out is det.
hash_type(Key, Type) = HashValue :-
type_ctor_name_and_arity(type_ctor(Type), TypeName, TypeArity),
( hash_special_case(TypeName, TypeArity, Key) = Value ->
HashValue = Value
;
index(Key, FunctorHash),
expand(Key, _Functor, _Arity, Args),
HashValue = hash_args(Args, FunctorHash)
).
:- func hash_special_case(string, int, T) = int.
:- mode hash_special_case(in, in, in) = out is semidet.
hash_special_case("int", 0, Key) = unsafe_cast(Key).
hash_special_case("character", 0, Key) = unsafe_cast(Key).
hash_special_case("string", 0, Key) = Hash :-
string__hash(unsafe_cast(Key), Hash).
hash_special_case("float", 0, Key) = Hash :-
float__hash(unsafe_cast(Key), Hash).
% we can't get useful hash values for higher-order types
hash_special_case("pred", Arity, _) = Arity.
hash_special_case("func", Arity, _) = Arity.
hash_special_case("c_pointer", 0, Key) = unsafe_cast(Key).
% Implementing hash for uniq_array properly is hard,
% because unsafe_cast to type `unique_array(T)' won't work
% (since the type variable `T' is unbound, it becomes
% unsafe_cast to `unique_array(void)', which is wrong).
% The following will do for now...
hash_special_case("uniq_array", 1, Key) = uniq_array_size(unsafe_cast(Key)).
hash_special_case("univ", 0, Key) = hash_univ(unsafe_cast(Key)).
:- func uniq_array_size(uniq_array(any)) = int.
uniq_array_size(UniqArray) = Size :-
uniq_array__size(UniqArray, Size).
:- func hash_args(list(univ), int) = int.
hash_args([], HashValue) = HashValue.
hash_args([X|Xs], HashValue0) =
hash_args(Xs, combine_hash(HashValue0, hash_univ(X))).
:- func combine_hash(int, int) = int.
combine_hash(Hash1, Hash2) =
rotate_left(Hash1, 3) ^ Hash2.
:- func rotate_left(int, int) = int.
rotate_left(Value, NumBits) = RotatedValue :-
int__bits_per_int(IntBits),
RotatedValue = (Value << NumBits) \/ (Value >> (IntBits - NumBits)).
%-----------------------------------------------------------------------------%
% Each slot in the hash table is a 234-tree.
% Using assoc_lists instead might be slightly better in the usual
% case, but using 234-trees ensures a worse case of O(log N)
% rather than O(N).
:- type slot(K,V) == tree234(K,V).
:- inst uniq_slot(K,V) == uniq_tree234(K,V).
:- type hash_map(K,V)
---> hash_map(
int, % number of entries
uniq_array(slot(K,V))
).
:- inst uniq_hash_map(K, V) == unique(
hash_map(
unique,
uniq_array(uniq_slot(K,V))
)
).
%-----------------------------------------------------------------------------%
hash_map__init(hash_map(0, Table)) :-
uniq_array__make_empty_array(Table).
hash_map__is_empty(hash_map(0, _)).
hash_map__contains(HashMap, K) :-
hash_map__search(HashMap, K, _).
hash_map__member(HashMap, K, V) :-
hash_map__to_unsorted_assoc_list(HashMap, AssocList),
assoc_list_member(K, V, AssocList).
hash_map__search(hash_map(_, Table), K, V) :-
uniq_array__lookup(Table, hash(K), Slot),
tree234__search(Slot, K, V).
hash_map__lookup(HashMap, K, V) :-
( hash_map__search(HashMap, K, V1) ->
V = V1
;
error("hash_map__lookup: key not found")
).
hash_map__insert(hash_map(Count0, Table0), K, V, hash_map(Count, Table)) :-
%
% Insert the value into the hash table
%
uniq_array__size(Table0, Size0),
Hash = hash(K) mod Size0,
uniq_array__lookup(Table0, Hash, Slot0),
tree234__insert(Slot0, K, V, Slot),
uniq_array__set(Table0, Hash, Slot, Table1),
Count = Count0 + 1,
%
% If necessary, increase the size of the hash table.
% We must increase by a multiplicative factor, to ensure
% that insertion takes amortized constant time.
% But the exact magic numbers used in the heuristic below are just
% my guess as to what would be a reasonable value.
%
% if table is > 66% full...
( Count * 3 > Size0 * 2 ->
% ... then increase table size by 2 and then by 80%
% (leads to sequence of sizes 0, 3, 9, 19, 37, 70, ...
% should we use prime numbers instead? powers of two
% seems like a bad idea, since the array size adds an
% extra word, and GC_malloc may round things up to the
% next power of two, wasting half the space...)
Size1 = Size0 + 2,
Size = Size1 * 9 // 5,
resize_table(Table1, Size, Table)
;
Table = Table1
).
:- pred resize_table(uniq_array(slot(K,V)), int, uniq_array(slot(K,V))).
:- mode resize_table(di, in, uo) is det.
resize_table(OldTable, NewSize, NewTable) :-
tree234__init(EmptySlot),
uniq_array__init(NewSize, EmptySlot, Table),
uniq_array__size(OldTable, OldSize),
copy_elems(0, OldSize, OldTable, Table, NewTable).
:- pred copy_elems(int, int, uniq_array(slot(K,V)), uniq_array(slot(K,V)),
uniq_array(slot(K,V))).
:- mode copy_elems(in, in, di, di, uo) is det.
copy_elems(Index, Max, OldTable, Table0, Table) :-
( Index = Max ->
Table = Table0
;
copy_slot(Index, OldTable, Table0, Table1),
copy_elems(Index + 1, Max, OldTable, Table1, Table)
).
:- pred copy_slot(int, uniq_array(slot(K,V)), uniq_array(slot(K,V)),
uniq_array(slot(K,V))).
% :- mode copy_slot(in, ui, di, uo) is det.
copy_slot(Index, OldTable, Table0, Table) :-
uniq_array__lookup(OldTable, Index, Slot),
% XXX there should be a tree234__foldl
tree234__tree234_to_assoc_list(Slot, EntriesList),
list__foldl(insert_into_table, EntriesList, Table0, Table).
:- pred insert_into_table(pair(K, V), uniq_array(slot(K,V)),
uniq_array(slot(K,V))).
:- mode insert_into_table(in, di, uo) is det.
insert_into_table(Key - Value, Table0, Table) :-
uniq_array__size(Table0, Size0),
Hash = hash(Key) mod Size0,
uniq_array__lookup(Table0, Hash, Slot0),
( tree234__insert(Slot0, Key, Value, Slot) ->
uniq_array__set(Table0, Hash, Slot, Table)
;
error("hash_map: insert_into_table: tree234__det_insert failed")
).
hash_map__det_insert(Map0, K, V, Map) :-
( hash_map__insert(Map0, K, V, Map1) ->
Map = Map1
;
error("hash_map__det_insert: key already present")
).
hash_map__det_insert_from_corresponding_lists(Map0, Ks, Vs, Map) :-
( Ks = [Key | Keys], Vs = [Value | Values] ->
hash_map__det_insert(Map0, Key, Value, Map1),
hash_map__det_insert_from_corresponding_lists(Map1, Keys,
Values, Map)
;
Ks = [], Vs = []
->
Map = Map0
;
error("hash_map__det_insert_from_corresponding_lists - lists do not correspond")
).
hash_map__update(hash_map(Count, Table0), K, V, hash_map(Count, Table)) :-
uniq_array__size(Table0, Size0),
Hash = hash(K) mod Size0,
uniq_array__lookup(Table0, Hash, Slot0),
tree234__update(Slot0, K, V, Slot),
uniq_array__set(Table0, Hash, Slot, Table).
hash_map__det_update(hash_map(Count, Table0), K, V, hash_map(Count, Table)) :-
uniq_array__size(Table0, Size0),
Hash = hash(K) mod Size0,
uniq_array__lookup(Table0, Hash, Slot0),
( tree234__update(Slot0, K, V, Slot) ->
uniq_array__set(Table0, Hash, Slot, Table)
;
error("hash_map: insert_into_table: tree234__det_insert failed")
).
hash_map__set(hash_map(Count0, Table0), K, V, hash_map(Count, Table)) :-
uniq_array__size(Table0, Size0),
Hash = hash(K) mod Size0,
uniq_array__lookup(Table0, Hash, Slot0),
/****
% unique modes don't yet allow this
( tree234__insert(Slot0, K, V, Slot1) ->
Count = Count0 + 1,
Slot = Slot0
;
tree234__det_update(Slot0, K, V, Slot),
Count = Count0
),
****/
( tree234__search(Slot0, K, V) ->
Count = Count0 + 1
;
Count = Count0
),
tree234__set(Slot0, K, V, Slot),
uniq_array__set(Table0, Hash, Slot, Table).
hash_map__unsorted_keys(Map, KeyList) :-
hash_map__to_unsorted_assoc_list(Map, AssocList),
assoc_list__keys(AssocList, KeyList).
hash_map__unsorted_values(Map, ValueList) :-
hash_map__to_unsorted_assoc_list(Map, AssocList),
assoc_list__values(AssocList, ValueList).
hash_map__to_unsorted_assoc_list(hash_map(_Count, Table), AssocList) :-
uniq_array__size(Table, Size),
hash_map__to_unsorted_assoc_list_2(0, Size, Table, [], AssocListList),
list__condense(AssocListList, AssocList).
:- pred hash_map__to_unsorted_assoc_list_2(int, int, uniq_array(slot(K,V)),
list(assoc_list(K,V)),
list(assoc_list(K,V))).
% :- mode hash_map__to_unsorted_assoc_list_2(in, in, ui, di, uo) is det.
:- mode hash_map__to_unsorted_assoc_list_2(in, in, in, di, uo) is det.
hash_map__to_unsorted_assoc_list_2(Index, Max, Table, List0, List) :-
( Index = Max ->
List = List0
;
uniq_array__lookup(Table, Index, Slot),
tree234__tree234_to_assoc_list(Slot, AssocList),
List1 = [AssocList | List0],
hash_map__to_unsorted_assoc_list_2(Index, Max, Table,
List1, List)
).
hash_map__from_assoc_list(AssocList, Map) :-
InsertPair = (pred(KV::in, Map1::di, Map2::uo) is det :-
KV = Key - Value,
hash_map__det_insert(Map1, Key, Value, Map2)),
hash_map__init(Map0),
list__foldl(InsertPair, AssocList, Map0, Map).
/* NYI
hash_map__delete(Map0, Key, Map) :-
tree234__delete(Map0, Key, Map).
hash_map__delete_list(Map, [], Map).
hash_map__delete_list(Map0, [Key | Keys], Map) :-
hash_map__delete(Map0, Key, Map1),
hash_map__delete_list(Map1, Keys, Map).
hash_map__remove(Map0, Key, Value, Map) :-
tree234__remove(Map0, Key, Value, Map).
hash_map__det_remove(Map0, Key, Value, Map) :-
( tree234__remove(Map0, Key, Value1, Map1) ->
Value = Value1,
Map = Map1
;
error("hash_map__det_remove: key not found")
).
*/
hash_map__count(hash_map(Count, _), Count).
%-----------------------------------------------------------------------------%
% inefficient
hash_map__inverse_search(Map, V, K) :-
hash_map__to_unsorted_assoc_list(Map, AssocList),
assoc_list_member(K, V, AssocList).
%-----------------------------------------------------------------------------%
% The code here is deliberately written using very simple
% modes.
% The reason we don't just use member/2 is that we want to
% bootstrap this thing ASAP.
:- pred assoc_list_member(K, V, list(pair(K,V))).
:- mode assoc_list_member(in, out, in) is nondet.
:- mode assoc_list_member(out, in, in) is nondet.
:- mode assoc_list_member(out, out, in) is nondet.
:- mode assoc_list_member(in, in, in) is semidet.
assoc_list_member(K, V, [K - V | _]).
assoc_list_member(K, V, [_ | Xs]) :-
assoc_list_member(K, V, Xs).
%-----------------------------------------------------------------------------%
hash_map__from_corresponding_lists(Keys, Values, Map) :-
assoc_list__from_corresponding_lists(Keys, Values, AssocList),
hash_map__from_assoc_list(AssocList, Map).
%-----------------------------------------------------------------------------%
hash_map__merge(Map0, Map1, Map) :-
hash_map__to_unsorted_assoc_list(Map1, AssocList),
hash_map__merge_2(AssocList, Map0, Map).
:- pred hash_map__merge_2(assoc_list(K,V), hash_map(K,V), hash_map(K,V)).
:- mode hash_map__merge_2(in, in, out) is det.
hash_map__merge_2([], Map, Map).
hash_map__merge_2([K - V | AssocList], Map0, Map) :-
hash_map__det_insert(Map0, K, V, Map1),
hash_map__merge_2(AssocList, Map1, Map).
%-----------------------------------------------------------------------------%
hash_map__overlay(Map0, Map1, Map) :-
hash_map__to_unsorted_assoc_list(Map1, AssocList),
hash_map__overlay_2(AssocList, Map0, Map).
:- pred hash_map__overlay_2(assoc_list(K,V), hash_map(K,V), hash_map(K,V)).
:- mode hash_map__overlay_2(in, in, out) is det.
hash_map__overlay_2([], Map, Map).
hash_map__overlay_2([K - V | AssocList], Map0, Map) :-
hash_map__set(Map0, K, V, Map1),
hash_map__overlay_2(AssocList, Map1, Map).
%-----------------------------------------------------------------------------%
hash_map__select(Original, KeySet, NewMap) :-
set__to_sorted_list(KeySet, KeyList),
hash_map__init(NewMap0),
hash_map__select_2(KeyList, Original, NewMap0, NewMap).
:- pred hash_map__select_2(list(K), hash_map(K,V), hash_map(K,V),
hash_map(K,V)).
:- mode hash_map__select_2(in, in, in, out) is det.
hash_map__select_2([], _Original, New, New).
hash_map__select_2([K|Ks], Original, New0, New) :-
(
hash_map__search(Original, K, V)
->
hash_map__set(New0, K, V, New1)
;
New1 = New0
),
hash_map__select_2(Ks, Original, New1, New).
%-----------------------------------------------------------------------------%
hash_map__apply_to_list([], _, []).
hash_map__apply_to_list([K | Ks], Map, [V | Vs]) :-
hash_map__lookup(Map, K, V),
hash_map__apply_to_list(Ks, Map, Vs).
%-----------------------------------------------------------------------------%
%-----------------------------------------------------------------------------%
--
Fergus Henderson <fjh at cs.mu.oz.au> | "I have always known that the pursuit
WWW: <http://www.cs.mu.oz.au/~fjh> | of excellence is a lethal habit"
PGP: finger fjh at 128.250.37.3 | -- the last words of T. S. Garp.
More information about the developers
mailing list