hash_map

Fergus Henderson fjh at cs.mu.oz.au
Fri May 2 00:51:40 AEST 1997


Hi,

I did an implementation of hash_map, i.e. maps implemented using hash tables.
The code (which follows) is type and mode-correct, but not unique-mode-correct.
Making it unique-mode-correct requires fixing the compiler: as well as
Andrew's changes, which will make `ui' work, we also need to solve the
problem with uniq_array__lookup mentioned in my other mail.

%-----------------------------------------------------------------------------%
% Copyright (C) 1997 University of Melbourne.
% This file may only be copied under the terms of the GNU Library General
% Public License - see the file COPYING.LIB in the Mercury distribution.
%-----------------------------------------------------------------------------%
%
% File: hash_map.m.
% Main author: fjh.
% Stability: medium.
%
% This file provides an implementation of maps using hash tables.
% A map (also known as a dictionary or an associative array) is a collection
% of (Key,Data) pairs which allows you to look up any Data item given the Key.
% This implementation gives you O(1) lookup, assuming your hash values
% are evenly distributed, or O(log N) in the worst case.
% Updates are O(1) amortized; unique modes are used to ensure that
% the implementation can use destructive update.
%
%-----------------------------------------------------------------------------%
%-----------------------------------------------------------------------------%

:- module hash_map.
:- interface.
:- import_module set, list, assoc_list.

%-----------------------------------------------------------------------------%

:- type hash_map(Key, Value).

% :- inst uniq_hash_map(K, V).
:- mode di_hash_map(K, V) :: uniq_hash_map(K, V) -> dead.
:- mode di_hash_map       :: uniq_hash_map(ground, ground) -> dead.
:- mode uo_hash_map(K, V) :: free -> uniq_hash_map(K, V).
:- mode uo_hash_map       :: free -> uniq_hash_map(ground, ground).

%-----------------------------------------------------------------------------%

	% Initialize an empty map.
:- pred hash_map__init(hash_map(_,_)).
:- mode hash_map__init(uo) is det.

	% Check whether a map is empty.
:- pred hash_map__is_empty(hash_map(_,_)).
% :- mode hash_map__is_empty(ui) is semidet.
:- mode hash_map__is_empty(in) is semidet.

	% Check whether map contains key
:- pred hash_map__contains(hash_map(K,_V), K).
% :- mode hash_map__contains(ui, in) is semidet.
:- mode hash_map__contains(in, in) is semidet.

:- pred hash_map__member(hash_map(K,V), K, V).
% :- mode hash_map__member(ui, out, out) is nondet.
:- mode hash_map__member(in, out, out) is nondet.

	% Search map for key.
:- pred hash_map__search(hash_map(K,V), K, V).
% :- mode hash_map__search(ui, in, out) is semidet.
:- mode hash_map__search(in, in, out) is semidet.

	% Search map for key, but abort if search fails.
:- pred hash_map__lookup(hash_map(K,V), K, V).
% :- mode hash_map__lookup(ui, in, out) is det.
:- mode hash_map__lookup(in, in, out) is det.

	% Search map for data.
:- pred hash_map__inverse_search(hash_map(K,V), V, K).
% :- mode hash_map__inverse_search(mui, in, out) is nondet.
:- mode hash_map__inverse_search(in, in, out) is nondet.

	% Insert a new key and corresponding value into a map.
	% Fail if the key already exists.
:- pred hash_map__insert(hash_map(K,V), K, V, hash_map(K,V)).
:- mode hash_map__insert(di, di, di, uo) is semidet.

	% Insert a new key and corresponding value into a map.
	% Abort if the key already exists.
:- pred hash_map__det_insert(hash_map(K,V), K, V, hash_map(K,V)).
:- mode hash_map__det_insert(di, di, di, uo) is det.

	% Apply hash_map__det_insert to key - value pairs from corresponding
	% lists.
:- pred hash_map__det_insert_from_corresponding_lists(hash_map(K,V), list(K),
						list(V), hash_map(K,V)).
:- mode hash_map__det_insert_from_corresponding_lists(di, in, in, uo) is det.

	% Update the value corresponding to a given key
	% Fail if the key doesn't already exist.
:- pred hash_map__update(hash_map(K,V), K, V, hash_map(K,V)).
:- mode hash_map__update(di, in, in, uo) is semidet.

	% Update the value corresponding to a given key
	% Abort if the key doesn't already exist.
:- pred hash_map__det_update(hash_map(K,V), K, V, hash_map(K,V)).
:- mode hash_map__det_update(di, in, in, uo) is det.

	% Update value if the key is already present, otherwise
	% insert new key and value.
:- pred hash_map__set(hash_map(K,V), K, V, hash_map(K,V)).
:- mode hash_map__set(di, di, di, uo) is det.
:- mode hash_map__set(in, in, in, out) is det.

	% Given a map, return a list of all the keys in the map
:- pred hash_map__unsorted_keys(hash_map(K, _V), list(K)).
:- mode hash_map__unsorted_keys(in, out) is det.

	% Given a hash_map, return a list of all the data values in the map
:- pred hash_map__unsorted_values(hash_map(_K, V), list(V)).
:- mode hash_map__unsorted_values(in, out) is det.

	% convert a hash_map to an association list
:- pred hash_map__to_unsorted_assoc_list(hash_map(K,V), assoc_list(K,V)).
:- mode hash_map__to_unsorted_assoc_list(in, out) is det.

	% convert an association list to a hash_map
:- pred hash_map__from_assoc_list(assoc_list(K,V), hash_map(K,V)).
:- mode hash_map__from_assoc_list(in, out) is det.

/* NOT YET IMPLEMENTED
	% delete a key-value pair from a map
	% if the key is not present, leave the map unchanged
:- pred hash_map__delete(map(K,V), K, map(K,V)).
:- mode hash_map__delete(di, in, uo) is det.
:- mode hash_map__delete(in, in, out) is det.

	% apply hash_map__delete/3 to a list of keys
:- pred hash_map__delete_list(map(K,V), list(K), map(K,V)).
:- mode hash_map__delete_list(di, in, uo) is det.
:- mode hash_map__delete_list(in, in, out) is det.

	% delete a key-value pair from a map and return the value.
	% fail if the key is not present
:- pred hash_map__remove(map(K,V), K, V, map(K,V)).
:- mode hash_map__remove(in, in, out, out) is semidet.

	% delete a key-value pair from a map and return the value.
	% abort if the key is not present
:- pred hash_map__det_remove(map(K,V), K, V, map(K,V)).
:- mode hash_map__det_remove(in, in, out, out) is det.
*/

	% Count the number of elements in the map.
:- pred hash_map__count(hash_map(K, V), int).
:- mode hash_map__count(in, out) is det.

	% Convert a pair of lists (which must be of the same length)
	% to a map.
:- pred hash_map__from_corresponding_lists(list(K), list(V), hash_map(K, V)).
:- mode hash_map__from_corresponding_lists(in, in, out) is det.

	% For hash_map__merge(MapA, MapB, Map), MapA and MapB must
	% not both contain the same key.
:- pred hash_map__merge(hash_map(K, V), hash_map(K, V), hash_map(K, V)).
:- mode hash_map__merge(in, in, out) is det.

	% For hash_map__overlay(MapA, MapB, Map), if MapA and MapB both
	% contain the same key, then Map will map that key to
	% the value from MapB.  In otherwords, MapB takes precedence
	% over MapA.
:- pred hash_map__overlay(hash_map(K,V), hash_map(K,V), hash_map(K,V)).
:- mode hash_map__overlay(in, in, out) is det.

	% hash_map__select takes a map and a set of keys and returns
	% a map containing the keys in the set and their corresponding
	% values.
:- pred hash_map__select(hash_map(K,V), set(K), hash_map(K,V)).
:- mode hash_map__select(in, in, out) is det.

	% Given a list of keys, produce a list of their corresponding
	% values in a specified map.
:- pred hash_map__apply_to_list(list(K), hash_map(K, V), list(V)).
:- mode hash_map__apply_to_list(in, in, out) is det.

	% Compute a hash value for a term of arbitrary type.
:- func hash(T) = int.
:- mode hash(ui) = out is det.
:- mode hash(in) = out is det.

%-----------------------------------------------------------------------------%
%-----------------------------------------------------------------------------%

:- implementation.

:- import_module uniq_array, tree234.
:- import_module std_util, int, string, float, require.

%-----------------------------------------------------------------------------%

% To implement hashing, we need univ_value/1 from std_util.m.
% But univ_value/1 can't be implemented until we support existential types,
% so we have to use univ_value_as_type_any/1 and unsafe_cast/1 instead.
% (We also use unsafe_cast/1 for efficiency, since it means that
% hash_special_case can be a switch rather than an if-then-else chain
% with lots of calls to type_to_univ.)

:- type any.

:- func univ_value_as_type_any(univ) = any.

:- pragma c_code(univ_value_as_type_any(Univ::in) = (Value::out),
	will_not_call_mercury,
"
	Value = field(mktag(0), Univ, UNIV_OFFSET_FOR_DATA);
").

:- func unsafe_cast(T1) = T2.

:- pragma c_code(unsafe_cast(In::in) = (Out::out), will_not_call_mercury, "
	Out = In;
").

%-----------------------------------------------------------------------------%

hash(Key) = hash_type(Key, type_of(Key)).

:- func hash_univ(univ) = int.
:- mode hash_univ(ui) = out is det.
:- mode hash_univ(in) = out is det.

hash_univ(Key) = hash_type(univ_value_as_type_any(Key), univ_type(Key)).

:- func hash_type(T, type_info) = int.
% :- mode hash_type(ui, in) = out is det.
:- mode hash_type(in, in) = out is det.

hash_type(Key, Type) = HashValue :-
	type_ctor_name_and_arity(type_ctor(Type), TypeName, TypeArity),
	( hash_special_case(TypeName, TypeArity, Key) = Value ->
		HashValue = Value
	;
		index(Key, FunctorHash),
		expand(Key, _Functor, _Arity, Args),
		HashValue = hash_args(Args, FunctorHash)
	).

:- func hash_special_case(string, int, T) = int.
:- mode hash_special_case(in, in, in) = out is semidet.

hash_special_case("int", 0, Key) = unsafe_cast(Key).
hash_special_case("character", 0, Key) = unsafe_cast(Key).

hash_special_case("string", 0, Key) = Hash :-
	string__hash(unsafe_cast(Key), Hash).
hash_special_case("float", 0, Key) = Hash :-
	float__hash(unsafe_cast(Key), Hash).

% we can't get useful hash values for higher-order types
hash_special_case("pred", Arity, _) = Arity.
hash_special_case("func", Arity, _) = Arity.

hash_special_case("c_pointer", 0, Key) = unsafe_cast(Key).

	% Implementing hash for uniq_array properly is hard,
	% because unsafe_cast to type `unique_array(T)' won't work
	% (since the type variable `T' is unbound, it becomes
	% unsafe_cast to `unique_array(void)', which is wrong).
	% The following will do for now...
hash_special_case("uniq_array", 1, Key) = uniq_array_size(unsafe_cast(Key)).

hash_special_case("univ", 0, Key) = hash_univ(unsafe_cast(Key)).

:- func uniq_array_size(uniq_array(any)) = int.
uniq_array_size(UniqArray) = Size :-
	uniq_array__size(UniqArray, Size).

:- func hash_args(list(univ), int) = int.
hash_args([], HashValue) = HashValue.
hash_args([X|Xs], HashValue0) =
	hash_args(Xs, combine_hash(HashValue0, hash_univ(X))).

:- func combine_hash(int, int) = int.
combine_hash(Hash1, Hash2) =
	rotate_left(Hash1, 3) ^ Hash2.

:- func rotate_left(int, int) = int.
rotate_left(Value, NumBits) = RotatedValue :-
	int__bits_per_int(IntBits),
	RotatedValue = (Value << NumBits) \/ (Value >> (IntBits - NumBits)).

%-----------------------------------------------------------------------------%

	% Each slot in the hash table is a 234-tree.
	% Using assoc_lists instead might be slightly better in the usual
	% case, but using 234-trees ensures a worse case of O(log N) 
	% rather than O(N).

:- type slot(K,V) == tree234(K,V).
:- inst uniq_slot(K,V) == uniq_tree234(K,V).

:- type hash_map(K,V)
	--->	hash_map(
			int,	% number of entries
			uniq_array(slot(K,V))
		).
:- inst uniq_hash_map(K, V) == unique(
		hash_map(
			unique,
			uniq_array(uniq_slot(K,V))
		)
	).

%-----------------------------------------------------------------------------%

hash_map__init(hash_map(0, Table)) :-
	uniq_array__make_empty_array(Table).

hash_map__is_empty(hash_map(0, _)).

hash_map__contains(HashMap, K) :-
	hash_map__search(HashMap, K, _).

hash_map__member(HashMap, K, V) :-
	hash_map__to_unsorted_assoc_list(HashMap, AssocList),
	assoc_list_member(K, V, AssocList).

hash_map__search(hash_map(_, Table), K, V) :-
	uniq_array__lookup(Table, hash(K), Slot),
	tree234__search(Slot, K, V).

hash_map__lookup(HashMap, K, V) :-
	( hash_map__search(HashMap, K, V1) ->
		V = V1
	;
		error("hash_map__lookup: key not found")
	).

hash_map__insert(hash_map(Count0, Table0), K, V, hash_map(Count, Table)) :-
	%
	% Insert the value into the hash table
	%
	uniq_array__size(Table0, Size0),
	Hash = hash(K) mod Size0,
	uniq_array__lookup(Table0, Hash, Slot0),
	tree234__insert(Slot0, K, V, Slot),
	uniq_array__set(Table0, Hash, Slot, Table1),
	Count = Count0 + 1,

	%
	% If necessary, increase the size of the hash table.
	% We must increase by a multiplicative factor, to ensure
	% that insertion takes amortized constant time.
	% But the exact magic numbers used in the heuristic below are just
	% my guess as to what would be a reasonable value.
	%

	% if table is > 66% full...
	( Count * 3 > Size0 * 2 ->	
		% ... then increase table size by 2 and then by 80%
		% (leads to sequence of sizes 0, 3, 9, 19, 37, 70, ...
		% should we use prime numbers instead? powers of two
		% seems like a bad idea, since the array size adds an
		% extra word, and GC_malloc may round things up to the
		% next power of two, wasting half the space...)
		Size1 = Size0 + 2,
		Size = Size1 * 9 // 5,
		resize_table(Table1, Size, Table)
	;
		Table = Table1
	).

:- pred resize_table(uniq_array(slot(K,V)), int, uniq_array(slot(K,V))).
:- mode resize_table(di, in, uo) is det.

resize_table(OldTable, NewSize, NewTable) :-
	tree234__init(EmptySlot),
	uniq_array__init(NewSize, EmptySlot, Table),
	uniq_array__size(OldTable, OldSize),
	copy_elems(0, OldSize, OldTable, Table, NewTable).

:- pred copy_elems(int, int, uniq_array(slot(K,V)), uniq_array(slot(K,V)),
			uniq_array(slot(K,V))).
:- mode copy_elems(in, in, di, di, uo) is det.

copy_elems(Index, Max, OldTable, Table0, Table) :-
	( Index = Max ->
		Table = Table0
	;
		copy_slot(Index, OldTable, Table0, Table1),
		copy_elems(Index + 1, Max, OldTable, Table1, Table)
	).

:- pred copy_slot(int, uniq_array(slot(K,V)), uniq_array(slot(K,V)),
			uniq_array(slot(K,V))).
% :- mode copy_slot(in, ui, di, uo) is det.

copy_slot(Index, OldTable, Table0, Table) :-
	uniq_array__lookup(OldTable, Index, Slot),
	% XXX there should be a tree234__foldl
	tree234__tree234_to_assoc_list(Slot, EntriesList),
	list__foldl(insert_into_table, EntriesList, Table0, Table).

:- pred insert_into_table(pair(K, V), uniq_array(slot(K,V)),
			uniq_array(slot(K,V))).
:- mode insert_into_table(in, di, uo) is det.

insert_into_table(Key - Value, Table0, Table) :-
	uniq_array__size(Table0, Size0),
	Hash = hash(Key) mod Size0,
	uniq_array__lookup(Table0, Hash, Slot0),
	( tree234__insert(Slot0, Key, Value, Slot) ->
		uniq_array__set(Table0, Hash, Slot, Table)
	;
		error("hash_map: insert_into_table: tree234__det_insert failed")
	).

hash_map__det_insert(Map0, K, V, Map) :-
	( hash_map__insert(Map0, K, V, Map1) ->
		Map = Map1
	;
		error("hash_map__det_insert: key already present")
	).

hash_map__det_insert_from_corresponding_lists(Map0, Ks, Vs, Map) :-
	( Ks = [Key | Keys], Vs = [Value | Values] ->
		hash_map__det_insert(Map0, Key, Value, Map1),
		hash_map__det_insert_from_corresponding_lists(Map1, Keys,
							Values, Map)
	;
		Ks = [], Vs = []
	->
		Map = Map0
	;
		error("hash_map__det_insert_from_corresponding_lists - lists do not correspond")
	).

hash_map__update(hash_map(Count, Table0), K, V, hash_map(Count, Table)) :-
	uniq_array__size(Table0, Size0),
	Hash = hash(K) mod Size0,
	uniq_array__lookup(Table0, Hash, Slot0),
	tree234__update(Slot0, K, V, Slot),
	uniq_array__set(Table0, Hash, Slot, Table).

hash_map__det_update(hash_map(Count, Table0), K, V, hash_map(Count, Table)) :-
	uniq_array__size(Table0, Size0),
	Hash = hash(K) mod Size0,
	uniq_array__lookup(Table0, Hash, Slot0),
	( tree234__update(Slot0, K, V, Slot) ->
		uniq_array__set(Table0, Hash, Slot, Table)
	;
		error("hash_map: insert_into_table: tree234__det_insert failed")
	).

hash_map__set(hash_map(Count0, Table0), K, V, hash_map(Count, Table)) :-
	uniq_array__size(Table0, Size0),
	Hash = hash(K) mod Size0,
	uniq_array__lookup(Table0, Hash, Slot0),
/****
% unique modes don't yet allow this
	( tree234__insert(Slot0, K, V, Slot1) ->
		Count = Count0 + 1,
		Slot = Slot0
	;
		tree234__det_update(Slot0, K, V, Slot),
		Count = Count0
	),
****/
	( tree234__search(Slot0, K, V) ->
		Count = Count0 + 1
	;
		Count = Count0
	),
	tree234__set(Slot0, K, V, Slot),
	uniq_array__set(Table0, Hash, Slot, Table).

hash_map__unsorted_keys(Map, KeyList) :-
	hash_map__to_unsorted_assoc_list(Map, AssocList),
	assoc_list__keys(AssocList, KeyList).

hash_map__unsorted_values(Map, ValueList) :-
	hash_map__to_unsorted_assoc_list(Map, AssocList),
	assoc_list__values(AssocList, ValueList).

hash_map__to_unsorted_assoc_list(hash_map(_Count, Table), AssocList) :-
	uniq_array__size(Table, Size),
	hash_map__to_unsorted_assoc_list_2(0, Size, Table, [], AssocListList),
	list__condense(AssocListList, AssocList).

:- pred hash_map__to_unsorted_assoc_list_2(int, int, uniq_array(slot(K,V)),
			list(assoc_list(K,V)),
			list(assoc_list(K,V))).
% :- mode hash_map__to_unsorted_assoc_list_2(in, in, ui, di, uo) is det.
:- mode hash_map__to_unsorted_assoc_list_2(in, in, in, di, uo) is det.

hash_map__to_unsorted_assoc_list_2(Index, Max, Table, List0, List) :-
	( Index = Max ->
		List = List0
	;
		uniq_array__lookup(Table, Index, Slot),
		tree234__tree234_to_assoc_list(Slot, AssocList),
		List1 = [AssocList | List0],
		hash_map__to_unsorted_assoc_list_2(Index, Max, Table,
			List1, List)
	).

hash_map__from_assoc_list(AssocList, Map) :-
	InsertPair = (pred(KV::in, Map1::di, Map2::uo) is det :-
			KV = Key - Value,
			hash_map__det_insert(Map1, Key, Value, Map2)),
	hash_map__init(Map0),
	list__foldl(InsertPair, AssocList, Map0, Map).


/* NYI

hash_map__delete(Map0, Key, Map) :-
	tree234__delete(Map0, Key, Map).

hash_map__delete_list(Map, [], Map).
hash_map__delete_list(Map0, [Key | Keys], Map) :-
	hash_map__delete(Map0, Key, Map1),
	hash_map__delete_list(Map1, Keys, Map).

hash_map__remove(Map0, Key, Value, Map) :-
	tree234__remove(Map0, Key, Value, Map).

hash_map__det_remove(Map0, Key, Value, Map) :-
	( tree234__remove(Map0, Key, Value1, Map1) ->
		Value = Value1,
		Map = Map1
	;
		error("hash_map__det_remove: key not found")
	).
*/

hash_map__count(hash_map(Count, _), Count).

%-----------------------------------------------------------------------------%

	% inefficient

hash_map__inverse_search(Map, V, K) :-
	hash_map__to_unsorted_assoc_list(Map, AssocList),
	assoc_list_member(K, V, AssocList).

%-----------------------------------------------------------------------------%

	% The code here is deliberately written using very simple
	% modes.
	% The reason we don't just use member/2 is that we want to
	% bootstrap this thing ASAP.

:- pred assoc_list_member(K, V, list(pair(K,V))).
:- mode assoc_list_member(in, out, in) is nondet.
:- mode assoc_list_member(out, in, in) is nondet.
:- mode assoc_list_member(out, out, in) is nondet.
:- mode assoc_list_member(in, in, in) is semidet.
assoc_list_member(K, V, [K - V | _]).
assoc_list_member(K, V, [_ | Xs]) :-
	assoc_list_member(K, V, Xs).

%-----------------------------------------------------------------------------%

hash_map__from_corresponding_lists(Keys, Values, Map) :-
	assoc_list__from_corresponding_lists(Keys, Values, AssocList),
	hash_map__from_assoc_list(AssocList, Map).

%-----------------------------------------------------------------------------%

hash_map__merge(Map0, Map1, Map) :-
	hash_map__to_unsorted_assoc_list(Map1, AssocList),
	hash_map__merge_2(AssocList, Map0, Map).

:- pred hash_map__merge_2(assoc_list(K,V), hash_map(K,V), hash_map(K,V)).
:- mode hash_map__merge_2(in, in, out) is det.

hash_map__merge_2([], Map, Map).
hash_map__merge_2([K - V | AssocList], Map0, Map) :-
	hash_map__det_insert(Map0, K, V, Map1),
	hash_map__merge_2(AssocList, Map1, Map).

%-----------------------------------------------------------------------------%

hash_map__overlay(Map0, Map1, Map) :-
	hash_map__to_unsorted_assoc_list(Map1, AssocList),
	hash_map__overlay_2(AssocList, Map0, Map).

:- pred hash_map__overlay_2(assoc_list(K,V), hash_map(K,V), hash_map(K,V)).
:- mode hash_map__overlay_2(in, in, out) is det.

hash_map__overlay_2([], Map, Map).
hash_map__overlay_2([K - V | AssocList], Map0, Map) :-
	hash_map__set(Map0, K, V, Map1),
	hash_map__overlay_2(AssocList, Map1, Map).

%-----------------------------------------------------------------------------%

hash_map__select(Original, KeySet, NewMap) :-
	set__to_sorted_list(KeySet, KeyList),
	hash_map__init(NewMap0),
	hash_map__select_2(KeyList, Original, NewMap0, NewMap).

:- pred hash_map__select_2(list(K), hash_map(K,V), hash_map(K,V),
			hash_map(K,V)).
:- mode hash_map__select_2(in, in, in, out) is det.

hash_map__select_2([], _Original, New, New).
hash_map__select_2([K|Ks], Original, New0, New) :-
	(
		hash_map__search(Original, K, V)
	->
		hash_map__set(New0, K, V, New1)
	;
		New1 = New0
	),
	hash_map__select_2(Ks, Original, New1, New).

%-----------------------------------------------------------------------------%

hash_map__apply_to_list([], _, []).
hash_map__apply_to_list([K | Ks], Map, [V | Vs]) :-
	hash_map__lookup(Map, K, V),
	hash_map__apply_to_list(Ks, Map, Vs).

%-----------------------------------------------------------------------------%
%-----------------------------------------------------------------------------%

-- 
Fergus Henderson <fjh at cs.mu.oz.au>   |  "I have always known that the pursuit
WWW: <http://www.cs.mu.oz.au/~fjh>   |  of excellence is a lethal habit"
PGP: finger fjh at 128.250.37.3         |     -- the last words of T. S. Garp.



More information about the developers mailing list