[m-rev.] for review: Speed up retrieval of foreign imports.

Peter Wang novalazy at gmail.com
Wed Nov 23 12:28:39 AEDT 2022


On Tue, 22 Nov 2022 20:52:54 +1100 Julien Fischer <jfischer at opturion.com> wrote:
> 
> On Tue, 22 Nov 2022, Peter Wang wrote:
> 
> > On Tue, 22 Nov 2022 15:57:40 +1100 Peter Wang <novalazy at gmail.com> wrote:
> >> 
> >> This change replaces the representation of cached_foreign_imports from
> >> tree234 to one based on version_array. It reduces the average run time
> >> for a do-nothing build of Prince on my machine from 7.850 s to 6.505 s,
> >> for a speedup of ~17%.
> >> 
> >> compiler/va_map.m:
> >>     Add a module originally written for Prince.
> >
> > Actually the 2-level scheme employed by va_map isn't necessary for
> > this use case. I will try using a flat version_array.
> 
> Which would have the advantage of working when targetting C# or Java,
> unlike the current version of va_map.

Here is the new patch.

Peter

---

Speed up retrieval of foreign imports.

For a do-nothing build of Prince with intermodule optimisation enabled,
mmc --make spends a large fraction of its time computing foreign imports,
or rather looking up the cached results of such. (There are 650 modules,
and the results for each module are looked up tens to hundreds of
thousands of times each.)

This change replaces the representation of cached_foreign_imports from
tree234 to one based on version_array. It reduces the average run time
for a do-nothing build of Prince on my machine from 7.80 s to 5.87 s,
for a speedup of ~25%.

compiler/va_map.m:
    Add a simplified version of a module that I originally wrote for
    Prince. I have permission to include it in the Mercury compiler and
    assign copyright to the Mercury team.

compiler/libs.m:
    Include the new module in the libs package.

compiler/make.deps_set.m:
    Implement a typeclass needed to use module_index as a va_map key.

compiler/make.dependencies.m:
    Implement a typeclass needed to use maybe(T) as a va_map_value.

    Use va_map to represent cached_foreign_imports instead of map.

diff --git a/compiler/libs.m b/compiler/libs.m
index 4a56e0df6..cbc3fa105 100644
--- a/compiler/libs.m
+++ b/compiler/libs.m
@@ -42,6 +42,7 @@
 :- include_module md5.
 :- include_module pickle.
 :- include_module uint_emu.
+:- include_module va_map.
 
 % OS interfaces not provided by the standard library.
 :- include_module process_util.
diff --git a/compiler/make.dependencies.m b/compiler/make.dependencies.m
index 52402bacb..cd8aa7c7f 100644
--- a/compiler/make.dependencies.m
+++ b/compiler/make.dependencies.m
@@ -2,6 +2,7 @@
 % vim: ft=mercury ts=4 sw=4 et
 %---------------------------------------------------------------------------%
 % Copyright (C) 2002-2011 The University of Melbourne.
+% Copyright (C) 2013-2017, 2019-2022 The Mercury team.
 % This file may only be copied under the terms of the GNU General
 % Public License - see the file COPYING in the Mercury distribution.
 %---------------------------------------------------------------------------%
@@ -164,6 +165,7 @@
 :- import_module backend_libs.
 :- import_module backend_libs.compile_target_code.
 :- import_module libs.options.
+:- import_module libs.va_map.
 :- import_module make.module_dep_file.
 :- import_module make.util.
 :- import_module parse_tree.
@@ -730,14 +732,17 @@ find_module_foreign_imports_2(Languages, Globals, ModuleIndex, Succeeded,
         ForeignModules, !Info, !IO) :-
     % Languages should be constant for the duration of the process.
     CachedForeignImports0 = !.Info ^ mki_cached_foreign_imports,
-    ( if map.search(CachedForeignImports0, ModuleIndex, Result0) then
+    va_map.lookup(CachedForeignImports0, ModuleIndex, MaybeResult0),
+    (
+        MaybeResult0 = yes(Result0),
         Result0 = deps_result(Succeeded, ForeignModules)
-    else
+    ;
+        MaybeResult0 = no,
         find_module_foreign_imports_3(Languages, Globals, ModuleIndex,
             Succeeded, ForeignModules, !Info, !IO),
         Result = deps_result(Succeeded, ForeignModules),
         CachedForeignImports1 = !.Info ^ mki_cached_foreign_imports,
-        map.det_insert(ModuleIndex, Result,
+        va_map.set(ModuleIndex, yes(Result),
             CachedForeignImports1, CachedForeignImports),
         !Info ^ mki_cached_foreign_imports := CachedForeignImports
     ).
@@ -1375,9 +1380,14 @@ make_write_dependency_file_and_timestamp_list([Head | Tail], !IO) :-
 
 init_cached_direct_imports = map.init.
 
-:- type cached_foreign_imports == map(module_index, module_deps_result).
+:- type cached_foreign_imports ==
+    va_map(module_index, maybe(module_deps_result)).
 
-init_cached_foreign_imports = map.init.
+:- instance va_map_value(maybe(T)) where [
+    default_value = no
+].
+
+init_cached_foreign_imports = va_map.init.
 
 :- type transitive_dependencies_root
     --->    transitive_dependencies_root(
diff --git a/compiler/make.deps_set.m b/compiler/make.deps_set.m
index 0000b10fd..cc890d51f 100644
--- a/compiler/make.deps_set.m
+++ b/compiler/make.deps_set.m
@@ -2,6 +2,7 @@
 % vim: ft=mercury ts=4 sw=4 et
 %---------------------------------------------------------------------------%
 % Copyright (C) 2002-2011 The University of Melbourne.
+% Copyright (C) 2020-2022 The Mercury team.
 % This file may only be copied under the terms of the GNU General
 % Public License - see the file COPYING in the Mercury distribution.
 %---------------------------------------------------------------------------%
@@ -22,6 +23,8 @@
 :- module make.deps_set.
 :- interface.
 
+:- import_module libs.
+:- import_module libs.va_map.
 :- import_module make.dependencies.
 :- import_module make.make_info.
 :- import_module mdbcomp.
@@ -41,6 +44,7 @@
 
 :- type module_index.
 :- instance enum(module_index).
+:- instance va_map_key(module_index).
 
 :- type dependency_file_index.
 :- instance enum(dependency_file_index).
@@ -91,13 +95,13 @@
 
 :- implementation.
 
-:- import_module libs.
 :- import_module parse_tree.
 
 :- import_module int.
 :- import_module maybe.
 :- import_module version_array.
 :- import_module version_hash_table.
+:- import_module uint.
 
 %---------------------------------------------------------------------------%
 %
@@ -107,14 +111,19 @@
 :- type module_index
     --->    module_index(int).
 
-:- type dependency_file_index
-    --->    dependency_file_index(int).
-
 :- instance enum(module_index) where [
     to_int(module_index(I)) = I,
     from_int(I) = module_index(I)
 ].
 
+:- instance va_map_key(module_index) where [
+    ( from_key(module_index(I)) = uint.cast_from_int(I) ),
+    ( to_key(U) = module_index(uint.cast_to_int(U)) )
+].
+
+:- type dependency_file_index
+    --->    dependency_file_index(int).
+
 :- instance enum(dependency_file_index) where [
     to_int(dependency_file_index(I)) = I,
     from_int(I) = dependency_file_index(I)
diff --git a/compiler/va_map.m b/compiler/va_map.m
new file mode 100644
index 000000000..6785dfc47
--- /dev/null
+++ b/compiler/va_map.m
@@ -0,0 +1,139 @@
+%---------------------------------------------------------------------------%
+% vim: ft=mercury ts=4 sw=4 et
+%---------------------------------------------------------------------------%
+% Copyright (C) 2022 YesLogic Pty. Ltd.
+% Copyright (C) 2022 The Mercury team.
+% This file may only be copied under the terms of the GNU General
+% Public License - see the file COPYING in the Mercury distribution.
+%---------------------------------------------------------------------------%
+%
+% File: va_map.m.
+% Main author: wangp.
+%
+% This module defines a data structure on top of version arrays, with a
+% map-like interface. Keys are restricted to values that can be converted to
+% and from a uint.
+%
+% Like a plain version array, it provides O(1) indexing and update when
+% accessing the latest version of the data structure.
+%
+% Unlike a plain version array, the data structure will grow to accomodate keys
+% outside of the existing bounds. This is more like a map.
+%
+% Unlike a map, keys that were not explicitly set are implicitly associated
+% with a default value. This is more like an array.
+%
+% Note: this implementation of va_map is backed by a single contiguous array,
+% so the keys should be mapped to a relatively small range of values starting
+% from zero.
+%
+%---------------------------------------------------------------------------%
+
+:- module libs.va_map.
+:- interface.
+
+:- type va_map(K, V).
+
+    % The keys of a va_map must be convertible to/from a uint.
+    %
+:- typeclass va_map_key(K) where [
+    func from_key(K) = uint,
+    func to_key(uint) = K
+].
+
+    % The value type of a va_map must have a default value.
+    %
+:- typeclass va_map_value(V) where [
+    func default_value = V
+].
+
+    % Create a new va_map.
+    % All keys are implicitly associated with the default value.
+    %
+    % NOTE: the implementation uses "thread-unsafe" version_arrays. It is NOT
+    % safe to access or update copies of the same va_map simultaneously.
+    %
+:- func init = va_map(K, V).
+
+    % Return the value associated with the given key in the map.
+    %
+:- pred lookup(va_map(K, V)::in, K::in, V::out) is det
+    <= (va_map_key(K), va_map_value(V)).
+
+    % Insert or update the key with the given value.
+    %
+:- pred set(K::in, V::in, va_map(K, V)::in, va_map(K, V)::out) is det
+    <= (va_map_key(K), va_map_value(V)).
+
+%---------------------------------------------------------------------------%
+%---------------------------------------------------------------------------%
+
+:- implementation.
+
+:- import_module int.
+:- import_module uint.
+:- import_module version_array.
+
+:- type va_map(K, V)
+    --->    va_map(version_array(V)).
+
+%---------------------------------------------------------------------------%
+
+:- pred key_to_slot(K::in, int::out) is det <= va_map_key(K).
+
+key_to_slot(K, Slot) :-
+    Slot = uint.cast_to_int(from_key(K)).
+
+:- pred slot_to_key(int::in, K::out) is det <= va_map_key(K).
+:- pragma consider_used(pred(slot_to_key/2)).
+
+slot_to_key(Slot, K) :-
+    U = uint.cast_from_int(Slot),
+    K = to_key(U).
+
+%---------------------------------------------------------------------------%
+
+init = va_map(Array) :-
+    Array = version_array.unsafe_empty.
+
+%---------------------------------------------------------------------------%
+
+lookup(VAMap, Key, Value) :-
+    VAMap = va_map(Array),
+    key_to_slot(Key, Slot),
+    ( if Slot < size(Array) then
+        Value = version_array.lookup(Array, Slot)
+    else
+        Value = default_value
+    ).
+
+%---------------------------------------------------------------------------%
+
+set(Key, Value, VAMap0, VAMap) :-
+    VAMap0 = va_map(Array0),
+    key_to_slot(Key, Slot),
+    maybe_grow_array(Slot, Array0, Array1),
+    version_array.set(Slot, Value, Array1, Array),
+    VAMap = va_map(Array).
+
+:- pred maybe_grow_array(int::in, version_array(V)::in, version_array(V)::out)
+    is det <= va_map_value(V).
+
+maybe_grow_array(Slot, Array0, Array) :-
+    Size0 = size(Array0),
+    ( if Slot >= Size0 then
+        NewSize = roundup_array_size(Slot + 1),
+        version_array.resize(NewSize, default_value, Array0, Array)
+    else
+        Array = Array0
+    ).
+
+:- func roundup_array_size(int) = int.
+
+roundup_array_size(N) =
+    % Round up to next multiple of 512 (arbitrarily).
+    (N + 511) // 512 * 512.
+
+%---------------------------------------------------------------------------%
+:- end_module libs.va_map.
+%---------------------------------------------------------------------------%


More information about the reviews mailing list