[m-rev.] diff: [CTGC] keep datastructs normalised

Peter Wang novalazy at gmail.com
Wed May 28 15:27:45 AEST 2008


Branches: main

Speed up structure sharing and reuse analyses by keeping `datastruct'
selectors normalised.  A lot of time was spent normalising selectors in
inner loops.

compiler/ctgc.datastruct.m:
	Make `datastruct_termshift' normalise the selector in the "shifted"
	datastruct.

	Delete some unneeded procedures and clean up some code.

compiler/ctgc.livedata.m:
	Conform to the change to `datastruct_termshift'.

compiler/ctgc.selector.m:
	Make `selector.subsumed_by' take an argument which says whether its
	selector input arguments need normalisation.

compiler/prog_data.m:
	Add a comment that datastruct selectors are normalised.

compiler/structure_sharing.domain.m:
	Try to normalise selectors outside of loops.  Call
	`selector.subsumed_by' with pre-normalised selectors where possible.

	Add some assertions to check that selectors are normalised where they
	would have been explicitly normalised before.

	Fix a spot where we found solutions for all elements in a list when we
	only wanted wanted the first solution.

Index: compiler/ctgc.datastruct.m
===================================================================
RCS file: /home/mercury/mercury1/repository/mercury/compiler/ctgc.datastruct.m,v
retrieving revision 1.12
diff -u -r1.12 ctgc.datastruct.m
--- compiler/ctgc.datastruct.m	28 Apr 2008 07:23:12 -0000	1.12
+++ compiler/ctgc.datastruct.m	28 May 2008 05:09:30 -0000
@@ -49,9 +49,8 @@
     % It is assumed that the selector is a valid selector for that
     % datastructure.
     %
-:- pred datastruct_termshift(selector::in, datastruct::in, datastruct::out)
-    is det.
-:- func datastruct_termshift(selector, datastruct) = datastruct.
+:- func datastruct_termshift(module_info, proc_info, selector, datastruct)
+    = datastruct.
 
     % Normalize the representation of the datastructure.
     % (proc_info is needed to obtain the type of the variable of the
@@ -62,14 +61,6 @@
     %
 :- func normalize_datastruct(module_info, proc_info, datastruct) = datastruct.
 
-    % Normalize the representation of the datastructure using its
-    % type information.
-    %
-:- pred normalize_datastruct_with_type_information(module_info::in, 
-    mer_type::in, datastruct::in, datastruct::out) is det.
-:- func normalize_datastruct_with_type_information(module_info, mer_type,
-    datastruct) = datastruct.
-
 :- pred datastruct_subsumed_by_return_selector(module_info::in, proc_info::in,
     datastruct::in, datastruct::in, selector::out) is semidet.
 
@@ -122,37 +113,39 @@
     DSel = Data ^ sc_selector,
     DSel = [].
 
-datastruct_termshift(Sel, Data0, Data) :-
-    DSel = Data0 ^ sc_selector,
-    selector_termshift(DSel, Sel, NewSel),
-    Data = Data0 ^ sc_selector := NewSel.
-
-datastruct_termshift(Sel, Data0) = Data :-
-    datastruct_termshift(Sel, Data0, Data).
-
-normalize_datastruct_with_type_information(ModuleInfo, Type, !Datastruct) :-
-    DSel0 = !.Datastruct ^ sc_selector,
-    normalize_selector_with_type_information(ModuleInfo, Type, DSel0, DSel),
-    !:Datastruct = !.Datastruct ^ sc_selector := DSel.
+datastruct_termshift(ModuleInfo, ProcInfo, Sel, Data0) = Data :-
+    (
+        Sel = [],
+        Data = Data0
+    ;
+        Sel = [_ | _],
+        Data0 = selected_cel(Var, DSel),
+        selector_termshift(DSel, Sel, NewSel0),
+
+        % Keep datastruct seletors normalized.
+        proc_info_get_vartypes(ProcInfo, VarTypes),
+        map.lookup(VarTypes, Var, Type),
+        normalize_selector_with_type_information(ModuleInfo, Type,
+            NewSel0, NewSel),
 
-normalize_datastruct_with_type_information(ModuleInfo, Type, Data0) = Data :-
-    normalize_datastruct_with_type_information(ModuleInfo, Type, Data0, Data).
+        Data = selected_cel(Var, NewSel)
+    ).
 
 normalize_datastruct(ModuleInfo, ProcInfo, Data0) = Data :-
-    Var = Data0 ^ sc_var,
+    Data0 = selected_cel(Var, DSel0),
     proc_info_get_vartypes(ProcInfo, VarTypes),
     map.lookup(VarTypes, Var, Type),
-    Data = normalize_datastruct_with_type_information(ModuleInfo, Type, Data0).
+    normalize_selector_with_type_information(ModuleInfo, Type, DSel0, DSel),
+    Data = selected_cel(Var, DSel).
 
 datastruct_subsumed_by_return_selector(ModuleInfo, ProcInfo, Data1, Data2,
         Extension) :-
-    Var = Data1 ^ sc_var,
-    Var = Data2 ^ sc_var,
-    Sel1 = Data1 ^ sc_selector,
-    Sel2 = Data2 ^ sc_selector,
+    Data1 = selected_cel(Var, Sel1),
+    Data2 = selected_cel(Var, Sel2),
     proc_info_get_vartypes(ProcInfo, VarTypes),
     map.lookup(VarTypes, Var, Type),
-    ctgc.selector.subsumed_by(ModuleInfo, Sel1, Sel2, Type, Extension).
+    selector.subsumed_by(ModuleInfo, already_normalized, Sel1, Sel2, Type,
+        Extension).
 
 datastruct_subsumed_by(ModuleInfo, ProcInfo, Data1, Data2) :-
     datastruct_subsumed_by_return_selector(ModuleInfo, ProcInfo, Data1, Data2,
@@ -185,13 +178,12 @@
 datastructs_subsume_datastruct(ModuleInfo, ProcInfo, Datastructs, Data):- 
     datastruct_subsumed_by_list(ModuleInfo, ProcInfo, Data, Datastructs).
 
-datastruct_apply_widening(ModuleInfo, ProcInfo, !Data) :-
-    Var = !.Data ^ sc_var,
-    Sel0 = !.Data ^ sc_selector,
+datastruct_apply_widening(ModuleInfo, ProcInfo, Data0, Data) :-
+    Data0 = selected_cel(Var, Sel0),
     proc_info_get_vartypes(ProcInfo, VarTypes),
     map.lookup(VarTypes, Var, Type),
     selector_apply_widening(ModuleInfo, Type, Sel0, Sel),
-    !:Data = datastruct_init_with_selector(Var, Sel).
+    Data = selected_cel(Var, Sel).
 
 datastruct_lists_least_upper_bound(ModuleInfo, ProcInfo, Data1, Data2) 
         = Data :- 
Index: compiler/ctgc.livedata.m
===================================================================
RCS file: /home/mercury/mercury1/repository/mercury/compiler/ctgc.livedata.m,v
retrieving revision 1.6
diff -u -r1.6 ctgc.livedata.m
--- compiler/ctgc.livedata.m	28 Apr 2008 07:23:12 -0000	1.6
+++ compiler/ctgc.livedata.m	28 May 2008 05:09:30 -0000
@@ -340,7 +340,7 @@
 		% Each sx1 = sx.s2, where s2 is one of SelectorList.
 		list.map(
             (pred(S2::in, Xsx1::out) is det :-
-                datastruct_termshift(S2, Xsx, Xsx1)
+                datastruct_termshift(ModuleInfo, ProcInfo, S2, Xsx) = Xsx1
             ), SelectorList, List_Xsx1)
 	).
 
Index: compiler/ctgc.selector.m
===================================================================
RCS file: /home/mercury/mercury1/repository/mercury/compiler/ctgc.selector.m,v
retrieving revision 1.14
diff -u -r1.14 ctgc.selector.m
--- compiler/ctgc.selector.m	22 May 2008 04:11:29 -0000	1.14
+++ compiler/ctgc.selector.m	28 May 2008 05:09:30 -0000
@@ -23,6 +23,10 @@
 
 %-----------------------------------------------------------------------------%
 
+:- type normalization
+    --->    need_normalization
+    ;       already_normalized.
+
     % Create a selector as either the top selector, a term selector,
     % or a type selector.
     %
@@ -42,8 +46,8 @@
     %
     % The type specifies the type of the term to which the selectors refer.
     %
-:- pred subsumed_by(module_info::in, selector::in, selector::in,
-    mer_type::in, selector::out) is semidet.
+:- pred subsumed_by(module_info::in, normalization::in,
+    selector::in, selector::in, mer_type::in, selector::out) is semidet.
 
     % Using the type information of the variable to which the given selector
     % belongs, normalize that selector.
@@ -110,10 +114,19 @@
 selector_termshift(S1, S2, S) :-
     list.append(S1, S2, S).
 
-subsumed_by(ModuleInfo, S1, S2, MainType, Extension) :-
+subsumed_by(ModuleInfo, Normalization, S1, S2, MainType, Extension):-
     % First make sure that both selectors are in a normalized form.
-    normalize_selector_with_type_information(ModuleInfo, MainType, S1, NormS1),
-    normalize_selector_with_type_information(ModuleInfo, MainType, S2, NormS2),
+    (
+        Normalization = already_normalized,
+        NormS1 = S1,
+        NormS2 = S2
+    ;
+        Normalization = need_normalization,
+        normalize_selector_with_type_information(ModuleInfo, MainType,
+            S1, NormS1),
+        normalize_selector_with_type_information(ModuleInfo, MainType,
+            S2, NormS2)
+    ),
     (
         only_term_selectors(NormS1),
         only_term_selectors(NormS2)
Index: compiler/prog_data.m
===================================================================
RCS file: /home/mercury/mercury1/repository/mercury/compiler/prog_data.m,v
retrieving revision 1.211
diff -u -r1.211 prog_data.m
--- compiler/prog_data.m	3 Apr 2008 05:26:45 -0000	1.211
+++ compiler/prog_data.m	28 May 2008 05:09:30 -0000
@@ -383,7 +383,8 @@
 :- type structure_sharing_pair == pair(datastruct).
 
     % A datastructure is a concept that designates a particular subterm of the
-    % term to which a particular variable may be bound.
+    % term to which a particular variable may be bound.  The selector is
+    % normalized.
     %
 :- type datastruct
     --->    selected_cel(
Index: compiler/structure_sharing.domain.m
===================================================================
RCS file: /home/mercury/mercury1/repository/mercury/compiler/structure_sharing.domain.m,v
retrieving revision 1.33
diff -u -r1.33 structure_sharing.domain.m
--- compiler/structure_sharing.domain.m	9 May 2008 05:45:11 -0000	1.33
+++ compiler/structure_sharing.domain.m	28 May 2008 05:09:31 -0000
@@ -1189,7 +1189,7 @@
 
     % combine it all:
     ResultSharingSet = sharing_set_least_upper_bound_list(ModuleInfo, ProcInfo,
-        [NewSharingSet, OldSharingSet, OldNewSharingSet, NewOldNewSharingSet]).
+        [NewSharingSet, OldSharingSet, OldNewSharingSet], NewOldNewSharingSet).
 
     % XXX new implementation.
     %
@@ -1311,11 +1311,12 @@
     new_entries(ModuleInfo, ProcInfo, Pairs, Set, Union).
 
 :- func sharing_set_least_upper_bound_list(module_info, proc_info,
-    list(sharing_set)) = sharing_set.
+    list(sharing_set), sharing_set) = sharing_set.
 
-sharing_set_least_upper_bound_list(ModuleInfo, ProcInfo, ListSharingSet) =
+sharing_set_least_upper_bound_list(ModuleInfo, ProcInfo, ListSharingSet,
+        SharingSet0) =
     list.foldl(sharing_set_least_upper_bound(ModuleInfo, ProcInfo),
-        ListSharingSet, sharing_set_init).
+        ListSharingSet, SharingSet0).
 
 sharing_set_extend_datastruct(ModuleInfo, ProcInfo, Datastruct, SharingSet)
         = [Datastruct | Datastructures] :-
@@ -1329,7 +1330,7 @@
         proc_info_get_vartypes(ProcInfo, VarTypes),
         map.lookup(VarTypes, Var, VarType),
         Datastructures = selector_sharing_set_extend_datastruct(ModuleInfo,
-            VarType, Selector, SelectorSet)
+            ProcInfo, VarType, Selector, SelectorSet)
     ;
         Datastructures = []
     ).
@@ -1471,10 +1472,15 @@
     SharingSet = sharing_set(_, SharingMap),
 
     SharingPair = Data1 - Data2,
-    Var1 = Data1 ^ sc_var,
-    Sel1 = Data1 ^ sc_selector,
-    Var2 = Data2 ^ sc_var,
-    Sel2 = Data2 ^ sc_selector,
+    Data1 = selected_cel(Var1, Sel1),
+    Data2 = selected_cel(Var2, Sel2),
+    trace [
+        compile_time(flag("check_selector_normalization")),
+        run_time(env("check_selector_normalization"))
+    ] (
+        check_normalized(ModuleInfo, Type1, Sel1),
+        check_normalized(ModuleInfo, Type2, Sel2)
+    ),
 
     proc_info_get_vartypes(ProcInfo, VarTypes),
     map.lookup(VarTypes, Var1, Type1),
@@ -1484,29 +1490,36 @@
     SelSharingSet = selector_sharing_set(_, SelSharingMap),
     map.keys(SelSharingMap, SelectorList),
 
-        % Find at least one selector in SelectorList that is more general
-        % than Sel1 (with a specific extension), and whose associated dataset
-        % contains at least one datastructure that is more general than Data2
-        % (with that same extension).
-    list.find_first_map(
-        pred(Sel::in, Data::out) is semidet :-
-        (
-            subsumed_by(ModuleInfo, Sel1, Sel, Type1, Extension),
-            map.search(SelSharingMap, Sel, DataSet),
-            DataSet = datastructures(_, DatastructureSet),
-            MatchedDatastructs = list.filter(
-                pred(Datastructure::in) is semidet :-
-                (
-                    Var2 = Datastructure ^ sc_var,
-                    ctgc.selector.subsumed_by(ModuleInfo, Sel2,
-                        Datastructure ^ sc_selector, Type2, Extension)
-                ),
-                to_sorted_list(DatastructureSet)),
-            % The list of matched datastructures contains at least one element.
-            MatchedDatastructs = [Data| _]
+    % Find at least one selector in SelectorList that is more general
+    % than Sel1 (with a specific extension), and whose associated dataset
+    % contains at least one datastructure that is more general than Data2
+    % (with that same extension).
+    some [Sel] (
+        list.member(Sel, SelectorList),
+        trace [
+            compile_time(flag("check_selector_normalization")),
+            run_time(env("check_selector_normalization"))
+        ] (
+            check_normalized(ModuleInfo, Type1, Sel)
         ),
-        SelectorList,
-        _).
+        selector.subsumed_by(ModuleInfo, already_normalized,
+            Sel1, Sel, Type1, Extension),
+        map.search(SelSharingMap, Sel, datastructures(_, DatastructureSet)),
+
+        some [Datastructure] (
+            set.member(Datastructure, DatastructureSet),
+            Var2 = Datastructure ^ sc_var,
+            DatastructureSel = Datastructure ^ sc_selector,
+            trace [
+                compile_time(flag("check_selector_normalization")),
+                run_time(env("check_selector_normalization"))
+            ] (
+                check_normalized(ModuleInfo, Type2, DatastructureSel)
+            ),
+            selector.subsumed_by(ModuleInfo, already_normalized,
+                Sel2, DatastructureSel, Type2, Extension)
+        )
+    ).
 
     % Return the list of sharing pairs included in the sharing set that are
     % less or equal to the given sharing pair.
@@ -1520,10 +1533,15 @@
     SharingSet = sharing_set(_, SharingMap),
 
     SharingPair = Data1 - Data2,
-    Var1 = Data1 ^ sc_var,
-    Sel1 = Data1 ^ sc_selector,
-    Var2 = Data2 ^ sc_var,
-    Sel2 = Data2 ^ sc_selector,
+    Data1 = selected_cel(Var1, Sel1),
+    Data2 = selected_cel(Var2, Sel2),
+    trace [
+        compile_time(flag("check_selector_normalization")),
+        run_time(env("check_selector_normalization"))
+    ] (
+        check_normalized(ModuleInfo, Type1, Sel1),
+        check_normalized(ModuleInfo, Type2, Sel2)
+    ),
 
     proc_info_get_vartypes(ProcInfo, VarTypes),
     map.lookup(VarTypes, Var1, Type1),
@@ -1539,18 +1557,29 @@
         %
         map.keys(SelSharingMap, SelectorList),
         list.filter_map(
-            pred(Selector::in, SPairs::out) is semidet :-
-            (
-                ctgc.selector.subsumed_by(ModuleInfo, Selector, Sel1,
-                    Type1, Extension),
+            (pred(Selector::in, SPairs::out) is semidet :-
+                trace [
+                    compile_time(flag("check_selector_normalization")),
+                    run_time(env("check_selector_normalization"))
+                ] (
+                    check_normalized(ModuleInfo, Type1, Selector)
+                ),
+                selector.subsumed_by(ModuleInfo, already_normalized,
+                    Selector, Sel1, Type1, Extension),
                 map.search(SelSharingMap, Selector, Dataset),
                 Dataset = datastructures(_, Datastructs),
                 list.filter_map(
-                    pred(D::in, Pair::out) is semidet :-
-                    (
+                    (pred(D::in, Pair::out) is semidet :-
                         Var2 = D ^ sc_var,
-                        ctgc.selector.subsumed_by(ModuleInfo, D ^ sc_selector,
-                            Sel2, Type2, Extension),
+                        DSel = D ^ sc_selector,
+                        trace [
+                            compile_time(flag("check_selector_normalization")),
+                            run_time(env("check_selector_normalization"))
+                        ] (
+                            check_normalized(ModuleInfo, Type2, DSel)
+                        ),
+                        selector.subsumed_by(ModuleInfo, already_normalized,
+                            DSel, Sel2, Type2, Extension),
                         Pair = datastruct_init_with_selector(Var1, Selector)
                             - D
                     ),
@@ -1578,6 +1607,16 @@
         remove_swapped_dup_pairs(T, [H | Acc0], Acc)
     ).
 
+:- pred check_normalized(module_info::in, mer_type::in, selector::in) is det.
+
+check_normalized(ModuleInfo, Type, Sel) :-
+    normalize_selector_with_type_information(ModuleInfo, Type, Sel, Norm),
+    ( Sel = Norm ->
+        true
+    ;
+        unexpected(this_file, "check_normalized: unnormalized selector")
+    ).
+
 :- pred new_entries(module_info::in, proc_info::in, structure_sharing::in,
     sharing_set::in, sharing_set::out) is det.
 
@@ -1720,7 +1759,7 @@
 :- func selector_sharing_set_altclos(module_info, proc_info, mer_type,
     selector_sharing_set, selector_sharing_set) = structure_sharing.
 
-:- func selector_sharing_set_extend_datastruct(module_info,
+:- func selector_sharing_set_extend_datastruct(module_info, proc_info,
     mer_type, selector, selector_sharing_set) = list(datastruct).
 
 :- pred selector_sharing_set_apply_widening(module_info::in, proc_info::in,
@@ -1855,7 +1894,7 @@
 :- func basic_closure(module_info, proc_info, mer_type,
     data_set, data_set, selector, selector) = structure_sharing.
 
-basic_closure(ModuleInfo, _ProcInfo, Type, NewDataSet, OldDataSet,
+basic_closure(ModuleInfo, ProcInfo, Type, NewDataSet, OldDataSet,
         NewSel, OldSel) = SharingPairs :-
     % three cases:
     % 1. NewSel <= OldSel then generate sharing pairs.
@@ -1864,16 +1903,20 @@
 
     (
         % NewSel <= OldSel ie, \exists Extension: OldSel.Extension = NewSel.
-        ctgc.selector.subsumed_by(ModuleInfo, NewSel, OldSel, Type, Extension)
+        selector.subsumed_by(ModuleInfo, already_normalized, NewSel, OldSel,
+            Type, Extension)
     ->
-        data_set_termshift(OldDataSet, Extension, TermShiftedOldDataSet),
+        data_set_termshift(ModuleInfo, ProcInfo, OldDataSet, Extension,
+            TermShiftedOldDataSet),
         SharingPairs = data_set_directed_closure(TermShiftedOldDataSet,
             NewDataSet)
     ;
         % OldSel <= NewSel ie, \exists Extension: NewSel.Extension = OldSel.
-        ctgc.selector.subsumed_by(ModuleInfo, OldSel, NewSel, Type, Extension)
+        selector.subsumed_by(ModuleInfo, already_normalized, OldSel, NewSel,
+            Type, Extension)
     ->
-        data_set_termshift(NewDataSet, Extension, TermShiftedNewDataSet),
+        data_set_termshift(ModuleInfo, ProcInfo, NewDataSet, Extension,
+            TermShiftedNewDataSet),
         SharingPairs = data_set_directed_closure(TermShiftedNewDataSet,
             OldDataSet)
     ;
@@ -1881,28 +1924,28 @@
         SharingPairs = []
     ).
 
-selector_sharing_set_extend_datastruct(ModuleInfo, VarType, Selector,
+selector_sharing_set_extend_datastruct(ModuleInfo, ProcInfo, VarType, Selector,
         SelectorSharingSet) = Datastructures :-
     SelectorSharingSet = selector_sharing_set(_, SelectorMap),
-    DoExtend = selector_sharing_set_extend_datastruct_2(ModuleInfo, VarType,
-        Selector),
+    DoExtend = selector_sharing_set_extend_datastruct_2(ModuleInfo, ProcInfo,
+        VarType, Selector),
     Datastructures0 = map.map_values(DoExtend, SelectorMap),
     Datastructures = list.condense(map.values(Datastructures0)).
 
-:- func selector_sharing_set_extend_datastruct_2(module_info,
+:- func selector_sharing_set_extend_datastruct_2(module_info, proc_info,
     mer_type, selector, selector, data_set) = list(datastruct).
 
-selector_sharing_set_extend_datastruct_2(ModuleInfo, VarType, BaseSelector,
-        Selector, Dataset0) = Datastructures :-
+selector_sharing_set_extend_datastruct_2(ModuleInfo, ProcInfo, VarType,
+        BaseSelector, Selector, Dataset0) = Datastructures :-
     % If Selector is more general than BaseSelector, i.e.
     % BaseSelector = Selector.Extension, apply this extension
     % to all the datastructs associated with Selector, and add them
     % to the set of datastructs collected.
     (
-        ctgc.selector.subsumed_by(ModuleInfo, BaseSelector,
+        selector.subsumed_by(ModuleInfo, need_normalization, BaseSelector,
             Selector, VarType, Extension)
     ->
-        data_set_termshift(Dataset0, Extension, Dataset),
+        data_set_termshift(ModuleInfo, ProcInfo, Dataset0, Extension, Dataset),
         Dataset = datastructures(_, Data),
         Datastructures = set.to_sorted_list(Data)
     ;
@@ -1963,7 +2006,8 @@
 :- pred data_set_rename(prog_var_renaming::in, tsubst::in,
     data_set::in, data_set::out) is det.
 
-:- pred data_set_termshift(data_set::in, selector::in, data_set::out) is det.
+:- pred data_set_termshift(module_info::in, proc_info::in, data_set::in,
+    selector::in, data_set::out) is det.
 
 :- pred data_set_add(data_set::in, data_set::in, data_set::out) is det.
 
@@ -2005,9 +2049,9 @@
     Datastructs = set.map(rename_datastruct(Dict, Subst), Datastructs0),
     !:DataSet = datastructures(set.count(Datastructs), Datastructs).
 
-data_set_termshift(DataSet0, Selector, DataSet) :-
+data_set_termshift(ModuleInfo, ProcInfo, DataSet0, Selector, DataSet) :-
     DataSet0 = datastructures(Size, Set0),
-    Set = set.map(datastruct_termshift(Selector), Set0),
+    Set = set.map(datastruct_termshift(ModuleInfo, ProcInfo, Selector), Set0),
     DataSet = datastructures(Size, Set).
 
 data_set_add(DataSetA, DataSetB, DataSet) :-


--------------------------------------------------------------------------
mercury-reviews mailing list
Post messages to:       mercury-reviews at csse.unimelb.edu.au
Administrative Queries: owner-mercury-reviews at csse.unimelb.edu.au
Subscriptions:          mercury-reviews-request at csse.unimelb.edu.au
--------------------------------------------------------------------------



More information about the reviews mailing list