[m-dev.] for review: cache references to free(alias) variables in the code generator

Zoltan Somogyi zs at cs.mu.OZ.AU
Wed Aug 19 16:24:25 AEST 1998


> Implement caching of references to free variables in the code generator.
> E.g. after unification 
> 
> 	Y = f(X)
> 
> if Y is live, we don't need to keep a reference to X, we just need
> to remember that X is located in the first field of Y.

Do you have a test case that shows that this scheme yields better code?
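
For reference, the kind of test I have in mind is something like the
following (just a sketch, nothing I have compiled, and it assumes the
alias mode system compiles these unifications in source order, so that
each cons cell is constructed while its tail is still free):

	:- module cache_ref_test.
	:- interface.
	:- import_module list.

	:- pred make_pair(T, T, list(T)).
	:- mode make_pair(in, in, out) is det.

	:- implementation.

	make_pair(A, B, List) :-
		% List is live after this unification while Tail is still
		% free, so the code generator should only need to remember
		% that Tail lives in a field of List, rather than keep a
		% separate reference to that field.
		List = [A | Tail],
		Tail = [B | Rest],
		Rest = [].

Comparing the code generated for such a predicate with and without the
change would show whether the caching pays off.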

> +       % code_exprn__uncache_references(Vars, Code, ExprnInfo0, ExprnInfo)
> +       %       Output Code to produce references to any variables that 
> +       %       are cached as references to fields of variables in Vars.
> +       %       Update the references in the ExprnInfo to show that
> +       %       these references have been evaluated.
> +:- pred code_exprn__uncache_references(set(var), code_tree,
> +               exprn_info, exprn_info).
> +:- mode code_exprn__uncache_references(in, out, in, out) is det.

What does "Output Code to produce references" mean?

> +%      Locations where the value needs to be placed.  Each lval in a
> +%      set(lval) should point to the same place.  The bool is yes if the
> +%      value is already in this location, otherwise it is no.
> +:- type var_refs == set(pair(bool, var_ref_stat)).

I don't know what this means. What lval in what set(lval)?

Since I don't know the semantics of this key data structure, I can't evaluate
the correctness of several predicates in the change.

> +:- type var_ref_stat
> +	--->	evaled(set(lval))
> +	;	cached(
> +			var,	% Var where this reference occurs.
> +			tag,	% Tag to use for dereferencing Var.
> +			int	% Field number of reference.
> +		).
> +	%;	list(set(rval)).	% AAA  A linked list of references.

The last line should be deleted or fixed (you don't want `list'
to be the third alternative in the type).

>  code_exprn__filter_out_reg_depending_refs(Refs0, Vars, Refs) :-
>  	set__to_sorted_list(Refs0, RefList0),
>  	list__map(lambda([Ref0::in, Ref::out] is det,
> -		(
> -			Ref0 = Placed - Lvals0,
> +		( Ref0 = Placed - evaled(Lvals0),
>  			set__to_sorted_list(Lvals0, LvalList0),
>  			list__filter(lambda([Lval::in] is semidet,
>  				\+ code_exprn__lval_depends_on_reg(Lval, Vars)),
>  				LvalList0, LvalList),
>  			set__list_to_set(LvalList, Lvals),
> -			Ref = Placed - Lvals
> +			Ref = Placed - evaled(Lvals)
> +		; Ref0 = _ - cached(_, _, _),
> +			Ref = Ref0
>  		)), RefList0, RefList),
>  	set__list_to_set(RefList, Refs).

Why is it OK to ignore the possibility that the cached ref is to a variable
that is stored in an lval that does depend on a register, i.e. one for which
the \+ lval_depends_on_reg test may fail?

Other switches in this module are not formatted like this.

> +code_exprn__cache_reference(Var, LHSVar, Tag, FieldNum) -->
> +	code_exprn__get_vars(Vars0),
> +	{ RefStat = no - cached(LHSVar, Tag, FieldNum) },
> +	{ map__search(Vars0, Var, var_info(Locs0, Stat0)) ->
> +		Stat = Stat0,
> +		set__insert(Locs0, RefStat, Locs)
> +	;
> +		Stat = none,
> +		set__singleton_set(Locs, RefStat)
> +	},
> +	{ map__set(Vars0, Var, var_info(Locs, Stat), Vars) },
> +	code_exprn__set_vars(Vars).
> +

> @@ -1247,9 +1297,12 @@
>  	set__to_sorted_list(Refs0, RefsList0),
>  	Filter = lambda([X::in] is semidet,
>  		( 
> -			X = _ - Lvals1,
> -			set__member(L, Lvals0),
> -			set__member(L, Lvals1)
> +			X = _ - RefStat,
> +			( RefStat = evaled(Lvals1),
> +				set__member(L, Lvals0),
> +				set__member(L, Lvals1)
> +			; RefStat = cached(_, _, _)
> +			)
>  		)),
>  	list__filter(Filter, RefsList0, RefsList),
>  	set__sorted_list_to_set(RefsList, Refs).

The variable X should have a more expressive name.

Other switches in this module are not formatted like this.

> @@ -1259,24 +1312,39 @@
>  	{ map__lookup(Vars0, Var, var_info(Refs0, Stat)) },
>  
>  	% Remove an lval from the set and recursively call place_var.
> -	( { get_next_ref_to_place(Refs0, Lvals, Refs1) } ->
> -		{ code_exprn__remove_lval_references(Refs1, Lvals, Refs2) },
> -		{ set__insert(Refs2, yes - Lvals, Refs) },
> -		{ map__det_update(Vars0, Var, var_info(Refs, Stat), Vars) },
> +	( { get_next_ref_to_place(Refs0, RefStat, Refs1) } ->
> +		( { RefStat = evaled(Lvals) },
> +			{ code_exprn__remove_lval_references(Refs1, Lvals,
> +				Refs2) },
> +			{ code_exprn__select_lval(Lvals, Lval) },
> +			{ Location = mem_ref(lval(Lval)) },
> +			{ Code0 = empty }
> +		; { RefStat = cached(LVar, Tag, FieldNum) },
> +			% There is no need to evaluate the ref, so leave it
> +			% cached and place the value of Var directly into
> +			% the cached location.
> +			code_exprn__produce_var(LVar, Rval, Code0),
> +			{ Location = field(yes(Tag), Rval,
> +				const(int_const(FieldNum))) },
> +			{ Refs2 = Refs1 }
> +		),
> +		code_exprn__get_vars(Vars1),
> +		{ set__insert(Refs2, yes - RefStat, Refs) },
> +		{ map__det_update(Vars1, Var, var_info(Refs, Stat), Vars) },
>  		code_exprn__set_vars(Vars),
> -		{ code_exprn__select_lval(Lvals, Lval) },
> -		code_exprn__place_var(Var, mem_ref(lval(Lval)), Code)
> +		code_exprn__place_var(Var, Location, Code1),
> +		{ Code = tree(Code0, Code1) }
>  	;
>  		{ Code = empty }
>  	).

I don't understand what this predicate, place_var_in_references, is supposed
to do. The comment about "no need to evaluate the ref" (what ref?) is followed
by code that does evaluation, which is especially confusing.

Why does this code call place_var rather than place_var_in_references?

> +code_exprn__uncache_references(Lvars, Code) -->
> +	code_exprn__get_vars(VarMap0),
> +	{ map__keys(VarMap0, Vars) },
> +	{ set__to_sorted_list(Lvars, LvarsList) },
> +	tree__construct(code_exprn__uncache_references_2(LvarsList),
> +		Vars, Code).
> +
> +:- pred code_exprn__uncache_references_2(list(var), var, code_tree,
> +		exprn_info, exprn_info).
> +:- mode code_exprn__uncache_references_2(in, in, out, in, out) is det.
> +
> +code_exprn__uncache_references_2(Lvars, Var, Code) -->
> +	tree__construct(code_exprn__uncache_reference(Var), Lvars, Code).

This code should be adjacent to the code of code_exprn__uncache_reference.

> +:- pred code_exprn__uncache_reference(var, var, code_tree,
> +		exprn_info, exprn_info).
> +:- mode code_exprn__uncache_reference(in, in, out, in, out) is det.
> +
> +code_exprn__uncache_reference(Var, Lvar, Code) -->
> +	code_exprn__get_vars(VarMap0),
> +	{ map__lookup(VarMap0, Var, var_info(Refs0, Stat)) },
> +
> +	% Removed the cached references from the var_info.
> +	{ set__to_sorted_list(Refs0, RefsList0) },
> +	{ list__filter(lambda([RefStat::in] is semidet, 
> +		RefStat = _ - cached(Lvar, _, _)), RefsList0, CachedRefs,
> +		RefsList1) },
> +	{ set__sorted_list_to_set(RefsList1, Refs1) },
> +	{ map__det_update(VarMap0, Var, var_info(Refs1, Stat), Vars1) },
> +	code_exprn__set_vars(Vars1),
> +
> +	tree__construct(code_exprn__uncache_reference_2(Lvar, Var), CachedRefs,
> +		Code).

I don't see how this works.

> +:- pred code_exprn__uncache_reference_2(var, var, pair(bool, var_ref_stat),
> +		code_tree, exprn_info, exprn_info).
> +:- mode code_exprn__uncache_reference_2(in, in, in, out, in, out) is det.
> +
> +code_exprn__uncache_reference_2(Lvar, Var, Placed - RefStat0, Code) -->
> +	( { RefStat0 = cached(Lvar, Tag, FieldNum) } ->
> +		code_exprn__select_preferred_reg(Var, PrefLval),
> +		( { PrefLval = reg(PrefRegType, PrefRegNum) } ->
> +			code_exprn__acquire_reg_prefer_given(PrefRegType,
> +				PrefRegNum, Lval)
> +		;
> +			{ Lval = PrefLval }
> +		),
> +		code_exprn__get_vars(Vars0),
> +		{ map__lookup(Vars0, Var, var_info(Refs0, Stat)) },
> +		{ set__singleton_set(Lvals, Lval) },
> +		{ set__insert(Refs0, Placed - evaled(Lvals), Refs) },
> +		{ map__det_update(Vars0, Var, var_info(Refs, Stat), Vars) },
> +		code_exprn__set_vars(Vars),
> +		code_exprn__uncache_reference_in_lval(Lvar, Tag, FieldNum, Lval,
> +			Code)
> +	;
> +		{ error("code_exprn__uncache_reference_2: oops") }
> +	).

Invoking set_vars to reflect a change before invoking uncache_reference_in_lval
to make the change is dangerous.

> +:- pred code_exprn__uncache_reference_in_lval(var, tag, int, lval,
> +		code_tree, exprn_info, exprn_info).
> +:- mode code_exprn__uncache_reference_in_lval(in, in, in, in, out, in, out)
> +		is det.
> +
> +code_exprn__uncache_reference_in_lval(Lvar, Tag, FieldNum, Lval, Code) -->
> +	code_exprn__produce_var(Lvar, Rval, Code0),
> +	{ Code1 = node([assign(Lval,
> +		mem_addr(heap_ref(Rval, Tag, FieldNum))) -
> +		"place reference"]) },
> +	{ Code = tree(Code0, Code1) }.

Can't you hoist produce_var out of the loop driven by tree__construct?
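
I.e. something along these lines (untested; it assumes the Rval produced
for Lvar remains valid for the whole loop, and that uncache_reference_2
and uncache_reference_in_lval are changed to take Rval as an extra
argument instead of calling produce_var themselves):

	code_exprn__uncache_reference(Var, Lvar, Code) -->
		% ... filter the cached refs out of Var's var_info as
		% before, giving CachedRefs ...
		code_exprn__produce_var(Lvar, Rval, ProduceCode),
		tree__construct(
			code_exprn__uncache_reference_2(Lvar, Var, Rval),
			CachedRefs, UncacheCode),
		{ Code = tree(ProduceCode, UncacheCode) }.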

>  	% Update the code info structure to be consistent
> -	% immediately after generating a goal.
> -code_info__post_goal_update(GoalInfo) -->
> +	% immediately after generating a goal
> +code_info__post_goal_update(GoalInfo, Code) -->

Deleting the full stop leaves this comment as an incomplete sentence.

>  	% note: we must be careful to apply deaths before births
>  	{ goal_info_get_post_deaths(GoalInfo, PostDeaths) },
> +	code_info__uncache_references(PostDeaths, Code),
>  	code_info__rem_forward_live_vars(PostDeaths),
>  	code_info__make_vars_forward_dead(PostDeaths),
>  	{ goal_info_get_post_births(GoalInfo, PostBirths) },

Why do you uncache references only for post-deaths, and not also for
pre-deaths in pre_goal_update?
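
Assuming pre_goal_update mirrors the shape of post_goal_update, I would
expect a corresponding

	{ goal_info_get_pre_deaths(GoalInfo, PreDeaths) },
	code_info__uncache_references(PreDeaths, PreDeathCode),

there too, which would of course mean that pre_goal_update also has to
return a code_tree.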

> Index: middle_rec.m
> +	code_info__post_goal_update(SwitchGoalInfo, UpdateCode),
> +		% UpdateCode should be empty for switches because they
> +		% are not atomic goals.
> +	{ tree__is_empty(UpdateCode) ->
> +		true
> +	;
> +		error("middle_rec__generate_switch: UpdateCode not empty")
> +	},

This would be simpler as require(tree__is_empty(UpdateCode), "...").

I would like to see another diff.

Zoltan.