[m-dev.] for review: deep profiling changes [part 3/3]

Thomas Conway conway at cs.mu.OZ.AU
Sat Feb 26 15:27:26 AEDT 2000


Index: runtime/mercury_wrapper.h
===================================================================
RCS file: /home/mercury1/repository/mercury/runtime/mercury_wrapper.h,v
retrieving revision 1.28
diff -u -r1.28 mercury_wrapper.h
--- runtime/mercury_wrapper.h	2000/02/08 02:08:07	1.28
+++ runtime/mercury_wrapper.h	2000/02/25 23:40:17
@@ -53,6 +53,10 @@
 ** the corresponding foo.
 */
 extern	Code 		*program_entry_point; /* normally mercury__main_2_0; */
+#ifdef MR_PROFILE_DEEP
+extern	MR_Stack_Layout_Entry *program_entry_layout;
+			/* normally mercury_data__layout__mercury__main_2_0; */
+#endif
 
 extern	void		(*MR_library_initializer)(void);
 extern	void		(*MR_library_finalizer)(void);
Index: util/mkinit.c
===================================================================
RCS file: /home/mercury1/repository/mercury/util/mkinit.c,v
retrieving revision 1.62
diff -u -r1.62 mkinit.c
--- util/mkinit.c	2000/02/12 15:53:49	1.62
+++ util/mkinit.c	2000/02/25 23:40:19
@@ -115,6 +115,10 @@
 	"\n"
 	"Declare_entry(%s);\n"
 	"\n"
+	"#ifdef MR_PROFILE_DEEP\n"
+	"extern MR_Stack_Layout_Entry mercury_data__layout__%s;\n"
+	"#endif\n"
+	"\n"
 	"#ifdef CONSERVATIVE_GC\n"
 	"extern char *GC_stackbottom;\n"
 	"#endif\n"
@@ -198,6 +202,10 @@
 	"#endif\n"
 	"	program_entry_point = ENTRY(%s);\n"
 	"\n"
+	"#ifdef MR_PROFILE_DEEP\n"
+	"       MR_prof_init_globals(&mercury_data__layout__%s);\n"
+	"#endif\n"
+	"\n"
 	"	mercury_runtime_init(argc, argv);\n"
 	"	return;\n"
 	"}\n"
@@ -550,8 +558,8 @@
 		aditi_load_func = "NULL";
 	}
 	
-	printf(mercury_funcs, need_tracing, entry_point,
-		aditi_load_func, entry_point);
+	printf(mercury_funcs, need_tracing, entry_point, entry_point, 
+		aditi_load_func, entry_point, entry_point);
 
 	if (output_main_func) {
 		fputs(main_func, stdout);
===================================================================
New file: compiler/profiling.m
===================================================================
%-----------------------------------------------------------------------------%
% Copyright (C) 2000 The University of Melbourne.
% This file may only be copied under the terms of the GNU General
% Public License - see the file COPYING in the Mercury distribution.
%-----------------------------------------------------------------------------%

% file: profiling.m
% main author: conway

%------------------------------------------------------------------------------%
% This module contains predicates for generating code instrumented for
% `deep' profiling. As well as the information here, more information can
% be found in runtime/mercury_prof_deep.{c,h}.
%
% Deep profiling is a form of profiling which does not make the assumption 
% that all calls to a given predicate have equal cost - an assumption made
% by profilers like prof, gprof, and mprof.  Instead, it records the call
% tree (collapsed into strongly connected components (SCCs)), and
% distinguishes each callsite on an SCC-wise basis.
%
% There are two main kinds of structure associated with the instrumented code:
%	- SCCIds which are structures which describe a static SCC by
%	  enumerating the call sites in the procedures that make up the
%	  SCC. The following information is stored about each call site:
%		- the caller's MR_Stack_Layout_Entry structure
%		- the callee's MR_Stack_Layout_Entry structure
%		- the line number on which the call occurs
%		- whether the call is a first-order, higher-order or
%		  class-method call.
%	- SCCInstances which correspond to a runtime instance of a static
%	  SCC. They contain the following information:
%		- which SCCId it is an instance of
%		- a profile structure for each first order call site at
%		  which a call occured, and a pointer to an SCCInstance for
%		  the callee.
%		- for each higher order call, a list of (closure, profile)
%		  tuples which record a profile for each different
%		  procedure that is called from this site.
%		- for each class method call, a list of (closure, profile)
%		  tuples which record a profile for each different
%		  procedure that is called from this site.
%		- a list of (closure, profile) structures for each
%		  different call back into mercury from C that occured from
%		  inside this SCC.
%
% We don't store an SCCInstance for leaf procedures, since the profile
% structure is stored in the caller's SCCInstance and therefore there
% can be no call-sites in it.
%
%------------------------------------------------------------------------------%
:- module profiling.

:- interface.

:- import_module code_info, hlds_module, hlds_pred, llds.
:- import_module list, std_util, term.

		% Compute the call graph, number the SCCs and create
		% the mapping from pred_proc_id to SCC-number.
:- pred profiling__compute_scc_info(module_info, module_info).
:- mode profiling__compute_scc_info(in, out) is det.

		% Allocate a pair of stack-slots for storing the
		% MR_prof_current_proc and MR_prof_current_scc variables.
:- pred profiling__setup(code_info, code_info).
:- mode profiling__setup(in, out) is det.

		% Generate the code fragment that goes in the prologue
		% which saves MR_prof_current_{proc,scc} on to the stack
		% and increments the number of calls.
:- pred profiling__prologue(code_tree, code_info, code_info).
:- mode profiling__prologue(out, in, out) is det.

		% Generate the code fragment the for the epilogue for
		% when a procedure succeeds.
:- pred profiling__success_epilogue(code_tree, code_info, code_info).
:- mode profiling__success_epilogue(out, in, out) is det.

		% Generate the code fragment the for the epilogue for
		% when a procedure fails.
:- pred profiling__failure_epilogue(code_tree, code_info, code_info).
:- mode profiling__failure_epilogue(out, in, out) is det.

		% Generate the code framgent to update the deep profiling
		% pointers before a first order call.
:- pred profiling__pre_call_update(pred_proc_id, term__context, code_tree,
		code_info, code_info).
:- mode profiling__pre_call_update(in, in, out, in, out) is det.

		% Generate the code framgent to update the deep profiling
		% pointers before a higher order call.
:- pred profiling__pre_ho_call_update(rval, term__context,
		code_tree, code_info, code_info).
:- mode profiling__pre_ho_call_update(in, in, out, in, out) is det.

		% Generate the code framgent to update the deep profiling
		% pointers after a call.
:- pred profiling__post_call_update(code_tree, code_info, code_info).
:- mode profiling__post_call_update(out, in, out) is det.

		% Generate the code framgent to update the deep profiling
		% pointers when a failure occurs.
:- pred profiling__post_failure_code(code_tree, code_info, code_info).
:- mode profiling__post_failure_code(out, in, out) is det.

		% Generate the MR_SCCId structures for a module.
:- pred profiling__generate_scc_ids(module_info, module_info,
		list(comp_gen_c_data), list(comp_gen_c_var)).
:- mode profiling__generate_scc_ids(in, out, out, out) is det.

		% Find the MR_SCCId for a given procedure.
:- pred profiling__scc_id(pred_proc_id, maybe(rval), code_info, code_info).
:- mode profiling__scc_id(in, out, in, out) is det.

%------------------------------------------------------------------------------%

:- implementation.

:- import_module code_util, dependency_graph, goal_util, llds_out, tree.
:- import_module continuation_info.
:- import_module goal_util, hlds_data, hlds_goal, prog_data, globals, options.
:- import_module bool, int, map, std_util, string, term.

profiling__compute_scc_info(ModuleInfo0, ModuleInfo) :-
	module_info_globals(ModuleInfo0, Globals),
	globals__lookup_bool_option(Globals, profile_deep, ProfileDetail),
	( ProfileDetail = yes ->
		module_info_clobber_dependency_info(ModuleInfo0, ModuleInfo0a),
		module_info_ensure_dependency_info(ModuleInfo0a, ModuleInfo1),
		module_info_dependency_info(ModuleInfo1, DepInfo),
		hlds_dependency_info_get_dependency_ordering(DepInfo, Ordering),
		map__init(SCCMembers0),
		map__init(SCCData0),
		compute_scc_members(Ordering, 0, ModuleInfo1, SCCMembers0,
			SCCMembers, SCCData0, SCCData),
		SCCInfo = scc_info(SCCMembers, SCCData),
		module_info_set_scc_info(ModuleInfo1, SCCInfo, ModuleInfo)
	;
		ModuleInfo = ModuleInfo0
	).

%------------------------------------------------------------------------------%

:- pred compute_scc_members(list(list(pred_proc_id)), int, module_info,
		map(pred_proc_id, scc_id), map(pred_proc_id, scc_id),
		map(scc_id, scc_data), map(scc_id, scc_data)).
:- mode compute_scc_members(in, in, in, in, out, in, out) is det.

compute_scc_members([], _N, _ModuleInfo, Members, Members, Data, Data).
compute_scc_members([SCC|SCCs], N0, ModuleInfo, Members0, Members,
		Data0, Data) :-
	module_info_name(ModuleInfo, Name),
	compute_scc_members1(SCC, scc_id(Name, N0), ModuleInfo, Members0,
		Members1, Data0, Data1),
	N1 is N0 + 1,
	compute_scc_members(SCCs, N1, ModuleInfo, Members1, Members,
		Data1, Data).

:- pred compute_scc_members1(list(pred_proc_id), scc_id, module_info,
		map(pred_proc_id, scc_id), map(pred_proc_id, scc_id),
		map(scc_id, scc_data), map(scc_id, scc_data)).
:- mode compute_scc_members1(in, in, in, in, out, in, out) is det.

compute_scc_members1([], _SCCId, _ModuleInfo, Members, Members, Data, Data).
compute_scc_members1([PPId|PPIds], SCCId, ModuleInfo, Members0, Members,
		Data0, Data) :-
	(
		% Figure out if this PPId belongs to a procedure in this
		% module, and it is not a leaf procedure then we need to
		% allocate a SCC data-structure for it.
		proc_needs_scc(PPId, ModuleInfo, MayCallMercury)
	->
		map__det_insert(Members0, PPId, SCCId, Members1),
		( MayCallMercury = may_call_mercury ->
			map__set(Data0, SCCId, scc_data(0, [], 0, [], 0, [],
				MayCallMercury), Data1)
		;
			Data1 = Data0
		)
	;
		Members1 = Members0,
		Data1 = Data0
	),
	compute_scc_members1(PPIds, SCCId, ModuleInfo, Members1, Members,
		Data1, Data).

:- pred proc_needs_scc(pred_proc_id, module_info, may_call_mercury).
:- mode proc_needs_scc(in, in, out) is semidet.

proc_needs_scc(PPId, ModuleInfo, MayCallMercury) :-
	proc_has_local_code(PPId, ModuleInfo),
	PPId = proc(PredId, ProcId),
	module_info_pred_proc_info(ModuleInfo, PredId, ProcId, _Pred, Proc),
	proc_info_goal(Proc, Goal),
	proc_needs_scc_2(Goal, yes, IsALeaf,
		will_not_call_mercury, MayCallMercury),
	IsALeaf = no.

:- pred proc_needs_scc_2(hlds_goal, bool, bool,
		may_call_mercury, may_call_mercury).
:- mode proc_needs_scc_2(in, in, out, in, out) is det.

proc_needs_scc_2(Goal - _GoalInfo, IsALeaf0, IsALeaf,
		MayCallMercury0, MayCallMercury) :-
	proc_needs_scc_3(Goal, IsALeaf0, IsALeaf,
		MayCallMercury0, MayCallMercury).

:- pred proc_needs_scc_3(hlds_goal_expr, bool, bool,
		may_call_mercury, may_call_mercury).
:- mode proc_needs_scc_3(in, in, out, in, out) is det.

proc_needs_scc_3(conj(Goals), IsALeaf0, IsALeaf,
		MayCallMercury0, MayCallMercury) :-
	foldl2(proc_needs_scc_2, Goals, IsALeaf0, IsALeaf, MayCallMercury0,
		MayCallMercury).

proc_needs_scc_3(par_conj(Goals, _), IsALeaf0, IsALeaf,
		MayCallMercury0, MayCallMercury) :-
	foldl2(proc_needs_scc_2, Goals, IsALeaf0, IsALeaf, MayCallMercury0,
		MayCallMercury).

proc_needs_scc_3(disj(Goals, _), IsALeaf0, IsALeaf,
		MayCallMercury0, MayCallMercury) :-
	foldl2(proc_needs_scc_2, Goals, IsALeaf0, IsALeaf,
		MayCallMercury0, MayCallMercury).

proc_needs_scc_3(switch(_, _, Cases, _), IsALeaf0, IsALeaf,
		MayCallMercury0, MayCallMercury) :-
	foldl2((pred(case(_, Goal)::in, L0::in, L::out, M0::in, M::out)
			is det :-
		proc_needs_scc_2(Goal, L0, L, M0, M)
	), Cases, IsALeaf0, IsALeaf, MayCallMercury0, MayCallMercury).

proc_needs_scc_3(call(_, _, _, Builtin, _, _), IsALeaf0, IsALeaf,
		MayCallMercury, MayCallMercury) :-
	( Builtin \= inline_builtin ->
		IsALeaf = no
	;
		IsALeaf = IsALeaf0
	).

proc_needs_scc_3(generic_call(_, _, _, _), _, no,
		MayCallMercury, MayCallMercury).

proc_needs_scc_3(unify(_, _, _, _, _), IsALeaf, IsALeaf,
		MayCallMercury, MayCallMercury).

proc_needs_scc_3(not(Goal), IsALeaf0, IsALeaf, MayCallMercury0, MayCallMercury) :-
	proc_needs_scc_2(Goal, IsALeaf0, IsALeaf,
		MayCallMercury0, MayCallMercury).

proc_needs_scc_3(some(_, _, Goal), IsALeaf0, IsALeaf,
		MayCallMercury0, MayCallMercury) :-
	proc_needs_scc_2(Goal, IsALeaf0, IsALeaf,
		MayCallMercury0, MayCallMercury).

proc_needs_scc_3(bi_implication(GoalA, GoalB), IsALeaf0, IsALeaf,
		MayCallMercury0, MayCallMercury) :-
	proc_needs_scc_2(GoalA, IsALeaf0, IsALeaf1,
		MayCallMercury0, MayCallMercury1),
	proc_needs_scc_2(GoalB, IsALeaf1, IsALeaf,
		MayCallMercury1, MayCallMercury).

proc_needs_scc_3(if_then_else(_, C, T, E, _), IsALeaf0, IsALeaf,
		MayCallMercury0, MayCallMercury) :-
	foldl2(proc_needs_scc_2, [C, T, E], IsALeaf0, IsALeaf,
		MayCallMercury0, MayCallMercury).

proc_needs_scc_3(pragma_c_code(Attrs, _, _, _, _, _, _), IsALeaf, IsALeaf,
		MayCallMercury0, MayCallMercury) :-
	( may_call_mercury(Attrs, may_call_mercury) ->
		MayCallMercury = may_call_mercury
	;
		MayCallMercury = MayCallMercury0
	).

:- pred proc_has_local_code(pred_proc_id, module_info).
:- mode proc_has_local_code(in, in) is semidet.

proc_has_local_code(PPId, ModuleInfo) :-
	PPId = proc(PredId, ProcId),
	module_info_pred_proc_info(ModuleInfo, PredId, ProcId, Pred, _Proc),
	pred_info_import_status(Pred, ImportStatus),
	(
		status_defined_in_this_module(ImportStatus, yes)
	;
		% if the ImportStatus is pseudo_imported then we do need
		% an SCC entry because we generate code for this procedure
		ImportStatus = pseudo_imported
	).

%------------------------------------------------------------------------------%

profiling__setup -->
	code_info__acquire_temp_slot(profiling_data, SCCSlot),
	code_info__acquire_temp_slot(profiling_data, ProcSlot),
	{ PI = profiling_info(SCCSlot, ProcSlot) },
	code_info__set_profiling_info(PI).

	%
	% In the prologue, we save the current SCCInstance onto the stack,
	% and also a pointer to the current CallProfile structure.
	% Also, in the prologue we increment the number of calls to
	% this procedure.
	%
profiling__prologue(Code) -->
	code_info__get_profiling_info(PI),
	{ PI = profiling_info(SCCSlot, ProcSlot) },
	/*
		Debugging code only
	code_info__get_pred_id(CallerPredId),
	code_info__get_proc_id(CallerProcId),
	{ CallerPPId = proc(CallerPredId, CallerProcId) },
	code_info__get_module_info(ModuleInfo),
	{ module_info_get_scc_info(ModuleInfo, SCCInfo) },
	{ SCCInfo = scc_info(ProcSCCs, _SCCCallSites) },
	(
		{ map__search(ProcSCCs, CallerPPId, CallerSCC) }
	->
		{ CallerSCC = scc_id(ModuleName, SCCNum) },
		{ SCCIdConst = const(data_addr_const(data_addr(ModuleName,
			scc_id(SCCNum)))) }
	;
		{ SCCIdConst = const(int_const(0)) }
	),
	*/
	{ Code = node([
		assign(SCCSlot, lval(global(data_ptr, "MR_prof_current_scc")))
			- "",
		assign(ProcSlot, lval(global(data_ptr, "MR_prof_current_proc")))
			- "",
		c_code("MR_prof_current_proc->calls++;\n") - ""
	]) }.

	%
	% We generate two epilogues: one for success and one for failure.
	% Each updates the appropriate counter in the current_proc structure.
	%
profiling__success_epilogue(SuccCode) -->
	{ SuccCode = node([c_code("MR_prof_current_proc->successes++;\n") -
		"update the profiling information"]) }.

profiling__failure_epilogue(FailCode) -->
	{ FailCode = node([c_code("MR_prof_current_proc->failures++;\n") - 
		"update the profiling information"]) }.

%------------------------------------------------------------------------------%

	%
	% Before a (first order) call there are three situations
	%	- the call is an intra-module, intra-scc call, in which case
	%	  we assign the address of the appropriate CallProfile
	%	  structure to the global current_call_profile variable,
	%	  allocating the structure if it didn't already exist.
	%	- the call is an intra-module, inter-scc call, in which case
	%	  we do the same as above, and assign to the global
	%	  current_scc variable the address of the child SCCInstance
	%	  structure, allocating it if necessary. (We distinguish
	%	  this case from the next - for inter-module calls because
	%	  for intra-module inter-scc calls we know at compile time
	%	  the SCCId for the called procedure, so we can do a
	%	  direct assignment rather than having to refer to it
	%	  via it's proc_layout structure.
	%	- the call is an inter-module, inter-scc call (all inter-
	%	  module calls are assumed to be inter-scc calls), in which
	%	  case we do the same as above, except that the SCCId must
	%	  be obtained indirectly through the proc_layout structures.
	%
profiling__pre_call_update(CalleePPId, Context, Code) -->
	profiling__my_ppid(CallerPPId),
	{ ThisSite = first_order_call(CallerPPId, CalleePPId, Context) },
	profiling__add_callsite(fo(ThisSite), _UpdateFunc, CallSiteNum),
	code_info__get_profiling_info(PI),
	{ PI = profiling_info(SCCSlot, _ProcSlot) },
	{ CProc = global(data_ptr, "MR_prof_current_proc") },
	{ CSCC = global(data_ptr, "MR_prof_current_scc") },
	code_info__get_module_info(ModuleInfo),
	code_info__get_scc_info(SCCInfo0),
	{ SCCInfo0 = scc_info(ProcSCCs, _SCCCallSites) },
	{ map__lookup(ProcSCCs, CallerPPId, CallerSCC) },
	(
			% Check to see if the callee is in the
			% same SCC as the caller.
		{ map__search(ProcSCCs, CalleePPId, CalleeSCC0) },
		{ CalleeSCC0 = CallerSCC }
	->
		{ Code = node([
			assign(CProc, c_func(data_ptr, "MR_intra_scc_call",
				[word - lval(SCCSlot),
				word - const(int_const(CallSiteNum))],
				dynamic)) - "update the current CallProf"
		]) }
	;
			% Check to see if we know which SCC the callee
			% is in. Leaf procedures are not allocated an
			% SCC number, because we don't need a whole SCC
			% structure for them.
		{ map__search(ProcSCCs, CalleePPId, CalleeSCC1) }
		% { CalleeSCC1 \= CallerSCC }
	->
		{ CalleeSCC1 = scc_id(ModuleName, SCCNum) },
		{ ChildSCCId = const(data_addr_const(data_addr(ModuleName,
			scc_id(SCCNum)))) },
		{ Code = node([
			assign(CProc, c_func(data_ptr, "MR_local_inter_scc",
				[word - lval(SCCSlot),
				word - const(int_const(CallSiteNum)),
				data_ptr - ChildSCCId], dynamic)) -
					"update the current CallProf",
			assign(CSCC, c_func(data_ptr,
				"MR_scc_from_current_proc", [], dynamic)) -
					"update the current SCCInstance"
		]) }
	;
			% The callee is a leaf procedure, so we don't want it's
			% SCCId. We pass NULL instead, which the runtime
			% recognises appropriately.
		{ pred_has_local_code(CalleePPId, ModuleInfo) }
	->
		{ Code = node([
			assign(CProc, c_func(data_ptr, "MR_local_inter_scc",
				[word - lval(SCCSlot),
				word - const(int_const(CallSiteNum)),
				data_ptr - const(int_const(0))], dynamic)) -
					"update the current CallProf",
			assign(CSCC, c_func(data_ptr,
				"MR_scc_from_current_proc", [], dynamic)) -
					"update the current SCCInstance"
		]) }
	;
			% The callee is in another module (or compilation
			% unit), so we need to pass the proc_id layout
			% structure to the runtime so it can create the new
			% SCC instance.
		{ CalleePPId = proc(Pred, Proc) },
		{ code_util__make_proc_label(ModuleInfo, Pred, Proc, ProcLab) },
		{ ProcEntry = proc_layout(local(ProcLab)) },
		{ pred_module(CalleePPId, ModuleInfo, ModuleName) },
		{ ChildProcLayout = const(data_addr_const(data_addr(ModuleName,
			ProcEntry))) },
		{ Code = node([
			assign(CProc, c_func(data_ptr, "MR_nonlocal_inter_scc",
				[word - lval(SCCSlot),
				word - const(int_const(CallSiteNum)),
				data_ptr - ChildProcLayout], dynamic)) -
					"update the current CallProf",
			assign(CSCC, c_func(data_ptr,
				"MR_scc_from_current_proc", [], dynamic)) -
					"update the current SCCInstance"
		]) }
	).

profiling__pre_ho_call_update(Closure, Context, Code) -->
	profiling__my_ppid(CallerPPId),
	{ ThisSite = higher_order_call(CallerPPId, Context) },
	profiling__add_callsite(ho(ThisSite), UpdateFunc, CallSiteNum),
	code_info__get_profiling_info(PI),
	{ PI = profiling_info(SCCSlot, _ProcSlot) },
	{ CProc = global(data_ptr, "MR_prof_current_proc") },
	{ CSCC = global(data_ptr, "MR_prof_current_scc") },
	{ Code = node([
		assign(CProc, c_func(data_ptr, UpdateFunc,
			[word - lval(SCCSlot),
			word - const(int_const(CallSiteNum)),
			word - Closure], dynamic)) -
				"update the current CallProf",
		assign(CSCC, c_func(data_ptr, "MR_scc_from_current_proc",
			[], dynamic)) -
				"update the current SCCInstance"
	]) }.

profiling__post_call_update(Code) -->
	code_info__get_profiling_info(PI),
	{ PI = profiling_info(SCCSlot, ProcSlot) },
	{ Code = node([
		assign(global(data_ptr, "MR_prof_current_proc"), lval(ProcSlot))
			- "",
		assign(global(data_ptr, "MR_prof_current_scc"), lval(SCCSlot))
			- ""
	]) }.

profiling__post_failure_code(Code) -->
	profiling__post_call_update(Code).

%------------------------------------------------------------------------------%

:- pred profiling__my_ppid(pred_proc_id, code_info, code_info).
:- mode profiling__my_ppid(out, in, out) is det.

profiling__my_ppid(PPId) -->
	code_info__get_pred_id(CallerPredId),
	code_info__get_proc_id(CallerProcId),
	{ PPId = proc(CallerPredId, CallerProcId) }.

%------------------------------------------------------------------------------%

:- type callsite
	--->	fo(first_order_call)
	;	ho(higher_order_call)
	;	cm(class_method_call)
	.

:- pred profiling__add_callsite(callsite, string, int, code_info, code_info).
:- mode profiling__add_callsite(in, out, out, in, out) is det.

profiling__add_callsite(CallSite, UpdateFunc, SlotNum) -->
	profiling__my_ppid(CallerPPId),
	code_info__get_scc_info(SCCInfo0),
	{ SCCInfo0 = scc_info(ProcSCCs, SCCCallSites0) },
	{ map__lookup(ProcSCCs, CallerPPId, CallerSCC) },
	( { map__search(SCCCallSites0, CallerSCC, SCCData0a) } ->
		{ SCCData0 = SCCData0a }
	;
		{ SCCData0 = scc_data(0, [], 0, [], 0, [],
			will_not_call_mercury) }
	),
	{ SCCData0 = scc_data(A0, As0, B0, Bs0, C0, Cs0, Rec) },
	(
		{ CallSite = fo(ThisSite) },
		{ A is A0 + 1 },
		{ list__append(As0, [ThisSite], As) },
		{ UpdateFunc = "MR_fo_call" },
		{ B = B0, Bs = Bs0 },
		{ C = C0, Cs = Cs0 },
		{ SlotNum = A0 }
	;
		{ CallSite = ho(ThisSite) },
		{ B is B0 + 1 },
		{ list__append(Bs0, [ThisSite], Bs) },
		{ UpdateFunc = "MR_ho_call" },
		{ A = A0, As = As0 },
		{ C = C0, Cs = Cs0 },
		{ SlotNum = B0 }
	;
		{ CallSite = cm(ThisSite) },
		{ C is C0 + 1 },
		{ list__append(Cs0, [ThisSite], Cs) },
		{ UpdateFunc = "MR_cm_call" },
		{ A = A0, As = As0 },
		{ B = B0, Bs = Bs0 },
		{ SlotNum = C0 }
	),
	{ SCCData = scc_data(A, As, B, Bs, C, Cs, Rec) },
	{ map__set(SCCCallSites0, CallerSCC, SCCData, SCCCallSites) },
	{ SCCInfo = scc_info(ProcSCCs, SCCCallSites) },
	code_info__set_scc_info(SCCInfo).

%------------------------------------------------------------------------------%
%------------------------------------------------------------------------------%

:- type scc_id_data
	--->	scc_id_data(int, list(comp_gen_c_data), list(comp_gen_c_var)).

profiling__generate_scc_ids(ModuleInfo0, ModuleInfo, Data, Vars) :-
	module_info_get_scc_info(ModuleInfo0, SCCInfo),
	module_info_get_cell_count(ModuleInfo0, InitialCount),
	SCCInfo = scc_info(_ProcSCCs, SCCCallSites),
	InitialSCCIdData = scc_id_data(InitialCount, [], []),
	map__foldl(lambda([SCCId::in, SCCData::in,
			SCCIdData0::in, SCCIdData7::out] is det, (
		SCCId = scc_id(ModuleName, SCCNum),
		SCCData = scc_data(NFO, FOCalls, NHO, HOCalls, NCM, CMCalls,
				_Rec),
		(
			FOCalls = [],
			FOVector = yes(const(int_const(0))),
			SCCIdData2 = SCCIdData0
		;
			FOCalls = [_|_],
			list__map_foldl(first_order_scc_ids(ModuleInfo0),
				FOCalls, FOCallRvals, SCCIdData0, SCCIdData1),
			profiling__scc_id_data_next_cell(LabNum1, SCCIdData1,
				SCCIdData2),
			FOVector = yes(create(0, FOCallRvals, uniform(no),
				must_be_static,
				LabNum1, "first order call data", no))
		),
		(
			HOCalls = [],
			HOVector = yes(const(int_const(0))),
			SCCIdData2 = SCCIdData4
		;
			HOCalls = [_|_],
			list__map_foldl(higher_order_scc_ids(ModuleInfo0),
				HOCalls, HOCallRvals, SCCIdData2, SCCIdData3),
			profiling__scc_id_data_next_cell(LabNum2, SCCIdData3,
				SCCIdData4),
			HOVector = yes(create(0, HOCallRvals, uniform(no),
				must_be_static,
				LabNum2, "higher order call data", no))
		),
		(
			CMCalls = [],
			CMVector = yes(const(int_const(0))),
			SCCIdData6 = SCCIdData4
		;
			CMCalls = [_|_],
			list__map_foldl(class_method_scc_ids(ModuleInfo0),
				CMCalls, CMCallRvals, SCCIdData4, SCCIdData5),
			profiling__scc_id_data_next_cell(LabNum3, SCCIdData5,
				SCCIdData6),
			CMVector = yes(create(0, CMCallRvals, uniform(no),
				must_be_static,
				LabNum3, "class method data", no))
		),
		MRVals = [
			yes(const(int_const(NFO))),
			FOVector,
			yes(const(int_const(NHO))),
			HOVector,
			yes(const(int_const(NCM))),
			CMVector
		],
		CData = comp_gen_c_data(ModuleName, scc_id(SCCNum), yes,
				MRVals, uniform(no), []),
		SCCIdData6 = scc_id_data(Count, CDataList0, CVarsList0),
		GlobData = scc_id_counter(ModuleName, SCCNum),
		CDataList = [CData|CDataList0],
		CVarsList = [GlobData|CVarsList0],
		SCCIdData7 = scc_id_data(Count, CDataList, CVarsList)
	)), SCCCallSites, InitialSCCIdData, FinalSCCIdData),
	FinalSCCIdData = scc_id_data(FinalCount, Data, Vars),
	module_info_set_cell_count(ModuleInfo0, FinalCount, ModuleInfo).

%------------------------------------------------------------------------------%

:- pred first_order_scc_ids(module_info, first_order_call, maybe(rval),
		scc_id_data, scc_id_data).
:- mode first_order_scc_ids(in, in, out, in, out) is det.

first_order_scc_ids(ModuleInfo, FOCall, MRval) -->
	{ FOCall = first_order_call(CallerPPId, CalleePPId, Context) },
	{ code_util__make_proc_layout_ref(CallerPPId, ModuleInfo,
		CallerLayout) },
	{ code_util__make_proc_layout_ref(CalleePPId, ModuleInfo,
		CalleeLayout) },
	profiling__scc_id_data_next_cell(CellNum),
	{ term__context_file(Context, File) },
	{ term__context_line(Context, Line) },
	{ MRval = yes(create(0, [
		yes(CallerLayout),
		yes(CalleeLayout),
		yes(const(string_const(File))),
		yes(const(int_const(Line)))
	], uniform(no), must_be_static, CellNum, "fo_call_site", no)) }.

%------------------------------------------------------------------------------%

:- pred higher_order_scc_ids(module_info, higher_order_call, maybe(rval),
		scc_id_data, scc_id_data).
:- mode higher_order_scc_ids(in, in, out, in, out) is det.

higher_order_scc_ids(ModuleInfo, HOCall, MRval) -->
	{ HOCall = higher_order_call(CallerPPId, Context) },
	{ code_util__make_proc_layout_ref(CallerPPId, ModuleInfo,
		CallerLayout) },
	profiling__scc_id_data_next_cell(CellNum),
	{ term__context_file(Context, File) },
	{ term__context_line(Context, Line) },
	{ MRval = yes(create(0, [
		yes(CallerLayout),
		yes(const(int_const(0))),
		yes(const(string_const(File))),
		yes(const(int_const(Line)))
	], uniform(no), must_be_static, CellNum, "ho_call_site", no)) }.

%------------------------------------------------------------------------------%

:- pred class_method_scc_ids(module_info, class_method_call, maybe(rval),
		scc_id_data, scc_id_data).
:- mode class_method_scc_ids(in, in, out, in, out) is det.

class_method_scc_ids(ModuleInfo, CMCall, MRval) -->
	{ CMCall = class_method_call(CallerPPId, CalleePPId, Context) },
	{ code_util__make_proc_layout_ref(CallerPPId, ModuleInfo,
		CallerLayout) },
	{ code_util__make_proc_layout_ref(CalleePPId, ModuleInfo,
		CalleeLayout) },
	profiling__scc_id_data_next_cell(CellNum),
	{ term__context_file(Context, File) },
	{ term__context_line(Context, Line) },
	{ MRval = yes(create(0, [
		yes(CallerLayout),
		yes(CalleeLayout),
		yes(const(string_const(File))),
		yes(const(int_const(Line)))
	], uniform(no), must_be_static, CellNum, "cm_call_site", no)) }.

%------------------------------------------------------------------------------%

:- pred profiling__scc_id_data_next_cell(int, scc_id_data, scc_id_data).
:- mode profiling__scc_id_data_next_cell(out, in, out) is det.

profiling__scc_id_data_next_cell(Cell, Data0, Data) :-
	Data0 = scc_id_data(Cell, Structs, Vars),
	Cell1 is Cell + 1,
	Data = scc_id_data(Cell1, Structs, Vars).

%------------------------------------------------------------------------------%

profiling__scc_id(PredProcId, MSCCIdRval) -->
	code_info__get_globals(Globals),
	{ globals__lookup_bool_option(Globals, profile_deep, DeepProfiling) },
	(
		{ DeepProfiling = no },
		{ MSCCIdRval = no }
	;
		{ DeepProfiling = yes },
		code_info__get_scc_info(scc_info(SCCIds, _)),
		code_info__get_module_info(ModuleInfo),
		( { map__search(SCCIds, PredProcId, SCCId) } ->
			{ SCCId = scc_id(ModuleName, SCCNum) },
			{ SCCIdRval = const(data_addr_const(
				data_addr(ModuleName, scc_id(SCCNum)))) },
			{ MSCCIdRval = yes(SCCIdRval) }
		; { not proc_has_local_code(PredProcId, ModuleInfo) } ->
				% If the procedure is defined in another
				% module, then we create a tagged pointer
				% to the MR_Stack_Layout_Entry structure
				% (which must exist in this grade).
				%
				% This is necessary for the case in which
				% we take the address of a non-leaf
				% procedure that is defined in another
				% module. In that case, we don't know its
				% SCCId, and can't figure it out directly.
			{ PredProcId = proc(PredId, ProcId) },
			{ code_util__make_local_entry_label(ModuleInfo,
				PredId, ProcId, no, EntryLabel) },
			{ module_info_name(ModuleInfo, ModuleName) },
			{ SCCIdRval = mkword(1, const(data_addr_const(
				data_addr(ModuleName,
				proc_layout(EntryLabel))))) },
			{ MSCCIdRval = yes(SCCIdRval) }
		;
			{ MSCCIdRval = yes(const(int_const(0))) }
		)
	).

===================================================================
New file: runtime/mercury_prof_deep.c
===================================================================
/*
** Copyright (C) 2000 The University of Melbourne.
** This file may only be copied under the terms of the GNU Library General
** Public License - see the file COPYING.LIB in the Mercury distribution.
*/

/*
**      Detailed profiling module
**
**	Main Authors : conway
*/

#include        "mercury_imp.h"

#include	<stdio.h>
#include	<unistd.h>
#include	<errno.h>
#include	<string.h>

#include	"mercury_prof.h"
#include	"mercury_stack_layout.h"
#include	"mercury_wrapper.h"
#include        "mercury_std.h"

/*
** Global Variables
*/

#ifdef	MR_PROFILE_DEEP

/*
** Manufacture the root of the SCCId tree, except for the callee
** layout structure, which we don't know yet. It is filled in by
** MR_prof_init_globals(), which also creates the root SCCInstance.
*/

extern const MR_Stack_Layout_Entry mercury_data__layout__do_interpreter;


MR_CallSite MR_prof_main_callsite = {
	&mercury_data__layout__do_interpreter,
	(MR_Stack_Layout_Entry *) NULL,	/* don't know till runtime */
	"Mercury Engine",
	0
};

MR_MAKE_SCC_ID(MR_prof_root_scc_id, { &MR_prof_main_callsite }, { }, { });

MR_SCCInstance		*MR_prof_root_scc;

MR_ProcCallProfile	MR_prof_root_proc = { 0, 0, 0, 0 };

MR_ProcCallProfile	*MR_prof_current_proc = NULL;

MR_SCCInstance *	MR_prof_current_scc = NULL;

MR_Count		MR_prof_num_sigs=0;

#endif

/*
** Local function declarations
*/

#ifdef MR_PROFILE_DEEP
  MR_SCCInstance *MR_prof_allocate_scc_instance(const MR_SCCId *scc);
  static void MR_prof_dump_prof_tree(FILE *fp, MR_SCCInstance *scc);
  static void MR_prof_dump_fo_calls(FILE *fp, int n, int m, const MR_SCCId *id,
		MR_InterCallProfile **profs, const MR_CallSite **sites);
  static void MR_prof_dump_ho_calls(FILE *fp, int n, int m, const MR_SCCId *id,
		MR_HigherOrderCallProfile **profs, const MR_CallSite **sites);
  static void MR_prof_dump_cm_calls(FILE *fp, int n, int m, const MR_SCCId *id,
		MR_ClassMethodCallProfile **profs, const MR_CallSite **sites);
  static void MR_prof_dump_callbacks(FILE *fp, int m, const MR_SCCId *id,
		MR_CallBackCallProfile *cbacks);
  static void MR_prof_dump_call_data(FILE *fp, const char *prefix, int n,
		const MR_Stack_Layout_Entry *caller,
		const MR_Stack_Layout_Entry *callee, 
		MR_ProcCallProfile *prof, int child);
  static void MR_prof_dump_proc_call_profile(FILE *fp, MR_ProcCallProfile *p);
  static void MR_prof_dump_ppid(FILE *fp, const MR_Stack_Layout_Proc_Id *ppid);
  static void MR_prof_dump_SCCid(FILE *fp, const MR_SCCId *SCCid);
  static void MR_prof_dump_ppid_table(FILE *fp);
  static void MR_prof_dump_ppid_defn(FILE *fp, int ppid_num,
		const MR_Stack_Layout_Proc_Id *ppid);
  static void MR_prof_dump_SCCid_table(FILE *fp);
  static void MR_prof_output_call_site_list(FILE *fp, int n,
		const MR_CallSite **calls);
  static void MR_prof_dump_string(FILE *fp, const char *s);

  static const MR_SCCId *MR_prof_get_scc_id(const MR_Stack_Layout_Proc_Id *p);
#endif

/*----------------------------------------------------------------------------*/

#ifdef MR_PROFILE_DEEP

static const MR_SCCId *
MR_prof_get_scc_id(const MR_Stack_Layout_Proc_Id *p)
{
	const MR_SCCId *sccid;

	sccid = p->MR_proc_user.MR_user_scc_id;
	if (MR_tag((const Word) sccid))
	{
		return MR_ENTRY_LAYOUT_SCC_ID((const MR_Stack_Layout_Entry *)
					MR_strip_tag((Word) sccid));
	} else {
		return sccid;
	}
}

void
MR_prof_init_globals(MR_Stack_Layout_Entry *proclayout)
{
	const MR_SCCId *root_scc_id;

	program_entry_layout = proclayout;

	MR_prof_main_callsite.callee = proclayout;

	MR_prof_root_scc = MR_prof_allocate_scc_instance(&MR_prof_root_scc_id);

	MR_prof_current_proc = &MR_prof_root_proc;
	MR_prof_current_scc = MR_prof_root_scc;
}

MR_ProcCallProfile *
MR_prof_ensure_fo_call_slot(MR_SCCInstance *scc, int site_num)
{
	MR_InterCallProfile *tmp;

	assert((scc != NULL) && (scc->fo_calls != NULL));

	if (scc->fo_calls[site_num] != NULL)
		return &(scc->fo_calls[site_num]->prof);
	
	tmp = (MR_InterCallProfile *)
		MR_malloc(sizeof(MR_InterCallProfile));
	tmp->prof.calls = 0;
	tmp->prof.successes = 0;
	tmp->prof.failures = 0;
	tmp->prof.quanta = 0;
	tmp->child = NULL;

	scc->fo_calls[site_num] = tmp;

	return &(tmp->prof);
}

MR_ProcCallProfile *
MR_prof_ensure_fo_call_inter_slot(MR_SCCInstance *scc, int site_num,
		const MR_SCCId *child_scc_id)
{
	MR_InterCallProfile *tmp;

	assert((scc != NULL) && (scc->fo_calls != NULL));

	if (scc->fo_calls[site_num] != NULL)
		return &(scc->fo_calls[site_num]->prof);
	
	tmp = (MR_InterCallProfile *)
		MR_malloc(sizeof(MR_InterCallProfile));
	tmp->prof.calls = 0;
	tmp->prof.successes = 0;
	tmp->prof.failures = 0;
	tmp->prof.quanta = 0;
	tmp->child = MR_prof_allocate_scc_instance(child_scc_id);

	scc->fo_calls[site_num] = tmp;

	return &(tmp->prof);
}

MR_ProcCallProfile *
MR_prof_ensure_fo_call_inter_slot2(MR_SCCInstance *scc, int site_num,
	const MR_Stack_Layout_Entry *proc_layout)
{
	const MR_SCCId *child_scc_id;

	child_scc_id = MR_ENTRY_LAYOUT_SCC_ID(proc_layout);
	return MR_prof_ensure_fo_call_inter_slot(scc, site_num, child_scc_id);
}

MR_ProcCallProfile *
MR_prof_ensure_ho_call_inter_slot(MR_SCCInstance *scc, int site_num,
	MR_Closure *closure)
{
	return MR_prof_proc_const_call(&(scc->ho_calls[site_num]), closure);
}

MR_ProcCallProfile *
MR_prof_ensure_special_ho_call_inter_slot(MR_SCCInstance *scc, int site_num,
	Code *code_ptr, const MR_Stack_Layout_Entry *entry_layout)
{
	return MR_prof_special_proc_const_call(&(scc->ho_calls[site_num]), code_ptr,
			&(entry_layout->MR_sle_proc_id));
}

MR_ProcCallProfile *
MR_prof_ensure_c_calls_mercury(const MR_Stack_Layout_Entry *layout)
{
	MR_CallBackCallProfile *tmp;

	assert(MR_prof_current_scc);

	tmp = (MR_CallBackCallProfile *)
		MR_malloc(sizeof(MR_CallBackCallProfile));
	tmp->prof.calls = 0;
	tmp->prof.successes = 0;
	tmp->prof.failures = 0;
	tmp->prof.quanta = 0;
	tmp->child = MR_prof_allocate_scc_instance(
			MR_ENTRY_LAYOUT_SCC_ID(layout));
	tmp->next = MR_prof_current_scc->callbacks;
	tmp->entry  = (MR_Stack_Layout_Entry *) &(layout->MR_sle_code_addr);
	tmp->proc_id = (MR_Stack_Layout_Proc_Id *) &(layout->MR_sle_proc_id);
	MR_prof_current_scc->callbacks = tmp;

	return &(tmp->prof);
}

MR_ProcCallProfile *
MR_prof_proc_const_call(MR_MultiCallProfile **call_list, MR_Closure *closure)
{
	MR_Stack_Layout_Proc_Id *proc_id;

	assert((call_list != NULL) && (closure != NULL));

	proc_id = closure->MR_closure_layout->proc_id;
	return MR_prof_special_proc_const_call(call_list, closure->MR_closure_code, proc_id);
}

MR_ProcCallProfile *
MR_prof_special_proc_const_call(MR_MultiCallProfile **call_list, Code *code_ptr, const MR_Stack_Layout_Proc_Id *proc_id)
{
	MR_MultiCallProfile *tmp;
	const MR_SCCId *child_scc_id;

	child_scc_id = MR_prof_get_scc_id(proc_id);

	/*
	** Search the list of HigherOrderCallProfiles for the proc we've been given.
	** This should probably use a move-to-front list to exploit locality.
	*/
	for(tmp = *call_list; tmp; tmp = tmp->next)
	{
		if (tmp->entry == code_ptr)
			return &(tmp->prof);
	}

	/*
	** If we didn't find the proc in the list, then allocate a new
	** HigherOrderCallProfile and stick it on the front of the list.
	*/

	tmp = (MR_HigherOrderCallProfile *)
		MR_malloc(sizeof(MR_HigherOrderCallProfile));
	tmp->prof.calls = 0;
	tmp->prof.successes = 0;
	tmp->prof.failures = 0;
	tmp->prof.quanta = 0;
	tmp->child = MR_prof_allocate_scc_instance(child_scc_id);
	tmp->entry = code_ptr;
	tmp->proc_id = proc_id;
	tmp->next = *call_list;

	*call_list = tmp;

	return &(tmp->prof);
}

MR_SCCInstance *
MR_prof_allocate_scc_instance(const MR_SCCId *child_scc_id)
{
	static int	scc_num=1;
	MR_SCCInstance *child;
	size_t nbytes;

	if (child_scc_id != NULL)
	{
		child = (MR_SCCInstance *)
			MR_malloc(sizeof(MR_SCCInstance));
		child->scc = child_scc_id;
		child->scc_num = scc_num++;
		if (child_scc_id->num_fo_calls > 0)
		{
			nbytes = sizeof(MR_InterCallProfile *)
					* child_scc_id->num_fo_calls;
			child->fo_calls = (MR_InterCallProfile **)
				MR_malloc(nbytes);
			memset(child->fo_calls, 0, nbytes);

		} else
			child->fo_calls = NULL;
		if (child_scc_id->num_ho_calls > 0)
		{
			nbytes = sizeof(MR_HigherOrderCallProfile *)
					* child_scc_id->num_ho_calls;
			child->ho_calls = (MR_HigherOrderCallProfile **)
				MR_malloc(nbytes);
			memset(child->ho_calls, 0, nbytes);
		} else
			child->ho_calls = NULL;
		if (child_scc_id->num_cm_calls > 0)
		{
			nbytes = sizeof(MR_ClassMethodCallProfile *)
					* child_scc_id->num_cm_calls;
			child->cm_calls = (MR_ClassMethodCallProfile **)
				MR_malloc(nbytes);
			memset(child->cm_calls, 0, nbytes);
		} else
			child->cm_calls = NULL;
		child->callbacks = NULL;
	} else {
		child = NULL;
	}

	return child;
}

#endif /* MR_PROFILE_CALLS */

/* ======================================================================== */

#ifdef MR_PROFILE_DEEP

/*
**	prof_time_profile:
**		Signal handler to be called whenever a profiling signal is
**		received. Saves the current code address into a hash table.
**		If the address already exists, it increments its count.
**	XXX
*/

void
prof_deep_time_profile(int signum);

void
prof_deep_time_profile(int signum)
{
	MR_prof_num_sigs++;
	MR_prof_current_proc->quanta++;

	return;
} /* end prof_time_profile() */

#endif /* MR_PROFILE_DEEP */

/* ======================================================================== */

#ifdef MR_PROFILE_DEEP

static MR_SCCInstance *instance_stack[1000];
static int isp=0;

#define MR_PPID_TABLE_SIZE	5021

typedef struct MR_PPID_NODE	{
	const	MR_Stack_Layout_Proc_Id	*ppid;
	int				ppid_num;
	struct	MR_PPID_NODE		*next;
} MR_PPIdNode;

static MR_PPIdNode	*MR_prof_ppid_table[MR_PPID_TABLE_SIZE]; 
static MR_prof_ppid_counter = 1;

typedef struct MR_SCCID_NODE	{
	const	MR_SCCId		*SCCid;
	int				SCCid_num;
	struct	MR_SCCID_NODE		*next;
} MR_SCCIdNode;

static MR_SCCIdNode	*MR_prof_SCCid_table[MR_PPID_TABLE_SIZE]; 
static MR_prof_SCCid_counter = 1;

static void
MR_prof_dump_prof_tree(FILE *fp, MR_SCCInstance *root)
{
	int kind;
	const MR_SCCId	*id;
	MR_SCCInstance *scc;
	
	isp = 0;
	instance_stack[isp++] = root;

	while (isp > 0)
	{
		scc = instance_stack[--isp];
		id = scc->scc;
		if (id->num_cm_calls != 0)
			kind = 3;
		else if (id->num_ho_calls != 0)
			kind = 2;
		else
			kind = 1;
		MR_prof_write_word(fp, kind);
		MR_prof_write_word(fp, scc->scc_num);
		MR_prof_dump_SCCid(fp, id);
		MR_prof_dump_fo_calls(fp, id->num_fo_calls, scc->scc_num,
			scc->scc, scc->fo_calls, id->fo_calls);
		if (kind > 1)
			MR_prof_dump_ho_calls(fp, id->num_ho_calls,
				scc->scc_num, scc->scc, scc->ho_calls,
				id->ho_calls);
		if (kind > 2)
			MR_prof_dump_cm_calls(fp, id->num_cm_calls,
				scc->scc_num, scc->scc, scc->cm_calls,
				id->cm_calls);
		MR_prof_dump_callbacks(fp, scc->scc_num, scc->scc,
			scc->callbacks);
	}
}

static void
MR_prof_dump_fo_calls(FILE *fp, int n, int scc_num, const MR_SCCId *SCCid,
			MR_InterCallProfile **profs, const MR_CallSite **sites)
{
	int i, child_num, count;

	for (count = 0, i = 0; i < n ; i++)
	{
		if (profs[i] != NULL)
			count++;
	}

	MR_prof_write_word(fp, count); /* end marker */

	for (i = 0; i < n ; i++)
	{
		if (profs[i] != NULL)
		{
			if (profs[i]->child != NULL)
			{
				child_num = profs[i]->child->scc_num;
				instance_stack[isp++] = profs[i]->child;
			} else
				child_num = 0;
			MR_prof_write_word(fp, i);
			MR_prof_write_word(fp, child_num);
			MR_prof_dump_proc_call_profile(fp, &(profs[i]->prof));
		}
	}
}

static void MR_prof_dump_proc_call_profile(FILE *fp, MR_ProcCallProfile *p)
{
	MR_prof_write_word(fp, p->calls);
	MR_prof_write_word(fp, p->successes);
	MR_prof_write_word(fp, p->failures);
	MR_prof_write_word(fp, p->quanta);
}

static void MR_prof_dump_string(FILE *fp, const char *s)
{
	int i,l;
	l = strlen(s);
	MR_prof_write_word(fp, l);
	for(i = 0; i < l ; i++)
		MR_prof_write_word(fp, s[i]);
}

static void MR_prof_dump_ho_calls(FILE *fp, int n, int scc_num,
		const MR_SCCId *SCCid, MR_HigherOrderCallProfile **profs,
		const MR_CallSite **sites)
{
	int i, child_num;
	MR_HigherOrderCallProfile *tmp;

	for (i = 0; i < n ; i++)
	{
		
		if (profs[i])
		{
			MR_prof_write_word(fp, i + 1);
					/* add 1 so we can use 0 as `end' */
			tmp = profs[i];
			while (tmp)
			{
				if (tmp->child != NULL)
				{
					child_num = tmp->child->scc_num;
					instance_stack[isp++] = tmp->child;
				} else
					child_num = 0;
				MR_prof_dump_ppid(fp, tmp->proc_id);
				MR_prof_write_word(fp, child_num);
				MR_prof_dump_proc_call_profile(fp,
						&(tmp->prof));
				tmp = tmp->next;
			}
			MR_prof_write_word(fp, 0); /* end of list */
		}
	}
	MR_prof_write_word(fp, 0); /* end of list */
}

static void MR_prof_dump_cm_calls(FILE *fp, int n, int scc_num,
		const MR_SCCId *scc, MR_ClassMethodCallProfile **profs,
		const MR_CallSite **sites)
{
	MR_prof_write_word(fp, 0); /* end marker */
}

static void MR_prof_dump_callbacks(FILE *fp, int scc_num, const MR_SCCId *SCCid,
		MR_CallBackCallProfile *cback)
{
	int child_num;

	while (cback != NULL)
	{
		/* MR_prof_dump_ppid(fp, (int) NULL); mercury caller */
		MR_prof_dump_ppid(fp, cback->proc_id);
		if (cback->child != NULL)
		{
			child_num = cback->child->scc_num;
			instance_stack[isp++] = cback->child;
		} else {
			child_num = 0;
		}
		MR_prof_write_word(fp, child_num);
		MR_prof_dump_proc_call_profile(fp, &(cback->prof));
		cback = cback->next;
	}
	MR_prof_write_word(fp, 0); /* end marker */
}

static void MR_prof_dump_call_data(FILE *fp, const char *prefix, int num,
		const MR_Stack_Layout_Entry *caller,
		const MR_Stack_Layout_Entry *callee, 
		MR_ProcCallProfile *prof, int child)
{

	MR_prof_write_word(fp, num);
	MR_prof_dump_ppid(fp, &(caller->MR_sle_proc_id));
	MR_prof_dump_ppid(fp, &(callee->MR_sle_proc_id));
	MR_prof_write_word(fp, prof->calls);
	MR_prof_write_word(fp, prof->successes);
	MR_prof_write_word(fp, prof->failures);
	MR_prof_write_word(fp, prof->quanta);
	MR_prof_write_word(fp, child);
}

static void MR_prof_dump_ppid(FILE *fp, const MR_Stack_Layout_Proc_Id *ppid)
{
	int num;
	unsigned ind;
	MR_PPIdNode *tmp;

	if (ppid)
	{
		ind = (((const unsigned) ppid) >> 3) % MR_PPID_TABLE_SIZE;
		tmp = MR_prof_ppid_table[ind];
		num = -1;
		while (tmp)
		{
			if (ppid == tmp->ppid)
			{
				num = tmp->ppid_num;
				break;
			} else {
				tmp = tmp->next;
			}
		}
		if (num < 0)
		{
			tmp = (MR_PPIdNode *)
					MR_malloc(sizeof(MR_PPIdNode));
			tmp->ppid = ppid;
			tmp->ppid_num = MR_prof_ppid_counter++;
			tmp->next = MR_prof_ppid_table[ind];
			MR_prof_ppid_table[ind] = tmp;
			num = tmp->ppid_num;
		}
	} else {
		num = 0;
	}
	MR_prof_write_word(fp, num);
}

static void MR_prof_dump_ppid_table(FILE *fp)
{
	int i;
	MR_PPIdNode *tmp;

	for (i = 0; i < MR_PPID_TABLE_SIZE; i++)
	{
		tmp = MR_prof_ppid_table[i];
		while (tmp)
		{
			MR_prof_dump_ppid_defn(fp, tmp->ppid_num, tmp->ppid);
			tmp = tmp->next;
		}
	}
	MR_prof_write_word(fp, 0); /* end marker */
}

static void MR_prof_dump_ppid_defn(FILE *fp, int ppid_num,
		const MR_Stack_Layout_Proc_Id *ppid)
{
	if (ppid->MR_proc_user.MR_user_pred_or_func <= MR_FUNCTION) {
		/* User defined pred or func */
		if (ppid->MR_proc_user.MR_user_pred_or_func == MR_PREDICATE)
			MR_prof_write_word(fp, 1);
		else
			MR_prof_write_word(fp, 2);
		MR_prof_write_word(fp, ppid_num);
		MR_prof_dump_string(fp, ppid->MR_proc_user.MR_user_decl_module);
		MR_prof_dump_string(fp, ppid->MR_proc_user.MR_user_name);
		MR_prof_write_word(fp, ppid->MR_proc_user.MR_user_arity);
		MR_prof_write_word(fp, ppid->MR_proc_user.MR_user_mode);
	} else {
		/* Compiler generated pred */
		MR_prof_write_word(fp, 3);
		MR_prof_write_word(fp, ppid_num);
		MR_prof_dump_string(fp, ppid->MR_proc_comp.MR_comp_type_name);
		MR_prof_dump_string(fp, ppid->MR_proc_comp.MR_comp_type_module);
		MR_prof_dump_string(fp, ppid->MR_proc_comp.MR_comp_def_module);
		MR_prof_dump_string(fp, ppid->MR_proc_comp.MR_comp_pred_name);
		MR_prof_write_word(fp, ppid->MR_proc_comp.MR_comp_arity);
		MR_prof_write_word(fp, ppid->MR_proc_comp.MR_comp_mode);
	}
}

static void MR_prof_dump_SCCid(FILE *fp, const MR_SCCId *SCCid)
{
	int ind, num;
	MR_SCCIdNode *tmp;

	if (SCCid)
	{
		ind = (((const int) SCCid) >> 3) % MR_PPID_TABLE_SIZE;
		tmp = MR_prof_SCCid_table[ind];
		num = -1;
		while (tmp)
		{
			if (SCCid == tmp->SCCid)
			{
				num = tmp->SCCid_num;
				break;
			} else {
				tmp = tmp->next;
			}
		}
		if (num < 0)
		{
			tmp = (MR_SCCIdNode *)
					MR_malloc(sizeof(MR_SCCIdNode));
			tmp->SCCid = SCCid;
			tmp->SCCid_num = MR_prof_SCCid_counter++;
			tmp->next = MR_prof_SCCid_table[ind];
			MR_prof_SCCid_table[ind] = tmp;
			num = tmp->SCCid_num;
		}
	} else {
		num = 0;
	}
	MR_prof_write_word(fp, num);
}

static void MR_prof_dump_SCCid_table(FILE *fp)
{
	int i,j;
	MR_SCCIdNode *tmp;

	for (i = 0; i < MR_PPID_TABLE_SIZE; i++)
	{
		tmp = MR_prof_SCCid_table[i];
		while (tmp)
		{
			assert(tmp->SCCid_num != 0);
			MR_prof_write_word(fp, tmp->SCCid_num);
			MR_prof_output_call_site_list(fp,
				tmp->SCCid->num_fo_calls,
				tmp->SCCid->fo_calls);
			MR_prof_output_call_site_list(fp,
				tmp->SCCid->num_ho_calls,
				tmp->SCCid->ho_calls);
			/*
			MR_prof_output_call_site_list(fp,
				tmp->SCCid->num_cm_calls,
				tmp->SCCid->cm_calls);
			*/
			tmp = tmp->next;
		}
	}
	MR_prof_write_word(fp, 0); /* end marker */
}

static void
MR_prof_output_call_site_list(FILE *fp, int n, const MR_CallSite **calls)
{
	int j;

	MR_prof_write_word(fp, n);
	for (j = 0 ; j < n ; j++)
	{
		if (calls[j]->caller)
		{
			MR_prof_dump_ppid(fp, &calls[j]->caller->MR_sle_proc_id);
		} else {
			MR_prof_write_word(fp, 0);
		}
		if (calls[j]->callee)
		{
			MR_prof_dump_ppid(fp, &calls[j]->callee->MR_sle_proc_id);
		} else {
			MR_prof_write_word(fp, 0);
		}
		MR_prof_write_word(fp, calls[j]->line);
	}
}

void
MR_prof_output_deep_tables(void)
{
	FILE *fp;

	fp = checked_fopen("Prof.data", "create", "w");

	MR_prof_dump_prof_tree(fp, MR_prof_root_scc);

	checked_fclose(fp, "Prof.data");

	fp = checked_fopen("Prof.decls", "create", "w");

	MR_prof_dump_SCCid_table(fp);

	MR_prof_dump_ppid_table(fp);

	checked_fclose(fp, "Prof.decls");

}

#endif /* MR_PROFILE_DEEP */

/* ======================================================================== */

#define MR_PROF_USE_7BIT_ENCODING

#ifdef MR_PROF_USE_7BIT_ENCODING

/*
** 7bit encoding is an arbitary length encoding that uses the topmost
** bit of each byte to mark whether there are any more bits to come.
** The 7bit groups are written least significant first.
*/
void
MR_prof_write_word(FILE *fp, Word w)
{
	Word tmp;

	do
	{
		tmp = w & 0x7f;
		w = w >> 7;
		if (w != 0)
			tmp = tmp | 0x80; 
		fputc(tmp, fp);
	} while (w != 0);
}

Word
MR_prof_read_word(FILE *fp, int *eof_marker)
{
	Word w = 0;
	int c, i=0;

	while ((c = fgetc(fp)) != EOF)
	{
		w = w | ((c & 0x7f) << i);
		if (!(c & 0x80))
			break;
		i+=7;
	}

	/*
	if (i+7 > sizeof(Word))
		fatal_error("MR_prof_read_word: overflow");
	*/

	*eof_marker = (c == EOF);

	return w;
}

#elif MR_PROF_USE_MULTI_ENCODING

#else
#error No encoding specified
#endif

/* ======================================================================== */
===================================================================
New file: runtime/mercury_prof_deep.h
===================================================================
/*
** Copyright (C) 2000 The University of Melbourne.
** This file may only be copied under the terms of the GNU Library General
** Public License - see the file COPYING.LIB in the Mercury distribution.
*/

/*
** mercury_prof.h -- definitions for profiling.
** 
** This header gives information and declarations necessary for deep
** profiling. Other places for information are runtime/mercury_prof_deep.c
** and compiler/profiling.m.
**
** Deep profiling captures a snapshot of the performance of a program by
** using SIGPROFs to estimate how much time is spent in each procedure in
** the program. Unlike mprof, deep profiling does not make the assumption
** that each call to a procedure is equal in cost. Instead, keeps an
** annotated copy of the call tree, reduced into strongly connected
** components (SCCs), which is represented using structures called
** `MR_SCCInstance's. Each MR_SCCInstance is an instance of a static SCC
** from the program. The MR_SCCInstance structure stores information about
** the number of times each procedure called in that dynamic instance of the
** corresponding static SCC was called, succeeded, failed, and the number of
** SIGPROFs that occured while we were in that procedure. Each
** MR_SCCInstance also has a pointer to a static structure MR_SCCId which
** records the static information about the call-sites in that SCC.
**
** Deep profiling works by annotating the generated code to create and
** maintain the reduced call-tree. It uses two global variables:
**
** 	MR_prof_current_scc which points to the current MR_SCCInstance;
** and
**	MR_prof_current_proc which points to the MR_ProcCallProfile which
**		stores the counts mentioned above for the current procedure.
**		Note that this is part of the *caller's* MR_SCCInstance, not
**		the callee's MR_SCCInstance (ie MR_prof_current_scc).
*/

#ifndef MERCURY_PROF_DEEP_H
#define MERCURY_PROF_DEEP_H

#ifdef MR_PROFILE_DEEP

#include "mercury_types.h"		/* for `Word' */
#include "mercury_stack_layout.h"	/* for `MR_Stack_Layout_Entry *' */
#include "mercury_ho_call.h"		/* for `MR_Closure *' */

#define MR_IF_PROFILE_DEEP(x)	x

/*
** The SCCId structures are generated as part of the static data
** along with the stack-layout data.
*/

typedef struct MR_SCC_ID {
	const int			num_fo_calls;
	const struct MR_CALL_SITE	**fo_calls;
	const int			num_ho_calls;
	const struct MR_CALL_SITE	**ho_calls;
	const int			num_cm_calls;
	const struct MR_CALL_SITE	**cm_calls;
} MR_SCCId;

typedef struct MR_CALL_SITE {
	const MR_Stack_Layout_Entry	*caller;
	const MR_Stack_Layout_Entry	*callee;
	const char			*file;
	const int			line;
} MR_CallSite;

#define MR_MAKE_SCC_ID(nm, focs, hocs, cmcs)				    \
	const MR_CallSite *MR_PASTE2(nm, _focs)[] = focs;		    \
	const MR_CallSite *MR_PASTE2(nm, _hocs)[] = hocs;		    \
	const MR_CallSite *MR_PASTE2(nm, _cmcs)[] = cmcs;		    \
	const MR_SCCId nm = {						    \
			sizeof(MR_PASTE2(nm, _focs))/sizeof(MR_CallSite *), \
			MR_PASTE2(nm, _focs),				    \
			sizeof(MR_PASTE2(nm, _hocs))/sizeof(MR_CallSite *), \
			MR_PASTE2(nm, _hocs),				    \
			sizeof(MR_PASTE2(nm, _cmcs))/sizeof(MR_CallSite *), \
			MR_PASTE2(nm, _cmcs)				    \
		}

#define MR_REFER_SCC_ID(l)	&mercury__scc_##l

#define MR_DECL_SLE(nm)							\
	extern const struct mercury_data__layout__##nm##_struct		\
			mercury_data__layout__##nm

#define MR_REF_SLE(nm)	(const MR_Stack_Layout_Entry *)			\
				&mercury_data__layout__##nm

#define MR_REF_SLEc(nm)	(const MR_Stack_Layout_Entry *)			\
					&mercury_data__layout__##nm,

#define MR_MAKE_CALL_SITE(nm, cer, cee, fl, li)				\
	MR_DECL_SLE(cer);						\
	MR_DECL_SLE(cee);						\
	MR_CallSite nm = { MR_REF_SLE(cer), MR_REF_SLE(cee), fl, li }

#define MR_MAKE_HO_CALL_SITE(nm, cer, fl, li)				\
	MR_DECL_SLE(cer);						\
	MR_CallSite nm = {						\
		MR_REF_SLE(cer),					\
		(const MR_Stack_Layout_Entry *) NULL,			\
		fl,							\
		li							\
	}

typedef unsigned long	MR_Count;

/*
** A MR_ProcCallProfile contains the data that we wish to record
** about call sites; ie
**	the number of calls
**	the number of times that the call succeeded
**	the number of times that the call failed
**	(#calls <= #successes + #failures)
*/
typedef struct MR_PROC_CALL_PROFILE {
	MR_Count		calls;
	MR_Count		successes;
	MR_Count		failures;
	MR_Count		quanta;
} MR_ProcCallProfile;

typedef struct MR_SCC_INSTANCE {
	const MR_SCCId			*scc;
	int				scc_num;
	struct MR_INTER_CALL_PROFILE	**fo_calls;
	struct MR_MULTI_CALL_PROFILE	**ho_calls;
	struct MR_MULTI_CALL_PROFILE	**cm_calls;
	struct MR_MULTI_CALL_PROFILE	*callbacks;
} MR_SCCInstance;

	/*
	** For first order calls, we know the caller and the unique callee
	** at compile time, so all we need to store is the profile and a
	** pointer to the child MR_SCCInstance.
	*/
typedef struct MR_INTER_CALL_PROFILE {
	MR_ProcCallProfile	prof;
	struct MR_SCC_INSTANCE	*child;
}  MR_InterCallProfile;

	/*
	** For higher order calls (including class method calls) and
	** callbacks from C, we know the caller at compile time, but not
	** the callee. Indeed, since there may be multiple closures passed
	** to the same call site, we keep a linked list of the different
	** closures that were passed. We keep the address of the entry
	** point as an index, and a pointer to the MR_Stack_Layout_Proc_Id
	** for that procedure. We need the former, because the latter may
	** not be unique, and we need a unique key to identify the
	** procedure.
	**
	** Note that for class method calls, since the closure is dependent
	** on the type of which the typeclass is an instance, we can only
	** get multiple closures if multiple types are used. This is harder
	** than it sounds, since we're talking about dynamic instances of
	** the call-site, not the single static instance. In fact, it is only
	** possible if existentially quantified types are used.
	*/
typedef struct MR_MULTI_CALL_PROFILE {
	MR_ProcCallProfile		prof;
	struct MR_SCC_INSTANCE		*child;
	struct MR_MULTI_CALL_PROFILE	*next;
	Code				*entry;
	const MR_Stack_Layout_Proc_Id	*proc_id;
} MR_MultiCallProfile;;

typedef MR_MultiCallProfile MR_HigherOrderCallProfile;

typedef MR_MultiCallProfile MR_ClassMethodCallProfile;

typedef MR_MultiCallProfile MR_CallBackCallProfile;

void MR_prof_init_globals(MR_Stack_Layout_Entry *proclayout);

/*
** This variable holds the address of the "current" CallProfile struct
** so that when a profiling interrupt occurs, the profiler can simply
** increment the appropriate counter.
*/

extern	MR_ProcCallProfile * volatile	MR_prof_current_proc;
extern	MR_SCCInstance * volatile	MR_prof_current_scc;
extern	MR_Count			MR_prof_num_sigs;

#define	MR_intra_scc_call(this_scc, new_slot)	\
		MR_prof_ensure_fo_call_slot((MR_SCCInstance *) (this_scc), \
			(new_slot))

#define	MR_local_inter_scc(this_scc, new_slot, scc_id)	\
		MR_prof_ensure_fo_call_inter_slot(	\
			(MR_SCCInstance *) (this_scc), \
			(new_slot), (const MR_SCCId *) (scc_id))

#define MR_nonlocal_inter_scc(this_scc, new_slot, playt) \
		MR_prof_ensure_fo_call_inter_slot2(	\
			(MR_SCCInstance *) (this_scc), \
			(new_slot), (const MR_Stack_Layout_Entry *) (playt))

#define MR_ho_call(this_scc, new_slot, closure) \
		MR_prof_ensure_ho_call_inter_slot(	\
			(MR_SCCInstance *) (this_scc), \
			(new_slot), (MR_Closure *) (closure))

#define MR_special_ho_call(this_scc, new_slot, cptr, playt) \
		MR_prof_ensure_special_ho_call_inter_slot(	\
			(MR_SCCInstance *) (this_scc), \
			(new_slot), (Code *) (cptr), \
			(const MR_Stack_Layout_Entry *) (playt))

#define MR_prof_c_calls_mercury(playt) \
		MR_prof_ensure_c_calls_mercury(	\
			(const MR_Stack_Layout_Entry *) (playt))

#define MR_scc_from_current_proc()	\
		(((MR_InterCallProfile *) MR_prof_current_proc)->child)

int MR_prof_check_current_scc(const Word *c);

static int MR_prof_debugging_dummy;

MR_ProcCallProfile *
	MR_prof_ensure_fo_call_slot(MR_SCCInstance *scc, int site_num);

MR_ProcCallProfile *
	MR_prof_ensure_fo_call_inter_slot(MR_SCCInstance *scc, int site_num,
		const MR_SCCId *child_scc);

MR_ProcCallProfile *
	MR_prof_ensure_fo_call_inter_slot2(MR_SCCInstance *scc, int site_num,
		const MR_Stack_Layout_Entry *callee);

MR_ProcCallProfile *
	MR_prof_ensure_ho_call_inter_slot(MR_SCCInstance *scc, int site_num,
		MR_Closure *closure);

MR_ProcCallProfile *
	MR_prof_ensure_special_ho_call_inter_slot(MR_SCCInstance *scc, int site_num,
		Code *code_ptr, const MR_Stack_Layout_Entry *playt);

MR_ProcCallProfile *
	MR_prof_proc_const_call(MR_MultiCallProfile **call_list,
		MR_Closure *proc);

MR_ProcCallProfile *
	MR_prof_special_proc_const_call(MR_MultiCallProfile **call_list,
		Code *code_ptr, const MR_Stack_Layout_Proc_Id *proc);

MR_ProcCallProfile *
	MR_prof_ensure_c_calls_mercury(const MR_Stack_Layout_Entry *playt);

void
MR_prof_output_deep_tables(void);

#else

#define MR_MAKE_SCC_ID(x, y, z, w)

#define MR_MAKE_CALL_SITE(x, y, z, w, v)

#define MR_IF_PROFILE_DEEP(x)

#endif

void
MR_prof_write_word(FILE *fp, Word w);

Word
MR_prof_read_word(FILE *fp, int *eof_marker);

#endif	/* not MERCURY_PROF_DEEP_H */

--------------------------------------------------------------------------
mercury-developers mailing list
Post messages to:       mercury-developers at cs.mu.oz.au
Administrative Queries: owner-mercury-developers at cs.mu.oz.au
Subscriptions:          mercury-developers-request at cs.mu.oz.au
--------------------------------------------------------------------------



More information about the developers mailing list