[m-rev.] for review: MLDS demangling

Fergus Henderson fjh at cs.mu.OZ.AU
Sat Aug 24 03:00:14 AEST 2002


Estimated hours taken: 8
Branches: main

util/mdemangle.c:
profiler/demangle.m:
	Add support for demangling code produced by the MLDS back-end.

Workspace: /home/ceres/fjh/mercury
Index: profiler/demangle.m
===================================================================
RCS file: /home/mercury1/repository/mercury/profiler/demangle.m,v
retrieving revision 1.15
diff -u -d -r1.15 demangle.m
--- profiler/demangle.m	25 Sep 2001 09:37:07 -0000	1.15
+++ profiler/demangle.m	23 Aug 2002 14:07:13 -0000
@@ -73,7 +73,9 @@
 :- pred demangle_from_c(string, string).
 :- mode demangle_from_c(in, out) is semidet.
 demangle_from_c -->
-	( demangle_proc ->
+	( demangle_proc_hl ->
+		{ true }
+	; demangle_proc_ll ->
 		{ true }
 	; demangle_data ->
 		{ true }
@@ -85,9 +87,9 @@
 
 /*---------------------------------------------------------------------------*/
 
-:- pred demangle_proc(string, string).
-:- mode demangle_proc(in, out) is semidet.
-demangle_proc -->
+:- pred demangle_proc_ll(string, string).
+:- mode demangle_proc_ll(in, out) is semidet.
+demangle_proc_ll -->
 	remove_prefix("mercury__"),
 
 	%
@@ -309,6 +311,249 @@
 	{ string__append_list(Parts, DemangledName) },
 	dcg_set(DemangledName).
 
+:- pred demangle_proc_hl(string, string).
+:- mode demangle_proc_hl(in, out) is semidet.
+demangle_proc_hl -->
+	maybe_remove_prefix("mercury__"),
+	%
+	% Get integer from end of string (it might be the mode number,
+	% it might be the internal label number).
+	%
+	remove_trailing_int(Int),
+
+	( m_remove_suffix("i") ->
+		%
+		% if we got to an `i', that means it is an internal
+		% label of the form `append_3_p_0_i1'
+		% in that case, save the internal label number and then
+		% get the mode number
+		%
+		{ MaybeInternalLabelNum = yes(Int) },
+		m_remove_suffix("_"),
+		remove_trailing_int(ModeNum0)
+	;
+		{ MaybeInternalLabelNum = no },
+		{ ModeNum0 = Int }
+	),
+
+	( m_remove_suffix("f_") ->
+		{ PredOrFunc = "function" },
+		{ Normal = yes }
+	; m_remove_suffix("p_") ->
+		{ PredOrFunc = "predicate" },
+		{ Normal = yes }
+	;
+		% it could be a compiler-generated unify or compare predicate
+		{ PredOrFunc = "comp-gen predicate" },
+		{ Normal = no }
+	),
+
+	%
+	% scan back past the arity number and then parse it
+	%
+	m_remove_suffix("_"),
+	remove_trailing_int(Arity),
+	m_remove_suffix("_"),
+
+	%
+	% Process the mangling introduced by unused_args.m.
+	% This involves stripping off the `__ua<m>' or `__uab<m>' added to 
+	% the end of the predicate/function name, where m is the mode number.
+	% 
+	(
+		remove_trailing_int(UA_ModeNum),
+		m_remove_suffix("__ua")
+	->
+		{ UnusedArgs = yes(ModeNum0 - no) },
+		{ ModeNum1 is UA_ModeNum mod 10000 }
+	;
+		remove_trailing_int(UA_ModeNum),
+		m_remove_suffix("__uab")
+	->
+		{ UnusedArgs = yes(ModeNum0 - yes) },
+		{ ModeNum1 is UA_ModeNum mod 10000 }
+	;
+		{ UnusedArgs = no },
+		{ ModeNum1 = ModeNum0 }
+	),
+		
+	%
+	% Process the mangling introduced by higher_order.m.
+	% This involves stripping off the `__ho<n>' where
+	% n is a unique identifier for this specialized version
+	%
+	(
+		remove_trailing_int(HO_Num),
+		m_remove_suffix("__ho")
+	->
+		{ HigherOrder = yes(HO_Num) }
+	;
+		{ HigherOrder = no }
+	),
+	{ ModeNum = ModeNum1 },
+
+	%
+	% Make sure special predicates with unused_args 
+	% are reported correctly.
+	%
+
+	( { UnusedArgs = yes(_), Normal = no } ->
+		remove_trailing_int(Arity)
+	;
+		{ true }
+	),
+
+	%
+	% Separate the module name from the predicate name
+	%
+	remove_maybe_module_prefix(MaybeModule0,
+		["IntroducedFrom__", "DeforestationIn__",
+		"AccFrom__", "TypeSpecOf__", "__"]),
+
+	%
+	% Check whether the start of the string matches the name of
+	% one of the special compiler-generated predicates; if so,
+	% set the `category' to the appropriate value and then
+	% skip past the prefix.  Also check that the setting of
+	% the category matches the setting of `Normal' determined above.
+	%
+	( remove_prefix("__Unify____") ->
+		{ Normal = no },
+		{ Category0 = unify }
+	; remove_prefix("__Compare____") ->
+		{ Normal = no },
+		{ Category0 = compare },
+		% there should only be one mode for compare/3 preds
+		{ ModeNum0 = 0 }
+	; remove_prefix("__Index____") ->
+		{ Normal = no },
+		{ Category0 = index },
+		% there should only be one mode for index/2 preds
+		{ ModeNum0 = 0 }
+	;	
+		{ Normal = yes },
+		{ Category0 = ordinary }
+	),
+
+
+	%
+	% Fix any mangled ascii codes in the predicate name.
+	%
+	% XXX This should be done *before* stripping off
+	% the mangling added by HLDS->HLDS passes such as
+	% unused_args.m and higher_order.m.
+	% (Doing it here means that we won't properly demangle
+	% names that involve both special characters and
+	% unused_args/higher_order specializations.)
+	% But for the MLDS back-end, it needs to be done *after*
+	% removing the module prefix, and currently that can't be
+	% done until after stripping off the `__ua*' and `__ho*' suffixes.
+	%
+	fix_mangled_ascii,
+	% 
+	% Fix any mangled ascii codes in the module name, if any.
+	% 
+	{
+		MaybeModule0 = no,
+		MaybeModule = no
+	;
+		MaybeModule0 = yes(ModuleName0),
+		fix_mangled_ascii(ModuleName0, ModuleName),
+		MaybeModule = yes(ModuleName)
+	},
+
+	%
+	% Now we need to look at the pred name and see if it is an
+	% introduced lambda predicate.
+	% XXX handle multiple prefixes
+	%
+
+	=(PredName0),
+
+	(
+		( 
+			remove_prefix("IntroducedFrom__") 
+		->
+			{ IntroducedPredType0 = (lambda) }
+		;
+			remove_prefix("DeforestationIn__")
+		->
+			{ IntroducedPredType0 = deforestation }
+		;
+			remove_prefix("AccFrom__")
+		->
+			{ IntroducedPredType0 = accumulator }
+		;
+			remove_prefix("TypeSpecOf__"),
+			{ IntroducedPredType0 = type_spec("") }
+		)
+	->
+		(
+			remove_prefix("pred__")
+		->
+			{ LambdaPredOrFunc = "pred" }
+		;
+			remove_prefix("func__")
+		->
+			{ LambdaPredOrFunc = "func" }
+		;
+			{ IntroducedPredType0 = type_spec(_) },
+			remove_prefix("pred_or_func__")
+		->
+			{ LambdaPredOrFunc = "" }
+		;
+			{ fail }
+		),
+		(
+			remove_maybe_pred_name(MPredName),
+			{ MPredName = yes(PredName1) },
+			( { IntroducedPredType0 = type_spec(_) } ->
+				remove_type_spec(TypeSpec),
+				{ IntroducedPredType = type_spec(TypeSpec) },
+				{ Seq = 0 },
+				{ Line = 0 }
+
+				% The compiler adds a redundant mode
+				% number to the predicate name to avoid
+				% creating two predicates with the same
+				% name (deep profiling doesn't like that).
+				% It isn't used here so we just ignore it.
+				% The compiler also adds a version number
+				% for the argument order used for specialized
+				% versions, which can also be ignored.
+			;
+				{ IntroducedPredType = IntroducedPredType0 },
+				remove_int(Line),
+				remove_prefix("__"),
+				remove_int(Seq)
+			)
+		->
+			{ PredName = PredName1 },
+			{ Category = introduced(IntroducedPredType, Line,
+				Seq, LambdaPredOrFunc) }
+		;
+			% If we get here it usually means that there
+			% were multiple prefixes, which aren't dealt
+			% with properly yet. Just treat it as an
+			% ordinary name for now.
+			{ Category = ordinary },
+			{ PredName = PredName0 }
+		)
+	;
+		{ Category = Category0 },
+		{ PredName = PredName0 }
+	),
+
+
+	%
+	% Now, finally, we can construct the demangled symbol name
+	%
+	{ format_proc(Category, MaybeModule, PredOrFunc, PredName,
+		Arity, ModeNum, HigherOrder, UnusedArgs, MaybeInternalLabelNum,
+		Parts, []) },
+	{ string__append_list(Parts, DemangledName) },
+	dcg_set(DemangledName).
+
 :- pred format_proc(pred_category, maybe(string), string, string, int, int,
 		maybe(int), maybe(pair(int, bool)), maybe(int), list(string),
 		list(string)).
@@ -400,13 +645,29 @@
 :- pred demangle_data(string, string).
 :- mode demangle_data(in, out) is semidet.
 demangle_data -->
-	remove_prefix("mercury_data_"),
+	( remove_prefix("mercury_data_") ->
+		% LLDS mangled data
+		{ HighLevel = no }
+	;
+		% MLDS mangled data
+		{ HighLevel = yes },
+		maybe_remove_prefix("mercury__")
+	),
 	remove_maybe_module_prefix(MaybeModule0,
 		["type_ctor_info_", "type_ctor_layout_",
 		"type_ctor_functors_", "common_"]),
 	{ MaybeModule0 = yes("") ->
 		MaybeModule = no
 	;
+		% for the MLDS back-end,
+		% the module qualifiers get include twice (XXX why?)
+		HighLevel = yes,
+		MaybeModule0 = yes(Twice)
+	->
+		Once = string__left(Twice, string__length(Twice) // 2),
+		Once = string__right(Twice, string__length(Twice) // 2),
+		MaybeModule = yes(Once)
+	;
 		MaybeModule = MaybeModule0
 	},
 	( remove_prefix("type_ctor_info_") ->
@@ -471,8 +732,8 @@
 :- pred demangle_typeclass_info(string, string).
 :- mode demangle_typeclass_info(in, out) is semidet.
 demangle_typeclass_info -->
-	remove_prefix("mercury_data_"),
-	remove_prefix("__base_typeclass_info_"),
+	maybe_remove_prefix("mercury_data___"),
+	remove_prefix("base_typeclass_info_"),
 	remove_maybe_module_prefix(yes(ClassName), ["arity"]),
 	{ ClassName \= "" },
 	remove_prefix("arity"),
Index: util/mdemangle.c
===================================================================
RCS file: /home/mercury1/repository/mercury/util/mdemangle.c,v
retrieving revision 1.45
diff -u -d -r1.45 mdemangle.c
--- util/mdemangle.c	23 Jul 2002 19:39:00 -0000	1.45
+++ util/mdemangle.c	23 Aug 2002 16:53:25 -0000
@@ -43,6 +43,7 @@
 static MR_bool cut_trailing_underscore_integer(char *str,
 		char **end, int *num);
 static MR_bool strip_prefix(char **str, const char *prefix);
+static MR_bool strip_suffix(const char *str, char **end, const char *suffix);
 static MR_bool strip_leading_integer(char **start_ptr, int *num);
 
 /*
@@ -148,11 +149,12 @@
 	static const char entry[]   = "_entry_";
 	static const char mercury[] = "mercury__";
 	static const char func_prefix[] = "fn__"; /* added for functions */
-	static const char unify[]   = "__Unify___";
-	static const char compare[] = "__Compare___";
-	static const char mindex[]  = "__Index___";
-	/* we call it `mindex' rather than `index' to
-	   avoid a naming conflict with strchr's alter ego index() */
+	static const char unify1[]   = "__Unify___";
+	static const char unify2[]   = "__Unify____";
+	static const char compare1[] = "__Compare___";
+	static const char compare2[] = "__Compare____";
+	static const char index1[]  = "__Index___";
+	static const char index2[]  = "__Index____";
 
 	static const char introduced[]  = "IntroducedFrom__";
 	static const char deforestation[]  = "DeforestationIn__";
@@ -170,7 +172,9 @@
 	static const char type_ctor_layout[] = "type_ctor_layout_";
 	static const char type_ctor_info[] = "type_ctor_info_";
 	static const char type_ctor_functors[] = "type_ctor_functors_";
-	static const char base_typeclass_info[] = "__base_typeclass_info_";
+	static const char base_typeclass_info[] = "base_typeclass_info_";
+	static const char underscores_base_typeclass_info[] =
+						"__base_typeclass_info_";
 	static const char common[] = "common";
 	static const char arity_string[] = "arity";
 	static const char underscores_arity_string[] = "__arity";
@@ -183,6 +187,7 @@
 		deforestation,
 		accumulator,
 		type_spec,
+		unify1, compare1, index1,
 		NULL
 	};
 
@@ -207,6 +212,8 @@
 	int mode_num;
 	int mode_num2;
 	int arity;
+	MR_bool high_level = MR_TRUE;
+	MR_bool matched = MR_FALSE;
 	const char *pred_or_func; /* either "predicate" or "function" */
 		/* does this proc have any unused arguments */
 	MR_bool unused_args = MR_FALSE;
@@ -271,11 +278,10 @@
 	strip_prefix(&start, entry);
 
 	/*
-	** strip off the `mercury__' prefix
+	** strip off the `mercury__' prefix, if any
 	*/
-
-	if (!strip_prefix(&start, mercury)) {
-		goto not_plain_mercury;
+	if (strip_prefix(&start, mercury)) {
+		matched = MR_TRUE;
 	}
 
 /*
@@ -283,25 +289,16 @@
 */
 
 	/*
-	** strip off the `fn__' prefix, if any
-	*/
-	if (strip_prefix(&start, func_prefix)) {
-		pred_or_func = "function";
-	} else {
-		pred_or_func = "predicate";
-	}
-
-	/*
 	** Get integer from end of string (it might be the mode number,
 	** it might be the internal label number). We'll assume its mode
 	** number for the moment.
 	*/
 
 	if (!cut_trailing_integer(start, &end, &mode_num)) {
-		goto wrong_format;
+		goto not_plain_mercury;
 	}
 
-	if (end == start) goto wrong_format;
+	if (end == start) goto not_plain_mercury;
 
 	/*
 	** if we got to an `i', that means it is an internal
@@ -311,21 +308,54 @@
 	*/
 	if (*--end == 'i') {
 		internal = mode_num;
-		if (end == start || *--end != '_') goto wrong_format;
+		if (end == start || *--end != '_') goto not_plain_mercury;
 
 		if (!cut_trailing_underscore_integer(start, &end, &mode_num)) {
-			goto wrong_format;
+			goto not_plain_mercury;
 		}
 	}
 
+	if (end == start) goto not_plain_mercury;
+
+	/*
+	** strip off the `fn__' prefix, if any
+	*/
+	if (strip_prefix(&start, func_prefix)) {
+		high_level = MR_FALSE;
+		pred_or_func = "function";
+	} else if (strip_suffix(start, &end, "_f")) {
+		high_level = MR_TRUE;
+		matched = MR_TRUE;
+		pred_or_func = "function";
+	} else if (strip_suffix(start, &end, "_p")) {
+		high_level = MR_TRUE;
+		matched = MR_TRUE;
+		pred_or_func = "predicate";
+	} else {
+		/*
+		** It's not a function.
+		** But it could be either an LLDS predicate,
+		** or an MLDS compiler-generated predicate.
+		*/
+		high_level = (strstr(start, unify2) ||
+		    strstr(start, compare2) ||
+		    strstr(start, index2));
+		pred_or_func = "predicate";
+	}
+
+	if (end == start) goto not_plain_mercury;
+
 	/*
 	** scan back past the arity number and then parse it
 	*/
 
 	if (!cut_trailing_underscore_integer(start, &end, &arity)) {
-		goto wrong_format;
+		goto not_plain_mercury;
 	}
 
+	if (high_level) {
+		module = strip_module_name(&start, end, trailing_context_1);
+	}
 	/*
 	** Now start processing from the start of the string again.
 	** Check whether the start of the string matches the name of
@@ -334,16 +364,27 @@
 	** skip past the prefix.
 	*/
 
-	if (strip_prefix(&start, unify)) {
+	if (strip_prefix(&start, unify1)) {
 		category = UNIFY;
-	} else if (strip_prefix(&start, compare)) {
+	} else if (strip_prefix(&start, compare1)) {
 		category = COMPARE;
-		if (mode_num != 0) goto wrong_format;
-	} else if (strip_prefix(&start, mindex)) {
+		if (mode_num != 0) goto not_plain_mercury;
+	} else if (strip_prefix(&start, index1)) {
 		category = INDEX;
-		if (mode_num != 0) goto wrong_format;
+		if (mode_num != 0) goto not_plain_mercury;
 	} else {
 		category = ORDINARY;
+		/*
+		** For ordinary predicates, we should have matched
+		** against something by now --
+		** either the "mercury__" prefix, for LLDS mangling,
+		** or the "_f" or "_p" suffix, for MLDS mangling.
+		*/
+		if (!matched) goto not_plain_mercury;
+	}
+
+	if (category != ORDINARY && start[0] == '_') {
+		start++;
 	}
 
 	/*
@@ -417,7 +458,9 @@
 		}
 	}
 
-	module = strip_module_name(&start, end, trailing_context_1);
+	if (!high_level) {
+		module = strip_module_name(&start, end, trailing_context_1);
+	}
 
 	/*
 	** look for "IntroducedFrom" or "DeforestationIn" or "AccFrom"
@@ -607,16 +650,48 @@
 */
 
 not_plain_mercury:
+	/*
+	** Undo any in-place modifications done while trying to demangle
+	** predicate names.
+	*/
+	strcpy(name, orig_name);
+	start = name;
+	end = name + strlen(name);
 
-	if (!strip_prefix(&start, mercury_data)) {
-		goto wrong_format;
+	/*
+	** skip any leading underscore inserted by the C compiler
+	*/
+	if (*start == '_') {
+		start++;
 	}
 
-	if (strip_prefix(&start, base_typeclass_info)) {
-		goto typeclass_info;
+	if (strip_prefix(&start, mercury_data)) {
+		/* LLDS */
+		high_level = MR_FALSE;
+		if (strip_prefix(&start, underscores_base_typeclass_info)) {
+			goto typeclass_info;
+		}
+	} else {
+		/* MLDS */
+		high_level = MR_TRUE;
+		if (strip_prefix(&start, base_typeclass_info)) {
+			goto typeclass_info;
+		}
+		strip_prefix(&start, mercury);
 	}
 
 	module = strip_module_name(&start, end, trailing_context_2);
+	if (high_level) {
+		/*
+		** For MLDS, the module name gets duplicated (XXX why?)
+		** So here we must replace `foo:foo' with just `foo'.
+		*/
+		size_t half_len = strlen(module) / 2;
+		if (strncmp(module, module + half_len + 1, half_len) != 0) {
+			goto wrong_format;
+		}
+		module += half_len + 1;
+	}
 
 	if (strip_prefix(&start, type_ctor_info)) {
 		data_category = INFO;
@@ -800,11 +875,10 @@
 }
 
 	/*
-	** Remove the prefix from a string, if it has 
-	** it. 
-	** Returns MR_TRUE if it has that prefix, and newstr will
-	** then point to the rest of that string.
-	** If the string doesn't have that prefix, newstr will
+	** Remove the prefix from a string, if it has it. 
+	** Returns MR_TRUE if the string has that prefix, and
+	** *str will then point to the rest of that string.
+	** If the string doesn't have that prefix, *str will
 	** be unchanged, and the function will return MR_FALSE.
 	*/
 static MR_bool 
@@ -816,6 +890,26 @@
 
 	if (strncmp(*str, prefix, len) == 0) {
 		*str += len;
+		return MR_TRUE;
+	}
+	return MR_FALSE;
+}
+
+	/*
+	** Remove the suffix from a string, if it has it. 
+	** Returns MR_TRUE if the string between start and *end
+	** has the specified suffix, and sets *end to point to
+	** the beginning of the suffix.
+	*/
+static MR_bool 
+strip_suffix(const char *start, char **end, const char *suffix) 
+{
+	int len;
+
+	len = strlen(suffix);
+
+	if (*end - start >= len && strncmp(*end - len, suffix, len) == 0) {
+		*end -= len;
 		return MR_TRUE;
 	}
 	return MR_FALSE;

-- 
Fergus Henderson <fjh at cs.mu.oz.au>  |  "I have always known that the pursuit
The University of Melbourne         |  of excellence is a lethal habit"
WWW: <http://www.cs.mu.oz.au/~fjh>  |     -- the last words of T. S. Garp.
--------------------------------------------------------------------------
mercury-reviews mailing list
post:  mercury-reviews at cs.mu.oz.au
administrative address: owner-mercury-reviews at cs.mu.oz.au
unsubscribe: Address: mercury-reviews-request at cs.mu.oz.au Message: unsubscribe
subscribe:   Address: mercury-reviews-request at cs.mu.oz.au Message: subscribe
--------------------------------------------------------------------------



More information about the reviews mailing list