for review: demangle instance declarations

Fergus Henderson fjh at cs.mu.OZ.AU
Tue Feb 2 11:51:50 AEDT 1999


DJ, could you please review this one?

--------------------

Estimated hours taken: 4

Implement demangling for instance declarations (i.e. base_typeclass_infos).

compiler/base_typeclass_info.m:
compiler/llds_out.m:
	Change the way name mangling for base_typeclass_infos is done
	to ensure that they can be unambiguously demangled.

util/mdemangle.c:
profiler/demangle.m:
	Add code to demangle base_typeclass_infos.

util/mdemangle.c:
	Fix a bug: if it got part way through demangling a symbol
	before noticing that it had the wrong format, then it would
	sometimes print out a partially demangled version of the
	symbol rather than printing out the original symbol unchanged.

cvs diff  compiler/base_typeclass_info.m compiler/llds_out.m profiler/demangle.m util/mdemangle.c
Index: compiler/base_typeclass_info.m
===================================================================
RCS file: /home/mercury1/repository/mercury/compiler/base_typeclass_info.m,v
retrieving revision 1.9
diff -u -r1.9 base_typeclass_info.m
--- base_typeclass_info.m	1998/11/20 04:06:59	1.9
+++ base_typeclass_info.m	1999/02/02 00:18:32
@@ -33,6 +33,7 @@
 
 :- implementation.
 
+:- import_module prog_io, prog_out.
 :- import_module hlds_data, hlds_pred, hlds_out.
 :- import_module code_util, globals, options, term.
 :- import_module bool, string, map, std_util, require, assoc_list.
@@ -181,11 +182,10 @@
 	list__map(GetRval, SuperClassConstraints, SuperClassRvals).
 	
 %----------------------------------------------------------------------------%
-	% XXX this version of base_typeclass_info__make_instance_string 
-	% handles non-qualified types, even though everything should be
-	% qualified by now. Unfortunately, for some reason builtins are
-	% not qualified. The version that aborts when given an unqualified
-	% type is included at the end.
+
+	% Note that for historical reasons, builtin types
+	% are treated as being unqualified (`int') rather than
+	% being qualified (`builtin:int') at this point.
 
 base_typeclass_info__make_instance_string(InstanceTypes, InstanceString) :-
 	list__map(base_typeclass_info__type_to_string, 
@@ -196,77 +196,15 @@
 :- mode base_typeclass_info__type_to_string(in, out) is det.
 
 base_typeclass_info__type_to_string(Type, String) :-
-	(
-		Type = term__functor(Name, Args, _),
-		(
-			Name = term__atom(":"),
-			Args = [ModuleName, TypeName]
-		->
-			base_typeclass_info__type_to_string(ModuleName,
-				ModuleString),
-			base_typeclass_info__type_to_string(TypeName,
-				TypeString),
-			string__append(ModuleString, TypeString, String)
-		;
-			Name = term__atom(NameString)
-		->
-			list__length(Args, Arity),
-			string__int_to_string(Arity, ArityString),
-			string__append_list([NameString, "_", ArityString, "_"],
-				String)
-		;
-			error("instance functor not an atom")
-		)
+	( sym_name_and_args(Type, TypeName, TypeArgs) ->
+		prog_out__sym_name_to_string(TypeName, "__", TypeNameString),
+		list__length(TypeArgs, TypeArity),
+		string__int_to_string(TypeArity, TypeArityString),
+		string__append_list(
+			[TypeNameString, "__arity", TypeArityString, "__"],
+			String)
 	;
-		Type = term__variable(_),
-		error("instance type should be a single functor with variables as args")
+		error("base_typeclass_info__type_to_string: invalid type")
 	).
-	
-/******************************* 
- *
- * This is the non-working version of base_typeclass_info__type_to_string,
- * enforces the rule that every type be qualified. Unfortunately this doesn't
- * seem to be the case
-
-:- pred base_typeclass_info__type_to_string(type, string).
-:- mode base_typeclass_info__type_to_string(in, out) is det.
-
-base_typeclass_info__type_to_string(Type, String) :-
-	(
-		Type = term__functor(Name, Args, _),
-		Name = term__atom(":"),
-		Args = [ModuleName, TypeName]
-	->
-		base_typeclass_info__unqualified_type_to_string(ModuleName,
-			ModuleString),
-		base_typeclass_info__unqualified_type_to_string(TypeName,
-			TypeString),
-		string__append(ModuleString, TypeString, String)
-	;
-		error("type not qualified")
-	).
-
-:- pred base_typeclass_info__unqualified_type_to_string(type, string).
-:- mode base_typeclass_info__unqualified_type_to_string(in, out) is det.
-
-base_typeclass_info__unqualified_type_to_string(Type, String) :-
-	(
-		Type = term__functor(Name, Args, _),
-		(
-			Name = term__atom(NameString)
-		->
-			list__length(Args, Arity),
-			string__int_to_string(Arity, ArityString),
-			string__append_list([NameString, "_", ArityString, "_"],
-				String)
-		;
-			error("instance functor not an atom")
-		)
-	;
-		Type = term__variable(_),
-		error("instance type should be a single functor with variables as args")
-	).
-
-**********************************/
-	
+		
 %----------------------------------------------------------------------------%
Index: compiler/llds_out.m
===================================================================
RCS file: /home/mercury1/repository/mercury/compiler/llds_out.m,v
retrieving revision 1.102
diff -u -r1.102 llds_out.m
--- llds_out.m	1999/01/27 08:34:29	1.102
+++ llds_out.m	1999/02/01 22:55:42
@@ -3818,20 +3818,11 @@
 
 llds_out__make_base_typeclass_info_name(class_id(ClassSym, ClassArity),
 		TypeNames, Str) :-
-	(
-		ClassSym = unqualified(_),
-		error("llds_out__make_base_typeclass_info_name: unqualified name")
-	;
-		ClassSym = qualified(ModuleName, ClassName),
-		llds_out__sym_name_mangle(ModuleName, MangledModuleName),
-		llds_out__name_mangle(ClassName, MangledClassName),
-		llds_out__qualify_name(MangledModuleName, MangledClassName,
-			MangledClassString)
-	),
+	llds_out__sym_name_mangle(ClassSym, MangledClassString),
 	string__int_to_string(ClassArity, ArityString),
 	llds_out__name_mangle(TypeNames, MangledTypeNames),
 	string__append_list(["base_typeclass_info_", MangledClassString,
-		"_", ArityString, "__", MangledTypeNames], Str).
+		"__arity", ArityString, "__", MangledTypeNames], Str).
 
 %-----------------------------------------------------------------------------%
 
Index: profiler/demangle.m
===================================================================
RCS file: /home/mercury1/repository/mercury/profiler/demangle.m,v
retrieving revision 1.6
diff -u -r1.6 demangle.m
--- demangle.m	1998/04/27 04:04:19	1.6
+++ demangle.m	1999/02/02 00:44:33
@@ -77,6 +77,8 @@
 		{ true }
 	; demangle_data ->
 		{ true }
+	; demangle_typeclass_info ->
+		{ true }
 	;
 		{ fail }
 	).
@@ -394,6 +396,42 @@
 	;
 		fail
 	).
+
+:- pred demangle_typeclass_info(string, string).
+:- mode demangle_typeclass_info(in, out) is semidet.
+demangle_typeclass_info -->
+	remove_prefix("mercury_data_"),
+	remove_prefix("__base_typeclass_info_"),
+	remove_maybe_module_prefix(yes(ClassName), ["arity"]),
+	{ ClassName \= "" },
+	remove_prefix("arity"),
+	remove_int(ClassArity),
+	remove_prefix("__"),
+	fix_mangled_ascii,
+	demangle_class_args(ClassArity, Args),
+	{ string__format("<instance declaration for %s(%s)>",
+		[s(ClassName), s(Args)], Result) },
+	dcg_set(Result).
+
+:- pred demangle_class_args(int, string, string, string).
+:- mode demangle_class_args(in, out, in, out) is semidet.
+demangle_class_args(Num, FormattedArgs) -->
+	remove_maybe_module_prefix(yes(TypeName), ["arity"]),
+	{ TypeName \= "" },
+	remove_prefix("arity"),
+	remove_int(TypeArity),
+	remove_prefix("__"),
+	( { Num > 1 } ->
+		{ Sep = ", " },
+		{ Num1 is Num - 1 },
+		demangle_class_args(Num1, Rest)
+	;
+		{ Sep = "" },
+		{ Rest = "" }
+	),
+	{ string__format("%s/%d%s%s",
+		[s(TypeName), i(TypeArity), s(Sep), s(Rest)],
+		FormattedArgs) }.
 
 /*---------------------------------------------------------------------------*/
 
Index: util/mdemangle.c
===================================================================
RCS file: /home/mercury1/repository/mercury/util/mdemangle.c,v
retrieving revision 1.31
diff -u -r1.31 mdemangle.c
--- mdemangle.c	1998/07/27 19:45:07	1.31
+++ mdemangle.c	1999/02/02 00:14:49
@@ -27,7 +27,7 @@
 #include <stdio.h>
 #include "mercury_std.h"
 
-static void demangle(char *name);
+static void demangle(const char *name);
 static const char *strip_module_name(char **start_ptr, char *end,
 		const char *trailing_context[]);
 static bool check_for_suffix(char *start, char *position, const char *suffix,
@@ -38,6 +38,7 @@
 static bool cut_trailing_integer(char *str, char **end, int *num);
 static bool cut_trailing_underscore_integer(char *str, char **end, int *num);
 static bool strip_prefix(char **str, const char *prefix);
+static bool strip_leading_integer(char **start_ptr, int *num);
 
 /*
 ** Bloody SunOS 4.x doesn't have memmove()...
@@ -94,7 +95,7 @@
 */
 
 static void 
-demangle(char *name)
+demangle(const char *orig_name)
 {
 	static const char entry[]   = "_entry_";
 	static const char mercury[] = "mercury__";
@@ -118,7 +119,10 @@
 	static const char base_type_layout[] = "base_type_layout_";
 	static const char base_type_info[] = "base_type_info_";
 	static const char base_type_functors[] = "base_type_functors_";
+	static const char base_typeclass_info[] = "__base_typeclass_info_";
 	static const char common[] = "common";
+	static const char arity_string[] = "arity";
+	static const char underscores_arity_string[] = "__arity";
 
 	static const char * trailing_context_1[] = {
 		introduced,
@@ -134,9 +138,15 @@
 		NULL
 	};
 
+	static const char * trailing_context_3[] = {
+		arity_string,
+		NULL
+	};
+
+	char name[1000];
 	char *start = name;
 	const char *module = "";	/* module name */
-	char *end = name + strlen(name);
+	char *end = name + strlen(orig_name);
 	char *position;		/* current position in string */
 	int mode_num;
 	int mode_num2;
@@ -152,6 +162,20 @@
 	enum { ORDINARY, UNIFY, COMPARE, INDEX, LAMBDA, DEFORESTATION }
 		category;
 	enum { COMMON, INFO, LAYOUT, FUNCTORS } data_category;
+	const char * class_name;
+	int class_arity;
+	char class_arg_buf[1000];
+	int class_arg_num;
+	const char* class_arg;
+
+	/*
+	** copy orig_name to a local buffer which we can modify,
+	** making sure that we don't overflow the buffer
+	*/
+	if (strlen(orig_name) >= sizeof(name)) {
+		goto wrong_format;
+	}
+	strcpy(name, orig_name);
 
 	/*
 	** skip any leading underscore inserted by the C compiler
@@ -258,7 +282,7 @@
 	do {
 		if (position == start) goto wrong_format;
 		position--;
-	} while (isdigit((unsigned char)*position));
+	} while (MR_isdigit(*position));
 		/* get the mode number */
 	
 	if (check_for_suffix(start, position, ua_suffix,
@@ -284,7 +308,7 @@
 	do {
 		if (position == start) goto wrong_format;
 		position--;
-	} while (isdigit((unsigned char)*position));
+	} while (MR_isdigit(*position));
 	if (check_for_suffix(start, position, ho_suffix,
 			sizeof(ho_suffix), &mode_num2)) {
 		end = position + 1 - (sizeof(ho_suffix) - 1);
@@ -408,6 +432,10 @@
 		goto wrong_format;
 	}
 
+	if (strip_prefix(&start, base_typeclass_info)) {
+		goto typeclass_info;
+	}
+
 	module = strip_module_name(&start, end, trailing_context_2);
 
 	if (strip_prefix(&start, base_type_info)) {
@@ -476,8 +504,55 @@
 	}
 	return;
 
+typeclass_info:
+	/*
+	** Parse the class name and class arity, which have the following
+	** layout:
+	**	<module-qualified class name>__arity<arity>__
+	*/
+	class_name = strip_module_name(&start, end, trailing_context_3);
+	/* XXX fix_mangled_ascii() */
+	if (!(strip_prefix(&start, arity_string)
+		&& strip_leading_integer(&start, &class_arity)
+		&& strip_prefix(&start, "__")))
+	{
+		goto wrong_format;
+	}
+
+	/*
+	** Parse the class argument types, which each have the following
+	** layout:
+	**	<module-qualified type name>__arity<arity>__
+	**
+	** We store the human-readable formatted output in
+	** class_arg_buf as we go.
+	*/
+	fix_mangled_ascii(start, &end);
+	strcpy(class_arg_buf, "");
+	for (class_arg_num = 0; class_arg_num < class_arity; class_arg_num++) {
+		if (class_arg_num != 0) {
+			strcat(class_arg_buf, ", ");
+		}
+		class_arg = strip_module_name(&start, end, trailing_context_3);
+		if (!(strip_prefix(&start, arity_string)
+		      && strip_leading_integer(&start, &arity)
+		      && strip_prefix(&start, "__")))
+		{
+			goto wrong_format;
+		}
+		sprintf(class_arg_buf + strlen(class_arg_buf),
+			"%s/%d", class_arg, arity);
+	}
+		
+	/*
+	** now print the results
+	*/
+	printf("<instance declaration for %s(%s)>",
+		class_name, class_arg_buf);
+	return;
+
 wrong_format:
-	printf("%s", name);
+	printf("%s", orig_name);
 	return;
 } /* end demangle() */
 
@@ -567,6 +642,36 @@
 }
 
 	/*
+	** If the string pointed to by *start_ptr starts with
+	** an integer, then advance *start_ptr past the leading integer,
+	** store the value of the integer in the int pointed to by `num',
+	** and return true; otherwise leave *start_ptr unchanged and
+	** return false.  (The string itself is always left unchanged.)
+	*/
+static bool 
+strip_leading_integer(char **start_ptr, int *num) 
+{
+	char *start = *start_ptr;
+	char save_char;
+	bool got_int;;
+
+	while(MR_isdigit(*start)) {
+		start++;
+	}
+	if (start == *start_ptr) return FALSE;
+	save_char = *start;
+	*start = '\0';
+	got_int = (sscanf(*start_ptr, "%d", num) == 1);
+	*start = save_char;
+	if (got_int) {
+		*start_ptr = start;
+		return TRUE;
+	} else {
+		return FALSE;
+	}
+}
+
+	/*
 	** Remove trailing integer (at the supplied `real_end' of the
 	** string), and return it in the int pointed to by `num'.   True
 	** is returned if there is an integer at the end, false if not.
@@ -581,7 +686,7 @@
 	do { 
 		if (end == str) return FALSE;
 		end--;
-	} while (isdigit((unsigned char)*end));
+	} while (MR_isdigit(*end));
 
 	if (sscanf(end + 1, "%d", num) != 1) {
 		return FALSE;
@@ -685,7 +790,7 @@
 		int count = 0;
 		while (num < end) {
 			char *next_num = num;
-			while (isdigit((unsigned char)*next_num)) {
+			while (MR_isdigit(*next_num)) {
 				next_num++;
 			}
 			if (*next_num != '_' && *next_num != '\0') 
-- 
Fergus Henderson <fjh at cs.mu.oz.au>  |  "Binaries may die
WWW: <http://www.cs.mu.oz.au/~fjh>  |   but source code lives forever"
PGP: finger fjh at 128.250.37.3        |     -- leaked Microsoft memo.



More information about the developers mailing list