[m-rev.] for review: MLDS demangling

Fergus Henderson fjh at cs.mu.OZ.AU
Thu Sep 5 21:33:21 AEST 2002


On 28-Aug-2002, Simon Taylor <stayl at cs.mu.OZ.AU> wrote:
> 
> Some test cases in misc_tests/mdemangle_test would be good.

Done.

> > Index: profiler/demangle.m
> > ===================================================================
> > +:- pred demangle_proc_hl(string, string).
> > +:- mode demangle_proc_hl(in, out) is semidet.
> > +demangle_proc_hl -->
> 
> There's a lot of code duplication here.

I've rewritten it to avoid most of that.

I'll go ahead and commit this now, despite the XXXs below;
it is at least an improvement on the status quo.

----------

Estimated hours taken: 16
Branches: main

util/mdemangle.c:
profiler/demangle.m:
	Add support for demangling code produced by the MLDS back-end.
	XXX util/mdemangle.c doesn't handle internal labels yet

tests/misc_tests/Mmakefile:
tests/misc_tests/mdemangle_test_hl.inp:
tests/misc_tests/mdemangle_test_hl.exp:
	Add a test case.
	The test is not enabled, since we don't yet pass it;
	demangling works OK for many cases, but fails
	for cases involving internal labels.

Workspace: /home/ceres/fjh/mercury
Index: profiler/demangle.m
===================================================================
RCS file: /home/mercury1/repository/mercury/profiler/demangle.m,v
retrieving revision 1.15
diff -u -d -r1.15 demangle.m
--- profiler/demangle.m	25 Sep 2001 09:37:07 -0000	1.15
+++ profiler/demangle.m	5 Sep 2002 11:00:49 -0000
@@ -73,7 +73,9 @@
 :- pred demangle_from_c(string, string).
 :- mode demangle_from_c(in, out) is semidet.
 demangle_from_c -->
-	( demangle_proc ->
+	( demangle_proc_hl ->
+		{ true }
+	; demangle_proc_ll ->
 		{ true }
 	; demangle_data ->
 		{ true }
@@ -85,9 +87,9 @@
 
 /*---------------------------------------------------------------------------*/
 
-:- pred demangle_proc(string, string).
-:- mode demangle_proc(in, out) is semidet.
-demangle_proc -->
+:- pred demangle_proc_ll(string, string).
+:- mode demangle_proc_ll(in, out) is semidet.
+demangle_proc_ll -->
 	remove_prefix("mercury__"),
 
 	%
@@ -134,19 +136,7 @@
 	% set the `category' to the appropriate value and then
 	% skip past the prefix.
 	%
-	( remove_prefix("__Unify__") ->
-		{ Category0 = unify }
-	; remove_prefix("__Compare__") ->
-		{ Category0 = compare },
-		% there should only be one mode for compare/3 preds
-		{ ModeNum0 = 0 }
-	; remove_prefix("__Index__") ->
-		{ Category0 = index },
-		% there should only be one mode for index/2 preds
-		{ ModeNum0 = 0 }
-	;	
-		{ Category0 = ordinary }
-	),
+	handle_compiler_generated_pred(ModeNum0, Category0),
 
 	%
 	% Fix any ascii codes mangled in the predicate name
@@ -154,6 +144,215 @@
 	fix_mangled_ascii,
 
 	%
+	% Process the mangling introduced by unused_args.m
+	% and higher_order.m.
+	% This involves stripping off the `__ua<m>', `__uab<m>',
+	% and/or `__ho<n>' added to the end of the
+	% predicate/function name, where m is the mode number.
+	% 
+	demangle_unused_args(UnusedArgs, ModeNum0, ModeNum1),
+	demangle_higher_order(HigherOrder, ModeNum1, ModeNum),
+
+	%
+	% Make sure special predicates with unused_args 
+	% are reported correctly.
+	%
+	( { UnusedArgs = yes(_), Category0 \= ordinary } ->
+		remove_trailing_int(Arity)
+	;
+		{ true }
+	),
+
+	%
+	% Separate the module name from the type name for the compiler
+	% generated predicates.
+	%
+	( { Category0 \= ordinary } ->
+		remove_prefix("_"),
+		remove_maybe_module_prefix(MaybeModule,
+			["IntroducedFrom__", "DeforestationIn__",
+			"AccFrom__", "TypeSpecOf__"]),
+		{ MaybeModule \= yes("") }
+	;
+		remove_maybe_module_prefix(MaybeModule,
+			["IntroducedFrom__", "DeforestationIn__",
+			"AccFrom__", "TypeSpecOf__"])
+	),
+
+	% Remove any prefixes added for introduced predicates,
+	% and get the predicate name.
+	handle_category_etc(PredName, Category0, Category),
+
+	%
+	% Now, finally, we can construct the demangled symbol name
+	%
+	{ format_proc(Category, MaybeModule, PredOrFunc, PredName,
+		Arity, ModeNum, HigherOrder, UnusedArgs, MaybeInternalLabelNum,
+		Parts, []) },
+	{ string__append_list(Parts, DemangledName) },
+	dcg_set(DemangledName).
+
+:- pred demangle_proc_hl(string, string).
+:- mode demangle_proc_hl(in, out) is semidet.
+demangle_proc_hl -->
+	% Symbols in the Mercury standard library get an additional
+	% "mercury__" prefix in their mangled name.
+	maybe_remove_prefix("mercury__"),
+
+	%
+	% Get integer from end of string (it might be the mode number,
+	% it might be the internal label number).
+	%
+	remove_trailing_int(Int),
+	(
+		%
+		% if we got to another int, that means it is an internal
+		% label of the form `append_3_p_0_1'
+		% in that case, save the internal label number and then
+		% get the mode number
+		%
+		m_remove_suffix("_"),
+		remove_trailing_int(ModeNum0)
+	->
+		{ ModeNum1 = ModeNum0 },
+		{ MaybeInternalLabelNum0 = yes(Int) }
+	;
+		{ ModeNum1 = Int },
+		{ MaybeInternalLabelNum0 = no }
+	),
+
+	%
+	% Handle the "f_" or "p_" suffix which indicates whether
+	% the procedure is a function or a predicate
+	%
+	( m_remove_suffix("f_") ->
+		{ PredOrFunc = "function" },
+		{ Normal = yes }
+	; m_remove_suffix("p_") ->
+		{ PredOrFunc = "predicate" },
+		{ Normal = yes }
+	;
+		% it could be a compiler-generated unify or compare predicate
+		{ PredOrFunc = "predicate" },
+		{ Normal = no }
+	),
+
+	( 
+		%
+		% Scan back past the arity number and then parse it.
+		%
+		m_remove_suffix("_"),
+		remove_trailing_int(Arity0)
+	->
+		{ Arity = Arity0 },
+		{ ModeNum2 = ModeNum1 },
+		{ MaybeInternalLabelNum = MaybeInternalLabelNum0 }
+	;
+		% It must be a compiler-generated unify or compare.
+		% What we thought were the mode number and label number
+		% were actually the arity and mode number
+		{ Normal = no },
+		{ Arity = ModeNum1 },
+		{ yes(ModeNum2) = MaybeInternalLabelNum0 },
+		{ MaybeInternalLabelNum = no }
+	),
+	m_remove_suffix("_"),
+
+	%
+	% Process the mangling introduced by unused_args.m
+	% and higher_order.m.
+	% This involves stripping off the `__ua<m>', `__uab<m>',
+	% and/or `__ho<n>' added to the end of the
+	% predicate/function name, where m is the mode number.
+	% 
+	demangle_unused_args(UnusedArgs, ModeNum2, ModeNum3),
+	demangle_higher_order(HigherOrder, ModeNum3, ModeNum),
+
+	%
+	% Make sure special predicates with unused_args 
+	% are reported correctly.
+	%
+
+	( { UnusedArgs = yes(_), Normal = no } ->
+		remove_trailing_int(Arity)
+	;
+		{ true }
+	),
+
+	%
+	% Separate the module name from the predicate name
+	%
+	remove_maybe_module_prefix(MaybeModule0,
+		["IntroducedFrom__", "DeforestationIn__",
+		"AccFrom__", "TypeSpecOf__", "__"]),
+
+	%
+	% Check whether the start of the string matches the name of
+	% one of the special compiler-generated predicates; if so,
+	% set the `category' to the appropriate value and then
+	% skip past the prefix.  Also check that the mode number
+	% is not invalid for the specified category.
+	%
+	handle_compiler_generated_pred(ModeNum, Category0),
+	( { Category0 \= ordinary } ->
+		remove_prefix("__")
+	;
+		[]
+	),
+
+	%
+	% Check that the setting of the category matches the setting
+	% of `Normal' determined above.
+	%
+	{ Normal = yes, Category0 = ordinary
+	; Normal = no, Category0 \= ordinary
+	},
+
+	%
+	% Fix any mangled ascii codes in the predicate name.
+	%
+	% XXX This should be done *before* stripping off
+	% the mangling added by HLDS->HLDS passes such as
+	% unused_args.m and higher_order.m.
+	% (Doing it here means that we won't properly demangle
+	% names that involve both special characters and
+	% unused_args/higher_order specializations.)
+	% But for the MLDS back-end, it needs to be done *after*
+	% removing the module prefix, and currently that can't be
+	% done until after stripping off the `__ua*' and `__ho*' suffixes.
+	%
+	fix_mangled_ascii,
+
+	% 
+	% Fix any mangled ascii codes in the module name, if any.
+	% 
+	{
+		MaybeModule0 = no,
+		MaybeModule = no
+	;
+		MaybeModule0 = yes(ModuleName0),
+		fix_mangled_ascii(ModuleName0, ModuleName),
+		MaybeModule = yes(ModuleName)
+	},
+
+	% Remove any prefixes added for introduced predicates,
+	% and get the predicate name.
+	handle_category_etc(PredName, Category0, Category),
+
+	%
+	% Now, finally, we can construct the demangled symbol name
+	%
+	{ format_proc(Category, MaybeModule, PredOrFunc, PredName,
+		Arity, ModeNum, HigherOrder, UnusedArgs, MaybeInternalLabelNum,
+		Parts, []) },
+	{ string__append_list(Parts, DemangledName) },
+	dcg_set(DemangledName).
+
+
+:- pred demangle_unused_args(maybe(pair(int, bool)), int, int, string, string).
+:- mode demangle_unused_args(out, in, out, in, out) is det.
+demangle_unused_args(UnusedArgs, ModeNum0, ModeNum) -->
+	%
 	% Process the mangling introduced by unused_args.m.
 	% This involves stripping off the `__ua<m>' or `__uab<m>' added to 
 	% the end of the predicate/function name, where m is the mode number.
@@ -163,18 +362,21 @@
 		m_remove_suffix("__ua")
 	->
 		{ UnusedArgs = yes(ModeNum0 - no) },
-		{ ModeNum1 is UA_ModeNum mod 10000 }
+		{ ModeNum is UA_ModeNum mod 10000 }
 	;
 		remove_trailing_int(UA_ModeNum),
 		m_remove_suffix("__uab")
 	->
 		{ UnusedArgs = yes(ModeNum0 - yes) },
-		{ ModeNum1 is UA_ModeNum mod 10000 }
+		{ ModeNum is UA_ModeNum mod 10000 }
 	;
 		{ UnusedArgs = no },
-		{ ModeNum1 = ModeNum0 }
-	),
-		
+		{ ModeNum = ModeNum0 }
+	).
+
+:- pred demangle_higher_order(maybe(int), int, int, string, string).
+:- mode demangle_higher_order(out, in, out, in, out) is det.
+demangle_higher_order(HigherOrder, ModeNum0, ModeNum) -->
 	%
 	% Process the mangling introduced by higher_order.m.
 	% This involves stripping off the `__ho<n>' where
@@ -188,38 +390,40 @@
 	;
 		{ HigherOrder = no }
 	),
-	{ ModeNum = ModeNum1 },
-
-	%
-	% Make sure special predicates with unused_args 
-	% are reported correctly.
-	%
-
-	( { UnusedArgs = yes(_), Category0 \= ordinary } ->
-		remove_trailing_int(Arity)
-	;
-		{ true }
-	),
+	{ ModeNum = ModeNum0 }.
 
 	%
-	% Separate the module name from the type name for the compiler
-	% generated predicates.
+	% Check whether the start of the string matches the name of
+	% one of the special compiler-generated predicates; if so,
+	% set the category to the appropriate value and then
+	% skip past the prefix.  Fails if the mode number
+	% is invalid for the specified category.
 	%
-	( { Category0 \= ordinary } ->
-		remove_prefix("_"),
-		remove_maybe_module_prefix(MaybeModule,
-			["IntroducedFrom__", "DeforestationIn__",
-			"AccFrom__", "TypeSpecOf__"]),
-		{ MaybeModule \= yes("") }
-	;
-		remove_maybe_module_prefix(MaybeModule,
-			["IntroducedFrom__", "DeforestationIn__",
-			"AccFrom__", "TypeSpecOf__"])
-	),
+:- pred handle_compiler_generated_pred(int, pred_category, string, string).
+:- mode handle_compiler_generated_pred(in, out, in, out) is semidet.
+handle_compiler_generated_pred(ModeNum0, Category0) -->
+	( remove_prefix("__Unify__") ->
+		{ Category0 = unify }
+	; remove_prefix("__Compare__") ->
+		{ Category0 = compare },
+		% there should only be one mode for compare/3 preds
+		{ ModeNum0 = 0 }
+	; remove_prefix("__Index__") ->
+		{ Category0 = index },
+		% there should only be one mode for index/2 preds
+		{ ModeNum0 = 0 }
+	;	
+		{ Category0 = ordinary }
+	).
 
+	% Remove any prefixes added for introduced predicates,
+	% and get the predicate name.
+:- pred handle_category_etc(string, pred_category, pred_category, string, string).
+:- mode handle_category_etc(out, in, out, in, out) is semidet.
+handle_category_etc(PredName, Category0, Category) -->
 	%
-	% Now we need to look at the pred name and see if it is an
-	% introduced lambda predicate.
+	% we need to look at the pred name and see if it is an
+	% introduced predicate (lambda, deforestation, accumulator, etc.).
 	% XXX handle multiple prefixes
 	%
 
@@ -297,17 +501,7 @@
 	;
 		{ Category = Category0 },
 		{ PredName = PredName0 }
-	),
-
-
-	%
-	% Now, finally, we can construct the demangled symbol name
-	%
-	{ format_proc(Category, MaybeModule, PredOrFunc, PredName,
-		Arity, ModeNum, HigherOrder, UnusedArgs, MaybeInternalLabelNum,
-		Parts, []) },
-	{ string__append_list(Parts, DemangledName) },
-	dcg_set(DemangledName).
+	).
 
 :- pred format_proc(pred_category, maybe(string), string, string, int, int,
 		maybe(int), maybe(pair(int, bool)), maybe(int), list(string),
@@ -400,13 +594,29 @@
 :- pred demangle_data(string, string).
 :- mode demangle_data(in, out) is semidet.
 demangle_data -->
-	remove_prefix("mercury_data_"),
+	( remove_prefix("mercury_data_") ->
+		% LLDS mangled data
+		{ HighLevel = no }
+	;
+		% MLDS mangled data
+		{ HighLevel = yes },
+		maybe_remove_prefix("mercury__")
+	),
 	remove_maybe_module_prefix(MaybeModule0,
 		["type_ctor_info_", "type_ctor_layout_",
 		"type_ctor_functors_", "common_"]),
 	{ MaybeModule0 = yes("") ->
 		MaybeModule = no
 	;
+		% for the MLDS back-end,
+		% the module qualifiers get included twice (XXX why?)
+		HighLevel = yes,
+		MaybeModule0 = yes(Twice)
+	->
+		Once = string__left(Twice, string__length(Twice) // 2),
+		Once = string__right(Twice, string__length(Twice) // 2),
+		MaybeModule = yes(Once)
+	;
 		MaybeModule = MaybeModule0
 	},
 	( remove_prefix("type_ctor_info_") ->
@@ -471,8 +681,8 @@
 :- pred demangle_typeclass_info(string, string).
 :- mode demangle_typeclass_info(in, out) is semidet.
 demangle_typeclass_info -->
-	remove_prefix("mercury_data_"),
-	remove_prefix("__base_typeclass_info_"),
+	maybe_remove_prefix("mercury_data___"),
+	remove_prefix("base_typeclass_info_"),
 	remove_maybe_module_prefix(yes(ClassName), ["arity"]),
 	{ ClassName \= "" },
 	remove_prefix("arity"),
Index: tests/misc_tests/Mmakefile
===================================================================
RCS file: /home/mercury1/repository/tests/misc_tests/Mmakefile,v
retrieving revision 1.13
diff -u -d -r1.13 Mmakefile
--- tests/misc_tests/Mmakefile	5 Apr 2000 06:11:34 -0000	1.13
+++ tests/misc_tests/Mmakefile	5 Sep 2002 10:19:53 -0000
@@ -12,6 +12,9 @@
 mdemangle_test.out: mdemangle_test.inp
 	mdemangle < mdemangle_test.inp > mdemangle_test.out 2>&1
 
+mdemangle_test_hl.out: mdemangle_test_hl.inp
+	mdemangle < mdemangle_test_hl.inp > mdemangle_test_hl.out 2>&1
+
 pretty_print_test.out: pretty_print_test.ugly
 	cp pretty_print_test.ugly pretty_print_test.out
 
@@ -20,6 +23,9 @@
 PROGS =
 
 OTHER_TESTS = mdemangle_test pretty_print_test
+
+# XXX we do not yet pass mdemangle_test_hl (we don't correctly demangle
+# internal labels, etc.)
 
 #-----------------------------------------------------------------------------#
 
Index: tests/misc_tests/mdemangle_test_hl.exp
===================================================================
RCS file: tests/misc_tests/mdemangle_test_hl.exp
diff -N tests/misc_tests/mdemangle_test_hl.exp
--- /dev/null	1 Jan 1970 00:00:00 -0000
+++ tests/misc_tests/mdemangle_test_hl.exp	5 Sep 2002 11:08:56 -0000
@@ -0,0 +1,140 @@
+A collection of symbols for use in testing mdemangle.
+This file tests demangling of symbols generated by the MLDS (--high-level-code) back-end.
+
+Each symbol is followed by the correct decoding.
+    
+	a type ctor info
+<type_ctor_info for type 'builtin:int'/0>
+<type_ctor_info for type 'builtin:int'/0>
+
+	a nested module type ctor info
+<type_ctor_info for type 'mdb:util:trace_port_type'/0>
+<type_ctor_info for type 'mdb:util:trace_port_type'/0>
+
+	a type ctor layout
+the MLDS back-end doesn't generate these yet
+
+	a type ctor functors
+the MLDS back-end doesn't generate these yet
+
+	a mangled name
+<predicate 'foo:!'/0 mode 0>
+<predicate 'foo:!'/0 mode 0>
+
+	a compare predicate
+<compare/3 predicate for type 'list:list'/1>
+<compare/3 predicate for type 'list:list'/1>
+
+	an index predicate
+<index/2 predicate for type 'list:list'/1>
+<index/2 predicate for type 'list:list'/1>
+
+	a unify predicate
+<unification predicate for type 'list:list'/1 mode 0>
+<unification predicate for type 'list:list'/1 mode 0>
+
+	a normal predicate
+<predicate 'list:append'/3 mode 0>
+<predicate 'list:append'/3 mode 0>
+
+	a function
+<function 'list:append'/2 mode 0>
+<function 'list:append'/2 mode 0>
+
+	an internal label 
+<predicate 'list:append'/3 mode 4 label 1>
+<predicate 'list:append'/3 mode 4 label 1>
+
+	unused args
+XXX need to test this
+
+	higher order specialization
+<function 'higher_order_func_test:my_map'/3 mode 0 (specialized [#3])>
+<function 'higher_order_func_test:my_map'/3 mode 0 (specialized [#3])>
+
+	higher order specialization (regression test for 1 char pred names)
+<function 'higher_order_func_test:c'/3 mode 0 (specialized [#3])>
+<function 'higher_order_func_test:c'/3 mode 0 (specialized [#3])>
+
+	higher order specialization and unused args
+<function 'higher_order_func_test:c'/3 mode 1 (specialized [#3]) (minus unused args [#0])>
+<function 'higher_order_func_test:c'/3 mode 1 (specialized [#3]) (minus unused args [#0])>
+
+	mangled name with unused args
+(XXX TODO)
+
+	some tests of symbols that should not be demangled
+	(this is a regression test: previous versions of mdemangle
+	seg faulted for this case)
+mercury_data_foo
+mercury_data_foo
+
+	some lambda goals
+(XXX TODO)
+<pred goal (#9) from 'simplex' in module 'lp' line 262 label 5>
+<pred goal (#9) from 'simplex' in module 'lp' line 262 label 5>
+<func goal (#4) from 'collect_vars' in module 'lp' line 153>
+<func goal (#4) from 'collect_vars' in module 'lp' line 153>
+
+	procedures introduced by deforestation
+(XXX TODO)
+<deforestation procedure (#9) from 'simplex' in module 'lp' line 262 label 5>
+<deforestation procedure (#9) from 'simplex' in module 'lp' line 262 label 5>
+<deforestation procedure (#4) from 'collect_vars' in module 'lp' line 153>
+<deforestation procedure (#4) from 'collect_vars' in module 'lp' line 153>
+
+	procedure introduced by type specialization
+(XXX TODO)
+<function 'sparse_bitset:list_to_set'/1 mode 0 (type specialized [T = var(V_2)])>
+<function 'sparse_bitset:list_to_set'/1 mode 0 (type specialized [T = var(V_2)])>
+
+	type specialization and deforestion
+(XXX TODO)
+	XXX this needs to be fixed
+<predicate 'doubleapp_impl:DeforestationIn__pred__TypeSpecOf__pred_or_func__double_app__[T = int]__21__0'/5 mode 0 (minus unused args [#0])>
+<predicate 'doubleapp_impl:DeforestationIn__pred__TypeSpecOf__pred_or_func__double_app__[T = int]__21__0'/5 mode 0 (minus unused args [#0])>
+
+	A realistic test
+
+/usr/lib/crt1.o: In function `_start':
+/usr/lib/crt1.o(.text+0x18): undefined reference to `main'
+interpreter.o: In function `<predicate 'interpreter:deref'/4 mode 0>':
+interpreter.o(.text+0xcb1): undefined reference to `<predicate 'tr_store:get_mutvar'/4 mode 0>'
+interpreter.o: In function `<predicate 'interpreter:not_occurs'/4 mode 0>':
+interpreter.o(.text+0xdcc): undefined reference to `<predicate 'tr_store:get_mutvar'/4 mode 0>'
+interpreter.o: In function `<predicate 'interpreter:unify'/4 mode 0>':
+interpreter.o(.text+0xed4): undefined reference to `<predicate 'tr_store:get_mutvar'/4 mode 0>'
+interpreter.o(.text+0xf01): undefined reference to `<predicate 'tr_store:get_mutvar'/4 mode 0>'
+interpreter.o(.text+0x10bb): undefined reference to `<predicate 'tr_store:set_mutvar'/4 mode 0>'
+interpreter.o(.text+0x10fa): undefined reference to `<predicate 'tr_store:get_mutvar'/4 mode 0>'
+interpreter.o(.text+0x117a): undefined reference to `<predicate 'tr_store:get_mutvar'/4 mode 0>'
+interpreter.o(.text+0x11ba): undefined reference to `<predicate 'tr_store:set_mutvar'/4 mode 0>'
+interpreter.o: In function `<predicate 'interpreter:my_term_to_term'/8 mode 0>':
+interpreter.o(.text+0x192e): undefined reference to `<predicate 'tr_store:get_mutvar'/4 mode 0>'
+interpreter.o: In function `<predicate 'interpreter:term_to_my_term'/6 mode 0>':
+interpreter.o(.text+0x1b97): undefined reference to `<predicate 'tr_store:new_mutvar'/4 mode 0>'
+interpreter.o: In function `<predicate 'interpreter:main_loop_2'/4 mode 0 label 2>':
+interpreter.o(.text+0x2146): undefined reference to `<predicate 'unsafe:unsafe_perform_io'/1 mode 0>'
+collect2: ld returned 1 exit status
+
+/usr/lib/crt1.o: In function `_start':
+/usr/lib/crt1.o(.text+0x18): undefined reference to `main'
+interpreter.o: In function `<predicate 'interpreter:deref'/4 mode 0>':
+interpreter.o(.text+0xcb1): undefined reference to `<predicate 'tr_store:get_mutvar'/4 mode 0>'
+interpreter.o: In function `<predicate 'interpreter:not_occurs'/4 mode 0>':
+interpreter.o(.text+0xdcc): undefined reference to `<predicate 'tr_store:get_mutvar'/4 mode 0>'
+interpreter.o: In function `<predicate 'interpreter:unify'/4 mode 0>':
+interpreter.o(.text+0xed4): undefined reference to `<predicate 'tr_store:get_mutvar'/4 mode 0>'
+interpreter.o(.text+0xf01): undefined reference to `<predicate 'tr_store:get_mutvar'/4 mode 0>'
+interpreter.o(.text+0x10bb): undefined reference to `<predicate 'tr_store:set_mutvar'/4 mode 0>'
+interpreter.o(.text+0x10fa): undefined reference to `<predicate 'tr_store:get_mutvar'/4 mode 0>'
+interpreter.o(.text+0x117a): undefined reference to `<predicate 'tr_store:get_mutvar'/4 mode 0>'
+interpreter.o(.text+0x11ba): undefined reference to `<predicate 'tr_store:set_mutvar'/4 mode 0>'
+interpreter.o: In function `<predicate 'interpreter:my_term_to_term'/8 mode 0>':
+interpreter.o(.text+0x192e): undefined reference to `<predicate 'tr_store:get_mutvar'/4 mode 0>'
+interpreter.o: In function `<predicate 'interpreter:term_to_my_term'/6 mode 0>':
+interpreter.o(.text+0x1b97): undefined reference to `<predicate 'tr_store:new_mutvar'/4 mode 0>'
+interpreter.o: In function `<predicate 'interpreter:main_loop_2'/4 mode 0 label 2>':
+interpreter.o(.text+0x2146): undefined reference to `<predicate 'unsafe:unsafe_perform_io'/1 mode 0>'
+collect2: ld returned 1 exit status
+
Index: tests/misc_tests/mdemangle_test_hl.inp
===================================================================
RCS file: tests/misc_tests/mdemangle_test_hl.inp
diff -N tests/misc_tests/mdemangle_test_hl.inp
--- /dev/null	1 Jan 1970 00:00:00 -0000
+++ tests/misc_tests/mdemangle_test_hl.inp	5 Sep 2002 11:03:50 -0000
@@ -0,0 +1,140 @@
+A collection of symbols for use in testing mdemangle.
+This file tests demangling of symbols generated by the MLDS (--high-level-code) back-end.
+
+Each symbol is followed by the correct decoding.
+    
+	a type ctor info
+mercury__builtin__builtin__type_ctor_info_int_0
+<type_ctor_info for type 'builtin:int'/0>
+
+	a nested module type ctor info
+mdb__util__mdb__util__type_ctor_info_trace_port_type_0
+<type_ctor_info for type 'mdb:util:trace_port_type'/0>
+
+	a type ctor layout
+the MLDS back-end doesn't generate these yet
+
+	a type ctor functors
+the MLDS back-end doesn't generate these yet
+
+	a mangled name
+foo__f_cut_0_p_0
+<predicate 'foo:!'/0 mode 0>
+
+	a compare predicate
+mercury__list____Compare____list_1_0
+<compare/3 predicate for type 'list:list'/1>
+
+	an index predicate
+mercury__list____Index____list_1_0
+<index/2 predicate for type 'list:list'/1>
+
+	a unify predicate
+mercury__list____Unify____list_1_0
+<unification predicate for type 'list:list'/1 mode 0>
+
+	a normal predicate
+mercury__list__append_3_p_0
+<predicate 'list:append'/3 mode 0>
+
+	a function
+mercury__list__append_2_f_0
+<function 'list:append'/2 mode 0>
+
+	an internal label 
+mercury__list__append_3_p_4_1
+<predicate 'list:append'/3 mode 4 label 1>
+
+	unused args
+XXX need to test this
+
+	higher order specialization
+higher_order_func_test__my_map__ho3_3_f_0
+<function 'higher_order_func_test:my_map'/3 mode 0 (specialized [#3])>
+
+	higher order specialization (regression test for 1 char pred names)
+higher_order_func_test__c__ho3_3_f_0
+<function 'higher_order_func_test:c'/3 mode 0 (specialized [#3])>
+
+	higher order specialization and unused args
+higher_order_func_test__c__ho3__ua1_3_f_0
+<function 'higher_order_func_test:c'/3 mode 1 (specialized [#3]) (minus unused args [#0])>
+
+	mangled name with unused args
+(XXX TODO)
+
+	some tests of symbols that should not be demangled
+	(this is a regression test: previous versions of mdemangle
+	seg faulted for this case)
+mercury_data_foo
+mercury_data_foo
+
+	some lambda goals
+(XXX TODO)
+mercury__lp__IntroducedFrom__pred__simplex__262__9_7_0_i5
+<pred goal (#9) from 'simplex' in module 'lp' line 262 label 5>
+mercury__lp__IntroducedFrom__func__collect_vars__153__4_3_0
+<func goal (#4) from 'collect_vars' in module 'lp' line 153>
+
+	procedures introduced by deforestation
+(XXX TODO)
+mercury__lp__DeforestationIn__pred__simplex__262__9_7_0_i5
+<deforestation procedure (#9) from 'simplex' in module 'lp' line 262 label 5>
+mercury__lp__DeforestationIn__pred__collect_vars__153__4_3_0
+<deforestation procedure (#4) from 'collect_vars' in module 'lp' line 153>
+
+	procedure introduced by type specialization
+(XXX TODO)
+mercury__fn__f_115_112_97_114_115_101_95_98_105_116_115_101_116_95_95_84_121_112_101_83_112_101_99_79_102_95_95_112_114_101_100_95_111_114_95_102_117_110_99_95_95_108_105_115_116_95_116_111_95_115_101_116_95_95_91_84_32_61_32_118_97_114_40_86_95_50_41_93_95_48_95_49_1_0
+<function 'sparse_bitset:list_to_set'/1 mode 0 (type specialized [T = var(V_2)])>
+
+	type specialization and deforestion
+(XXX TODO)
+	XXX this needs to be fixed
+mercury__f_100_111_117_98_108_101_97_112_112_95_105_109_112_108_95_95_68_101_102_111_114_101_115_116_97_116_105_111_110_73_110_95_95_112_114_101_100_95_95_84_121_112_101_83_112_101_99_79_102_95_95_112_114_101_100_95_111_114_95_102_117_110_99_95_95_100_111_117_98_108_101_95_97_112_112_95_95_91_84_32_61_32_105_110_116_93_95_95_50_49_95_95_48_95_95_117_97_48_5_0
+<predicate 'doubleapp_impl:DeforestationIn__pred__TypeSpecOf__pred_or_func__double_app__[T = int]__21__0'/5 mode 0 (minus unused args [#0])>
+
+	A realistic test
+
+/usr/lib/crt1.o: In function `_start':
+/usr/lib/crt1.o(.text+0x18): undefined reference to `main'
+interpreter.o: In function `interpreter__deref_4_p_0':
+interpreter.o(.text+0xcb1): undefined reference to `tr_store__get_mutvar_4_p_0'
+interpreter.o: In function `interpreter__not_occurs_4_p_0':
+interpreter.o(.text+0xdcc): undefined reference to `tr_store__get_mutvar_4_p_0'
+interpreter.o: In function `interpreter__unify_4_p_0':
+interpreter.o(.text+0xed4): undefined reference to `tr_store__get_mutvar_4_p_0'
+interpreter.o(.text+0xf01): undefined reference to `tr_store__get_mutvar_4_p_0'
+interpreter.o(.text+0x10bb): undefined reference to `tr_store__set_mutvar_4_p_0'
+interpreter.o(.text+0x10fa): undefined reference to `tr_store__get_mutvar_4_p_0'
+interpreter.o(.text+0x117a): undefined reference to `tr_store__get_mutvar_4_p_0'
+interpreter.o(.text+0x11ba): undefined reference to `tr_store__set_mutvar_4_p_0'
+interpreter.o: In function `interpreter__my_term_to_term_8_p_0':
+interpreter.o(.text+0x192e): undefined reference to `tr_store__get_mutvar_4_p_0'
+interpreter.o: In function `interpreter__term_to_my_term_6_p_0':
+interpreter.o(.text+0x1b97): undefined reference to `tr_store__new_mutvar_4_p_0'
+interpreter.o: In function `interpreter__main_loop_2_4_p_0_2':
+interpreter.o(.text+0x2146): undefined reference to `unsafe__unsafe_perform_io_1_p_0'
+collect2: ld returned 1 exit status
+
+/usr/lib/crt1.o: In function `_start':
+/usr/lib/crt1.o(.text+0x18): undefined reference to `main'
+interpreter.o: In function `<predicate 'interpreter:deref'/4 mode 0>':
+interpreter.o(.text+0xcb1): undefined reference to `<predicate 'tr_store:get_mutvar'/4 mode 0>'
+interpreter.o: In function `<predicate 'interpreter:not_occurs'/4 mode 0>':
+interpreter.o(.text+0xdcc): undefined reference to `<predicate 'tr_store:get_mutvar'/4 mode 0>'
+interpreter.o: In function `<predicate 'interpreter:unify'/4 mode 0>':
+interpreter.o(.text+0xed4): undefined reference to `<predicate 'tr_store:get_mutvar'/4 mode 0>'
+interpreter.o(.text+0xf01): undefined reference to `<predicate 'tr_store:get_mutvar'/4 mode 0>'
+interpreter.o(.text+0x10bb): undefined reference to `<predicate 'tr_store:set_mutvar'/4 mode 0>'
+interpreter.o(.text+0x10fa): undefined reference to `<predicate 'tr_store:get_mutvar'/4 mode 0>'
+interpreter.o(.text+0x117a): undefined reference to `<predicate 'tr_store:get_mutvar'/4 mode 0>'
+interpreter.o(.text+0x11ba): undefined reference to `<predicate 'tr_store:set_mutvar'/4 mode 0>'
+interpreter.o: In function `<predicate 'interpreter:my_term_to_term'/8 mode 0>':
+interpreter.o(.text+0x192e): undefined reference to `<predicate 'tr_store:get_mutvar'/4 mode 0>'
+interpreter.o: In function `<predicate 'interpreter:term_to_my_term'/6 mode 0>':
+interpreter.o(.text+0x1b97): undefined reference to `<predicate 'tr_store:new_mutvar'/4 mode 0>'
+interpreter.o: In function `<predicate 'interpreter:main_loop_2'/4 mode 0 label 2>':
+interpreter.o(.text+0x2146): undefined reference to `<predicate 'unsafe:unsafe_perform_io'/1 mode 0>'
+collect2: ld returned 1 exit status
+
Index: util/mdemangle.c
===================================================================
RCS file: /home/mercury1/repository/mercury/util/mdemangle.c,v
retrieving revision 1.45
diff -u -d -r1.45 mdemangle.c
--- util/mdemangle.c	23 Jul 2002 19:39:00 -0000	1.45
+++ util/mdemangle.c	5 Sep 2002 11:28:19 -0000
@@ -32,7 +32,7 @@
 
 static void demangle(const char *name);
 static const char *strip_module_name(char **start_ptr, char *end,
-		const char *trailing_context[]);
+		const char *special_prefixes[], const char *special_suffixes[]);
 static MR_bool check_for_suffix(char *start, char *position,
 		const char *suffix, int sizeof_suffix, int *mode_num2);
 static char *fix_mangled_ascii(char *str, char **end);
@@ -43,6 +43,7 @@
 static MR_bool cut_trailing_underscore_integer(char *str,
 		char **end, int *num);
 static MR_bool strip_prefix(char **str, const char *prefix);
+static MR_bool strip_suffix(const char *str, char **end, const char *suffix);
 static MR_bool strip_leading_integer(char **start_ptr, int *num);
 
 /*
@@ -142,17 +143,24 @@
 ** human-readable form and then print it to stdout
 */
 
+static void
+demangle(const char *orig_name)
+{
+	if (demangle_2(orig_name, TRUE)
+}
+
 static void 
 demangle(const char *orig_name)
 {
 	static const char entry[]   = "_entry_";
 	static const char mercury[] = "mercury__";
 	static const char func_prefix[] = "fn__"; /* added for functions */
-	static const char unify[]   = "__Unify___";
-	static const char compare[] = "__Compare___";
-	static const char mindex[]  = "__Index___";
-	/* we call it `mindex' rather than `index' to
-	   avoid a naming conflict with strchr's alter ego index() */
+	static const char unify1[]   = "__Unify___";
+	static const char unify2[]   = "__Unify____";
+	static const char compare1[] = "__Compare___";
+	static const char compare2[] = "__Compare____";
+	static const char index1[]  = "__Index___";
+	static const char index2[]  = "__Index____";
 
 	static const char introduced[]  = "IntroducedFrom__";
 	static const char deforestation[]  = "DeforestationIn__";
@@ -170,7 +178,9 @@
 	static const char type_ctor_layout[] = "type_ctor_layout_";
 	static const char type_ctor_info[] = "type_ctor_info_";
 	static const char type_ctor_functors[] = "type_ctor_functors_";
-	static const char base_typeclass_info[] = "__base_typeclass_info_";
+	static const char base_typeclass_info[] = "base_typeclass_info_";
+	static const char underscores_base_typeclass_info[] =
+						"__base_typeclass_info_";
 	static const char common[] = "common";
 	static const char arity_string[] = "arity";
 	static const char underscores_arity_string[] = "__arity";
@@ -183,9 +193,17 @@
 		deforestation,
 		accumulator,
 		type_spec,
+		unify1, compare1, index1,
+		NULL
+	};
+	static const char * trailing_context_1_hl_suffixes[] = {
+		ua_suffix,
+		ua_suffix2,
+		ho_suffix,
 		NULL
 	};
 
+
 	static const char * trailing_context_2[] = {
 		type_ctor_layout,
 		type_ctor_info,
@@ -207,6 +225,8 @@
 	int mode_num;
 	int mode_num2;
 	int arity;
+	MR_bool high_level = MR_TRUE;
+	MR_bool matched = MR_FALSE;
 	const char *pred_or_func; /* either "predicate" or "function" */
 		/* does this proc have any unused arguments */
 	MR_bool unused_args = MR_FALSE;
@@ -271,11 +291,10 @@
 	strip_prefix(&start, entry);
 
 	/*
-	** strip off the `mercury__' prefix
+	** strip off the `mercury__' prefix, if any
 	*/
-
-	if (!strip_prefix(&start, mercury)) {
-		goto not_plain_mercury;
+	if (strip_prefix(&start, mercury)) {
+		matched = MR_TRUE;
 	}
 
 /*
@@ -283,25 +302,16 @@
 */
 
 	/*
-	** strip off the `fn__' prefix, if any
-	*/
-	if (strip_prefix(&start, func_prefix)) {
-		pred_or_func = "function";
-	} else {
-		pred_or_func = "predicate";
-	}
-
-	/*
 	** Get integer from end of string (it might be the mode number,
 	** it might be the internal label number). We'll assume its mode
 	** number for the moment.
 	*/
 
 	if (!cut_trailing_integer(start, &end, &mode_num)) {
-		goto wrong_format;
+		goto not_plain_mercury;
 	}
 
-	if (end == start) goto wrong_format;
+	if (end == start) goto not_plain_mercury;
 
 	/*
 	** if we got to an `i', that means it is an internal
@@ -311,21 +321,54 @@
 	*/
 	if (*--end == 'i') {
 		internal = mode_num;
-		if (end == start || *--end != '_') goto wrong_format;
+		if (end == start || *--end != '_') goto not_plain_mercury;
 
 		if (!cut_trailing_underscore_integer(start, &end, &mode_num)) {
-			goto wrong_format;
+			goto not_plain_mercury;
 		}
 	}
+	if (end == start) goto not_plain_mercury;
+
+	/*
+	** strip off the `fn__' prefix, if any
+	*/
+	if (strip_prefix(&start, func_prefix)) {
+		high_level = MR_FALSE;
+		pred_or_func = "function";
+	} else if (strip_suffix(start, &end, "_f")) {
+		high_level = MR_TRUE;
+		matched = MR_TRUE;
+		pred_or_func = "function";
+	} else if (strip_suffix(start, &end, "_p")) {
+		high_level = MR_TRUE;
+		matched = MR_TRUE;
+		pred_or_func = "predicate";
+	} else {
+		/*
+		** It's not a function.
+		** But it could be either an LLDS predicate,
+		** or an MLDS compiler-generated predicate.
+		*/
+		high_level = (strstr(start, unify2) ||
+		    strstr(start, compare2) ||
+		    strstr(start, index2));
+		pred_or_func = "predicate";
+	}
+
+	if (end == start) goto not_plain_mercury;
 
 	/*
 	** scan back past the arity number and then parse it
 	*/
 
 	if (!cut_trailing_underscore_integer(start, &end, &arity)) {
-		goto wrong_format;
+		goto not_plain_mercury;
 	}
 
+	if (high_level) {
+		module = strip_module_name(&start, end,
+				trailing_context_1, trailing_context_1_hl_suffixes);
+	}
 	/*
 	** Now start processing from the start of the string again.
 	** Check whether the start of the string matches the name of
@@ -334,16 +377,27 @@
 	** skip past the prefix.
 	*/
 
-	if (strip_prefix(&start, unify)) {
+	if (strip_prefix(&start, unify1)) {
 		category = UNIFY;
-	} else if (strip_prefix(&start, compare)) {
+	} else if (strip_prefix(&start, compare1)) {
 		category = COMPARE;
-		if (mode_num != 0) goto wrong_format;
-	} else if (strip_prefix(&start, mindex)) {
+		if (mode_num != 0) goto not_plain_mercury;
+	} else if (strip_prefix(&start, index1)) {
 		category = INDEX;
-		if (mode_num != 0) goto wrong_format;
+		if (mode_num != 0) goto not_plain_mercury;
 	} else {
 		category = ORDINARY;
+		/*
+		** For ordinary predicates, we should have matched
+		** against something by now --
+		** either the "mercury__" prefix, for LLDS mangling,
+		** or the "_f" or "_p" suffix, for MLDS mangling.
+		*/
+		if (!matched) goto not_plain_mercury;
+	}
+
+	if (category != ORDINARY && start[0] == '_') {
+		start++;
 	}
 
 	/*
@@ -417,7 +471,9 @@
 		}
 	}
 
-	module = strip_module_name(&start, end, trailing_context_1);
+	if (!high_level) {
+		module = strip_module_name(&start, end, trailing_context_1, NULL);
+	}
 
 	/*
 	** look for "IntroducedFrom" or "DeforestationIn" or "AccFrom"
@@ -607,16 +663,48 @@
 */
 
 not_plain_mercury:
+	/*
+	** Undo any in-place modifications done while trying to demangle
+	** predicate names.
+	*/
+	strcpy(name, orig_name);
+	start = name;
+	end = name + strlen(name);
 
-	if (!strip_prefix(&start, mercury_data)) {
-		goto wrong_format;
+	/*
+	** skip any leading underscore inserted by the C compiler
+	*/
+	if (*start == '_') {
+		start++;
 	}
 
-	if (strip_prefix(&start, base_typeclass_info)) {
-		goto typeclass_info;
+	if (strip_prefix(&start, mercury_data)) {
+		/* LLDS */
+		high_level = MR_FALSE;
+		if (strip_prefix(&start, underscores_base_typeclass_info)) {
+			goto typeclass_info;
+		}
+	} else {
+		/* MLDS */
+		high_level = MR_TRUE;
+		if (strip_prefix(&start, base_typeclass_info)) {
+			goto typeclass_info;
+		}
+		strip_prefix(&start, mercury);
 	}
 
-	module = strip_module_name(&start, end, trailing_context_2);
+	module = strip_module_name(&start, end, trailing_context_2, NULL);
+	if (high_level) {
+		/*
+		** For MLDS, the module name gets duplicated (XXX why?).
+		** So here we must replace `foo:foo' with just `foo'.
+		*/
+		size_t half_len = strlen(module) / 2;
+		if (strncmp(module, module + half_len + 1, half_len) != 0) {
+			goto wrong_format;
+		}
+		module += half_len + 1;
+	}
 
 	if (strip_prefix(&start, type_ctor_info)) {
 		data_category = INFO;
@@ -690,7 +778,7 @@
 	** layout:
 	**	<module-qualified class name>__arity<arity>__
 	*/
-	class_name = strip_module_name(&start, end, trailing_context_3);
+	class_name = strip_module_name(&start, end, trailing_context_3, NULL);
 	/* XXX fix_mangled_ascii() */
 	if (!(strip_prefix(&start, arity_string)
 		&& strip_leading_integer(&start, &class_arity)
@@ -713,7 +801,7 @@
 		if (class_arg_num != 0) {
 			strcat(class_arg_buf, ", ");
 		}
-		class_arg = strip_module_name(&start, end, trailing_context_3);
+		class_arg = strip_module_name(&start, end, trailing_context_3, NULL);
 		if (!(strip_prefix(&start, arity_string)
 		      && strip_leading_integer(&start, &arity)
 		      && strip_prefix(&start, "__")))
@@ -744,7 +832,8 @@
 	** left.
 	*/
 static const char *
-strip_module_name(char **start_ptr, char *end, const char *trailing_context[])
+strip_module_name(char **start_ptr, char *end,
+		const char *special_prefixes[], const char *special_suffixes[])
 {
 	const char *module;		/* module name */
 	char *module_end;		/* end of the module name */
@@ -765,10 +854,18 @@
 		** Check for special cases
 		*/
 		MR_bool stop = MR_FALSE;
-		for (i = 0; trailing_context[i] != NULL; i++) {
+		for (i = 0; special_prefixes[i] != NULL; i++) {
 			if (strncmp(start,
-				trailing_context[i],
-				strlen(trailing_context[i])) == 0)
+				special_prefixes[i],
+				strlen(special_prefixes[i])) == 0)
+			{
+				stop = MR_TRUE;
+			}
+		}
+		for (i = 0; special_suffixes != NULL && special_suffixes[i] != NULL; i++) {
+			if (strncmp(next_double_underscore,
+				special_suffixes[i],
+				strlen(special_suffixes[i])) == 0)
 			{
 				stop = MR_TRUE;
 			}
@@ -800,11 +897,10 @@
 }
 
 	/*
-	** Remove the prefix from a string, if it has 
-	** it. 
-	** Returns MR_TRUE if it has that prefix, and newstr will
-	** then point to the rest of that string.
-	** If the string doesn't have that prefix, newstr will
+	** Remove the prefix from a string, if it has it. 
+	** Returns MR_TRUE if the string has that prefix, and
+	** *str will then point to the rest of that string.
+	** If the string doesn't have that prefix, *str will
 	** be unchanged, and the function will return MR_FALSE.
 	*/
 static MR_bool 
@@ -816,6 +912,26 @@
 
 	if (strncmp(*str, prefix, len) == 0) {
 		*str += len;
+		return MR_TRUE;
+	}
+	return MR_FALSE;
+}
+
+	/*
+	** Remove the suffix from a string, if it has it.
+	** If the string between start and *end has the specified suffix,
+	** sets *end to point to the beginning of the suffix and returns
+	** MR_TRUE; otherwise leaves *end unchanged and returns MR_FALSE.
+	*/
+static MR_bool 
+strip_suffix(const char *start, char **end, const char *suffix) 
+{
+	int len;
+
+	len = strlen(suffix);
+
+	if (*end - start >= len && strncmp(*end - len, suffix, len) == 0) {
+		*end -= len;
 		return MR_TRUE;
 	}
 	return MR_FALSE;
-- 
Fergus Henderson <fjh at cs.mu.oz.au>  |  "I have always known that the pursuit
The University of Melbourne         |  of excellence is a lethal habit"
WWW: <http://www.cs.mu.oz.au/~fjh>  |     -- the last words of T. S. Garp.
--------------------------------------------------------------------------
mercury-reviews mailing list
post:  mercury-reviews at cs.mu.oz.au
administrative address: owner-mercury-reviews at cs.mu.oz.au
unsubscribe: Address: mercury-reviews-request at cs.mu.oz.au Message: unsubscribe
subscribe:   Address: mercury-reviews-request at cs.mu.oz.au Message: subscribe
--------------------------------------------------------------------------



More information about the reviews mailing list