[m-rev.] for review: MLDS demangling
Fergus Henderson
fjh at cs.mu.OZ.AU
Sat Aug 24 03:00:14 AEST 2002
Estimated hours taken: 8
Branches: main
util/mdemangle.c:
profiler/demangle.m:
Add support for demangling code produced by the MLDS back-end.
Workspace: /home/ceres/fjh/mercury
Index: profiler/demangle.m
===================================================================
RCS file: /home/mercury1/repository/mercury/profiler/demangle.m,v
retrieving revision 1.15
diff -u -d -r1.15 demangle.m
--- profiler/demangle.m 25 Sep 2001 09:37:07 -0000 1.15
+++ profiler/demangle.m 23 Aug 2002 14:07:13 -0000
@@ -73,7 +73,9 @@
:- pred demangle_from_c(string, string).
:- mode demangle_from_c(in, out) is semidet.
demangle_from_c -->
- ( demangle_proc ->
+ ( demangle_proc_hl ->
+ { true }
+ ; demangle_proc_ll ->
{ true }
; demangle_data ->
{ true }
@@ -85,9 +87,9 @@
/*---------------------------------------------------------------------------*/
-:- pred demangle_proc(string, string).
-:- mode demangle_proc(in, out) is semidet.
-demangle_proc -->
+:- pred demangle_proc_ll(string, string).
+:- mode demangle_proc_ll(in, out) is semidet.
+demangle_proc_ll -->
remove_prefix("mercury__"),
%
@@ -309,6 +311,249 @@
{ string__append_list(Parts, DemangledName) },
dcg_set(DemangledName).
+:- pred demangle_proc_hl(string, string).
+:- mode demangle_proc_hl(in, out) is semidet.
+demangle_proc_hl -->
+ maybe_remove_prefix("mercury__"),
+ %
+ % Get integer from end of string (it might be the mode number,
+ % it might be the internal label number).
+ %
+ remove_trailing_int(Int),
+
+ ( m_remove_suffix("i") ->
+ %
+ % if we got to an `i', that means it is an internal
+ % label of the form `append_3_p_0_i1'
+ % in that case, save the internal label number and then
+ % get the mode number
+ %
+ { MaybeInternalLabelNum = yes(Int) },
+ m_remove_suffix("_"),
+ remove_trailing_int(ModeNum0)
+ ;
+ { MaybeInternalLabelNum = no },
+ { ModeNum0 = Int }
+ ),
+
+ ( m_remove_suffix("f_") ->
+ { PredOrFunc = "function" },
+ { Normal = yes }
+ ; m_remove_suffix("p_") ->
+ { PredOrFunc = "predicate" },
+ { Normal = yes }
+ ;
+ % it could be a compiler-generated unify or compare predicate
+ { PredOrFunc = "comp-gen predicate" },
+ { Normal = no }
+ ),
+
+ %
+ % scan back past the arity number and then parse it
+ %
+ m_remove_suffix("_"),
+ remove_trailing_int(Arity),
+ m_remove_suffix("_"),
+
+ %
+ % Process the mangling introduced by unused_args.m.
+ % This involves stripping off the `__ua<m>' or `__uab<m>' added to
+ % the end of the predicate/function name, where m is the mode number.
+ %
+ (
+ remove_trailing_int(UA_ModeNum),
+ m_remove_suffix("__ua")
+ ->
+ { UnusedArgs = yes(ModeNum0 - no) },
+ { ModeNum1 is UA_ModeNum mod 10000 }
+ ;
+ remove_trailing_int(UA_ModeNum),
+ m_remove_suffix("__uab")
+ ->
+ { UnusedArgs = yes(ModeNum0 - yes) },
+ { ModeNum1 is UA_ModeNum mod 10000 }
+ ;
+ { UnusedArgs = no },
+ { ModeNum1 = ModeNum0 }
+ ),
+
+ %
+ % Process the mangling introduced by higher_order.m.
+ % This involves stripping off the `__ho<n>' where
+ % n is a unique identifier for this specialized version
+ %
+ (
+ remove_trailing_int(HO_Num),
+ m_remove_suffix("__ho")
+ ->
+ { HigherOrder = yes(HO_Num) }
+ ;
+ { HigherOrder = no }
+ ),
+ { ModeNum = ModeNum1 },
+
+ %
+ % Make sure special predicates with unused_args
+ % are reported correctly.
+ %
+
+ ( { UnusedArgs = yes(_), Normal = no } ->
+ remove_trailing_int(Arity)
+ ;
+ { true }
+ ),
+
+ %
+ % Separate the module name from the predicate name
+ %
+ remove_maybe_module_prefix(MaybeModule0,
+ ["IntroducedFrom__", "DeforestationIn__",
+ "AccFrom__", "TypeSpecOf__", "__"]),
+
+ %
+ % Check whether the start of the string matches the name of
+ % one of the special compiler-generated predicates; if so,
+ % set the `category' to the appropriate value and then
+ % skip past the prefix. Also check that the setting of
+ % the category matches the setting of `Normal' determined above.
+ %
+ ( remove_prefix("__Unify____") ->
+ { Normal = no },
+ { Category0 = unify }
+ ; remove_prefix("__Compare____") ->
+ { Normal = no },
+ { Category0 = compare },
+ % there should only be one mode for compare/3 preds
+ { ModeNum0 = 0 }
+ ; remove_prefix("__Index____") ->
+ { Normal = no },
+ { Category0 = index },
+ % there should only be one mode for index/2 preds
+ { ModeNum0 = 0 }
+ ;
+ { Normal = yes },
+ { Category0 = ordinary }
+ ),
+
+
+ %
+ % Fix any mangled ascii codes in the predicate name.
+ %
+ % XXX This should be done *before* stripping off
+ % the mangling added by HLDS->HLDS passes such as
+ % unused_args.m and higher_order.m.
+ % (Doing it here means that we won't properly demangle
+ % names that involve both special characters and
+ % unused_args/higher_order specializations.)
+ % But for the MLDS back-end, it needs to be done *after*
+ % removing the module prefix, and currently that can't be
+ % done until after stripping off the `__ua*' and `__ho*' suffixes.
+ %
+ fix_mangled_ascii,
+ %
+ % Fix any mangled ascii codes in the module name, if any.
+ %
+ {
+ MaybeModule0 = no,
+ MaybeModule = no
+ ;
+ MaybeModule0 = yes(ModuleName0),
+ fix_mangled_ascii(ModuleName0, ModuleName),
+ MaybeModule = yes(ModuleName)
+ },
+
+ %
+ % Now we need to look at the pred name and see if it is an
+ % introduced lambda predicate.
+ % XXX handle multiple prefixes
+ %
+
+ =(PredName0),
+
+ (
+ (
+ remove_prefix("IntroducedFrom__")
+ ->
+ { IntroducedPredType0 = (lambda) }
+ ;
+ remove_prefix("DeforestationIn__")
+ ->
+ { IntroducedPredType0 = deforestation }
+ ;
+ remove_prefix("AccFrom__")
+ ->
+ { IntroducedPredType0 = accumulator }
+ ;
+ remove_prefix("TypeSpecOf__"),
+ { IntroducedPredType0 = type_spec("") }
+ )
+ ->
+ (
+ remove_prefix("pred__")
+ ->
+ { LambdaPredOrFunc = "pred" }
+ ;
+ remove_prefix("func__")
+ ->
+ { LambdaPredOrFunc = "func" }
+ ;
+ { IntroducedPredType0 = type_spec(_) },
+ remove_prefix("pred_or_func__")
+ ->
+ { LambdaPredOrFunc = "" }
+ ;
+ { fail }
+ ),
+ (
+ remove_maybe_pred_name(MPredName),
+ { MPredName = yes(PredName1) },
+ ( { IntroducedPredType0 = type_spec(_) } ->
+ remove_type_spec(TypeSpec),
+ { IntroducedPredType = type_spec(TypeSpec) },
+ { Seq = 0 },
+ { Line = 0 }
+
+ % The compiler adds a redundant mode
+ % number to the predicate name to avoid
+ % creating two predicates with the same
+ % name (deep profiling doesn't like that).
+ % It isn't used here so we just ignore it.
+ % The compiler also adds a version number
+ % for the argument order used for specialized
+ % versions, which can also be ignored.
+ ;
+ { IntroducedPredType = IntroducedPredType0 },
+ remove_int(Line),
+ remove_prefix("__"),
+ remove_int(Seq)
+ )
+ ->
+ { PredName = PredName1 },
+ { Category = introduced(IntroducedPredType, Line,
+ Seq, LambdaPredOrFunc) }
+ ;
+ % If we get here it usually means that there
+ % were multiple prefixes, which aren't dealt
+ % with properly yet. Just treat it as an
+ % ordinary name for now.
+ { Category = ordinary },
+ { PredName = PredName0 }
+ )
+ ;
+ { Category = Category0 },
+ { PredName = PredName0 }
+ ),
+
+
+ %
+ % Now, finally, we can construct the demangled symbol name
+ %
+ { format_proc(Category, MaybeModule, PredOrFunc, PredName,
+ Arity, ModeNum, HigherOrder, UnusedArgs, MaybeInternalLabelNum,
+ Parts, []) },
+ { string__append_list(Parts, DemangledName) },
+ dcg_set(DemangledName).
+
:- pred format_proc(pred_category, maybe(string), string, string, int, int,
maybe(int), maybe(pair(int, bool)), maybe(int), list(string),
list(string)).
@@ -400,13 +645,29 @@
:- pred demangle_data(string, string).
:- mode demangle_data(in, out) is semidet.
demangle_data -->
- remove_prefix("mercury_data_"),
+ ( remove_prefix("mercury_data_") ->
+ % LLDS mangled data
+ { HighLevel = no }
+ ;
+ % MLDS mangled data
+ { HighLevel = yes },
+ maybe_remove_prefix("mercury__")
+ ),
remove_maybe_module_prefix(MaybeModule0,
["type_ctor_info_", "type_ctor_layout_",
"type_ctor_functors_", "common_"]),
{ MaybeModule0 = yes("") ->
MaybeModule = no
;
+ % for the MLDS back-end,
+ % the module qualifiers get include twice (XXX why?)
+ HighLevel = yes,
+ MaybeModule0 = yes(Twice)
+ ->
+ Once = string__left(Twice, string__length(Twice) // 2),
+ Once = string__right(Twice, string__length(Twice) // 2),
+ MaybeModule = yes(Once)
+ ;
MaybeModule = MaybeModule0
},
( remove_prefix("type_ctor_info_") ->
@@ -471,8 +732,8 @@
:- pred demangle_typeclass_info(string, string).
:- mode demangle_typeclass_info(in, out) is semidet.
demangle_typeclass_info -->
- remove_prefix("mercury_data_"),
- remove_prefix("__base_typeclass_info_"),
+ maybe_remove_prefix("mercury_data___"),
+ remove_prefix("base_typeclass_info_"),
remove_maybe_module_prefix(yes(ClassName), ["arity"]),
{ ClassName \= "" },
remove_prefix("arity"),
Index: util/mdemangle.c
===================================================================
RCS file: /home/mercury1/repository/mercury/util/mdemangle.c,v
retrieving revision 1.45
diff -u -d -r1.45 mdemangle.c
--- util/mdemangle.c 23 Jul 2002 19:39:00 -0000 1.45
+++ util/mdemangle.c 23 Aug 2002 16:53:25 -0000
@@ -43,6 +43,7 @@
static MR_bool cut_trailing_underscore_integer(char *str,
char **end, int *num);
static MR_bool strip_prefix(char **str, const char *prefix);
+static MR_bool strip_suffix(const char *str, char **end, const char *suffix);
static MR_bool strip_leading_integer(char **start_ptr, int *num);
/*
@@ -148,11 +149,12 @@
static const char entry[] = "_entry_";
static const char mercury[] = "mercury__";
static const char func_prefix[] = "fn__"; /* added for functions */
- static const char unify[] = "__Unify___";
- static const char compare[] = "__Compare___";
- static const char mindex[] = "__Index___";
- /* we call it `mindex' rather than `index' to
- avoid a naming conflict with strchr's alter ego index() */
+ static const char unify1[] = "__Unify___";
+ static const char unify2[] = "__Unify____";
+ static const char compare1[] = "__Compare___";
+ static const char compare2[] = "__Compare____";
+ static const char index1[] = "__Index___";
+ static const char index2[] = "__Index____";
static const char introduced[] = "IntroducedFrom__";
static const char deforestation[] = "DeforestationIn__";
@@ -170,7 +172,9 @@
static const char type_ctor_layout[] = "type_ctor_layout_";
static const char type_ctor_info[] = "type_ctor_info_";
static const char type_ctor_functors[] = "type_ctor_functors_";
- static const char base_typeclass_info[] = "__base_typeclass_info_";
+ static const char base_typeclass_info[] = "base_typeclass_info_";
+ static const char underscores_base_typeclass_info[] =
+ "__base_typeclass_info_";
static const char common[] = "common";
static const char arity_string[] = "arity";
static const char underscores_arity_string[] = "__arity";
@@ -183,6 +187,7 @@
deforestation,
accumulator,
type_spec,
+ unify1, compare1, index1,
NULL
};
@@ -207,6 +212,8 @@
int mode_num;
int mode_num2;
int arity;
+ MR_bool high_level = MR_TRUE;
+ MR_bool matched = MR_FALSE;
const char *pred_or_func; /* either "predicate" or "function" */
/* does this proc have any unused arguments */
MR_bool unused_args = MR_FALSE;
@@ -271,11 +278,10 @@
strip_prefix(&start, entry);
/*
- ** strip off the `mercury__' prefix
+ ** strip off the `mercury__' prefix, if any
*/
-
- if (!strip_prefix(&start, mercury)) {
- goto not_plain_mercury;
+ if (strip_prefix(&start, mercury)) {
+ matched = MR_TRUE;
}
/*
@@ -283,25 +289,16 @@
*/
/*
- ** strip off the `fn__' prefix, if any
- */
- if (strip_prefix(&start, func_prefix)) {
- pred_or_func = "function";
- } else {
- pred_or_func = "predicate";
- }
-
- /*
** Get integer from end of string (it might be the mode number,
** it might be the internal label number). We'll assume its mode
** number for the moment.
*/
if (!cut_trailing_integer(start, &end, &mode_num)) {
- goto wrong_format;
+ goto not_plain_mercury;
}
- if (end == start) goto wrong_format;
+ if (end == start) goto not_plain_mercury;
/*
** if we got to an `i', that means it is an internal
@@ -311,21 +308,54 @@
*/
if (*--end == 'i') {
internal = mode_num;
- if (end == start || *--end != '_') goto wrong_format;
+ if (end == start || *--end != '_') goto not_plain_mercury;
if (!cut_trailing_underscore_integer(start, &end, &mode_num)) {
- goto wrong_format;
+ goto not_plain_mercury;
}
}
+ if (end == start) goto not_plain_mercury;
+
+ /*
+ ** strip off the `fn__' prefix, if any
+ */
+ if (strip_prefix(&start, func_prefix)) {
+ high_level = MR_FALSE;
+ pred_or_func = "function";
+ } else if (strip_suffix(start, &end, "_f")) {
+ high_level = MR_TRUE;
+ matched = MR_TRUE;
+ pred_or_func = "function";
+ } else if (strip_suffix(start, &end, "_p")) {
+ high_level = MR_TRUE;
+ matched = MR_TRUE;
+ pred_or_func = "predicate";
+ } else {
+ /*
+ ** It's not a function.
+ ** But it could be either an LLDS predicate,
+ ** or an MLDS compiler-generated predicate.
+ */
+ high_level = (strstr(start, unify2) ||
+ strstr(start, compare2) ||
+ strstr(start, index2));
+ pred_or_func = "predicate";
+ }
+
+ if (end == start) goto not_plain_mercury;
+
/*
** scan back past the arity number and then parse it
*/
if (!cut_trailing_underscore_integer(start, &end, &arity)) {
- goto wrong_format;
+ goto not_plain_mercury;
}
+ if (high_level) {
+ module = strip_module_name(&start, end, trailing_context_1);
+ }
/*
** Now start processing from the start of the string again.
** Check whether the start of the string matches the name of
@@ -334,16 +364,27 @@
** skip past the prefix.
*/
- if (strip_prefix(&start, unify)) {
+ if (strip_prefix(&start, unify1)) {
category = UNIFY;
- } else if (strip_prefix(&start, compare)) {
+ } else if (strip_prefix(&start, compare1)) {
category = COMPARE;
- if (mode_num != 0) goto wrong_format;
- } else if (strip_prefix(&start, mindex)) {
+ if (mode_num != 0) goto not_plain_mercury;
+ } else if (strip_prefix(&start, index1)) {
category = INDEX;
- if (mode_num != 0) goto wrong_format;
+ if (mode_num != 0) goto not_plain_mercury;
} else {
category = ORDINARY;
+ /*
+ ** For ordinary predicates, we should have matched
+ ** against something by now --
+ ** either the "mercury__" prefix, for LLDS mangling,
+ ** or the "_f" or "_p" suffix, for MLDS mangling.
+ */
+ if (!matched) goto not_plain_mercury;
+ }
+
+ if (category != ORDINARY && start[0] == '_') {
+ start++;
}
/*
@@ -417,7 +458,9 @@
}
}
- module = strip_module_name(&start, end, trailing_context_1);
+ if (!high_level) {
+ module = strip_module_name(&start, end, trailing_context_1);
+ }
/*
** look for "IntroducedFrom" or "DeforestationIn" or "AccFrom"
@@ -607,16 +650,48 @@
*/
not_plain_mercury:
+ /*
+ ** Undo any in-place modifications done while trying to demangle
+ ** predicate names.
+ */
+ strcpy(name, orig_name);
+ start = name;
+ end = name + strlen(name);
- if (!strip_prefix(&start, mercury_data)) {
- goto wrong_format;
+ /*
+ ** skip any leading underscore inserted by the C compiler
+ */
+ if (*start == '_') {
+ start++;
}
- if (strip_prefix(&start, base_typeclass_info)) {
- goto typeclass_info;
+ if (strip_prefix(&start, mercury_data)) {
+ /* LLDS */
+ high_level = MR_FALSE;
+ if (strip_prefix(&start, underscores_base_typeclass_info)) {
+ goto typeclass_info;
+ }
+ } else {
+ /* MLDS */
+ high_level = MR_TRUE;
+ if (strip_prefix(&start, base_typeclass_info)) {
+ goto typeclass_info;
+ }
+ strip_prefix(&start, mercury);
}
module = strip_module_name(&start, end, trailing_context_2);
+ if (high_level) {
+ /*
+ ** For MLDS, the module name gets duplicated (XXX why?)
+ ** So here we must replace `foo:foo' with just `foo'.
+ */
+ size_t half_len = strlen(module) / 2;
+ if (strncmp(module, module + half_len + 1, half_len) != 0) {
+ goto wrong_format;
+ }
+ module += half_len + 1;
+ }
if (strip_prefix(&start, type_ctor_info)) {
data_category = INFO;
@@ -800,11 +875,10 @@
}
/*
- ** Remove the prefix from a string, if it has
- ** it.
- ** Returns MR_TRUE if it has that prefix, and newstr will
- ** then point to the rest of that string.
- ** If the string doesn't have that prefix, newstr will
+ ** Remove the prefix from a string, if it has it.
+ ** Returns MR_TRUE if the string has that prefix, and
+ ** *str will then point to the rest of that string.
+ ** If the string doesn't have that prefix, *str will
** be unchanged, and the function will return MR_FALSE.
*/
static MR_bool
@@ -816,6 +890,26 @@
if (strncmp(*str, prefix, len) == 0) {
*str += len;
+ return MR_TRUE;
+ }
+ return MR_FALSE;
+}
+
+ /*
+ ** Remove the suffix from a string, if it has it.
+ ** Returns MR_TRUE if the string between start and *end
+ ** has the specified suffix, and sets *end to point to
+ ** the beginning of the suffix.
+ */
+static MR_bool
+strip_suffix(const char *start, char **end, const char *suffix)
+{
+ int len;
+
+ len = strlen(suffix);
+
+ if (*end - start >= len && strncmp(*end - len, suffix, len) == 0) {
+ *end -= len;
return MR_TRUE;
}
return MR_FALSE;
--
Fergus Henderson <fjh at cs.mu.oz.au> | "I have always known that the pursuit
The University of Melbourne | of excellence is a lethal habit"
WWW: <http://www.cs.mu.oz.au/~fjh> | -- the last words of T. S. Garp.
--------------------------------------------------------------------------
mercury-reviews mailing list
post: mercury-reviews at cs.mu.oz.au
administrative address: owner-mercury-reviews at cs.mu.oz.au
unsubscribe: Address: mercury-reviews-request at cs.mu.oz.au Message: unsubscribe
subscribe: Address: mercury-reviews-request at cs.mu.oz.au Message: subscribe
--------------------------------------------------------------------------
More information about the reviews
mailing list