[m-dev.] for review: The .NET MSIL backend.

Fergus Henderson fjh at cs.mu.OZ.AU
Wed Oct 11 18:30:12 AEDT 2000


On 22-Sep-2000, Tyson Dowd <trd at cs.mu.OZ.AU> wrote:
> Index: compiler/mlds_to_il.m
> +generate_il(MLDS, ILAsm, ContainsCCode, IO, IO) :-
> +	MLDS = mlds(MercuryModuleName, _ForeignCode, Imports, Defns),
> +	ModuleName = mercury_module_name_to_mlds(MercuryModuleName),
> +	il_info_init(ModuleName, Imports, Info0),
> +
> +		% Generate code for all the methods in this module.
> +	list__foldl(generate_method_defn, Defns, Info0, Info1),
> +	or(Info1 ^ file_c_code, Info1 ^ method_c_code, ContainsCCode),

Using infix `or` or bool__or would be clearer here.

> +	Info = Info1 ^ file_c_code := ContainsCCode,
> +	ClassDecls = Info ^ classdecls,
> +	InitInstrs = condense(flatten(Info ^ init_instrs)),
> +	AllocInstrs = condense(flatten(Info ^ alloc_instrs)),

I suggest s/condense/list__condense/ and s/flatten/list__flatten/

> +generate_method_defn(defn(function(PredLabel, ProcId, MaybeSeqNum, PredId), 
> +		Context, DeclsFlags, Entity)) --> 
> +	( { Entity = mlds__function(_PredProcId, Params, MaybeStatement) } ->
> +		il_info_get_module_name(ModuleName),
> +		{ term__type_to_term(defn(function(PredLabel, ProcId, 
> +			MaybeSeqNum, PredId), Context, DeclsFlags, Entity),
> +			MLDSDefnTerm) },
> +		{ Params = mlds__func_params(Args, Returns) },
> +		{ list__map(mlds_arg_to_il_arg, Args, ILArgs) },
> +		{ params_to_il_signature(ModuleName, Params,
> +			ILSignature) },
> +		{ predlabel_to_id(PredLabel, ProcId, MaybeSeqNum,
> +			Id) },
> +
> +		il_info_new_method(ILArgs, ILSignature, id(Id)),
> +		il_info_get_next_block_id(BlockId),
> +		( { MaybeStatement = yes(Statement) } -> 
> +			statement_to_il(Statement, InstrsTree0)
> +		;
> +			{ InstrsTree0 = empty }
> +		),
> +		( { PredLabel = pred(predicate, no, "main", 2) },
> +		  { MaybeSeqNum = no }
> +		->
> +			{ EntryPoint = [entrypoint] },
> +			=(Info10),
> +			dcg_set(Info10 ^ has_main := yes)
> +		;
> +			{ EntryPoint = [] }
> +		),
> +		il_info_get_locals_list(Locals),
> +			% Need to insert a ret for functions returning
> +			% void.
> +		{ Returns = [] ->
> +			MaybeRet = instr_node(ret)
> +		;
> +			MaybeRet = empty
> +		},
> +		{ InstrsTree = tree__list([
> +			instr_node(start_block(scope(Locals), BlockId)),
> +			InstrsTree0, 
> +			MaybeRet,
> +			instr_node(end_block(scope(Locals), BlockId))
> +			])
> +		},
> +		{ Instrs = condense(flatten(InstrsTree)) },
> +
> +		{ list__append(EntryPoint, 
> +			% XXX should avoid hard-coding "100" for
> +			% the maximum static size -- not sure if we even
> +			% need this anymore.
> +			[maxstack(100), 
> +			% note that we only need .zeroinit to ensure
> +			% verifiability; for nonverifiable code,
> +			% we could omit that (it ensures that all
> +			% variables are initialized to zero).
> +			zeroinit,
> +			instrs(Instrs)], 
> +				MethodContents) },
> +		{ ClassDecls = [
> +			comment_term(MLDSDefnTerm),
> +			ilasm__method(methodhead([static], id(Id), 
> +				ILSignature, []), MethodContents)
> +		] },
> +		il_info_add_classdecls(ClassDecls)
> +	;
> +		{ error("entity not a function") }
> +	).

It would be helpful to have some comments here explaining
what each bit of the code does.  It would also be worth
while explaining in comments what the generated code will
look like.

It would be better to use a variable rather than duplicating
the defn(...) term in the call to term__type_to_term.

The XXX about the stack limit should also be mentioned in the
TODO list at the top of the file.

> +generate_method_defn(defn(data(DataName), Context, DeclsFlags, Entity)) --> 
> +	il_info_get_module_name(ModuleName),
> +	{ term__type_to_term(defn(data(DataName), Context, DeclsFlags, Entity),
> +		MLDSDefnTerm) },
> +	{ mangle_dataname(DataName, FieldName) },
> +	{ ModuleStructuredName = mlds_module_name_to_structured_name(
> +		ModuleName) },
> +	( 
> +		{ Entity = mlds__data(_DataType, DataInitializer) }
> +	->
> +		data_initializer_to_instrs(DataInitializer, AllocInstrsTree,
> +			InitInstrTree),
> +		{ FieldRef = make_fieldref(il_array_type,
> +			ModuleStructuredName, FieldName) },
> +		{ AllocComment = comment_node(
> +			string__append("allocation for ", FieldName)) },
> +		{ InitComment = comment_node(
> +			string__append("initializer for ", FieldName)) },
> +		{ AllocInstrsTree = node([]) ->
> +			StoreAllocTree = node([]),
> +			StoreInitTree = node([stsfld(FieldRef)]),
> +			LoadTree = node([])
> +		;
> +			StoreAllocTree = node([stsfld(FieldRef)]),
> +			StoreInitTree = node([pop]),
> +			LoadTree = node([ldsfld(FieldRef)])
> +		},
> +			% Add a store after the alloc instrs (if necessary)
> +		{ AllocInstrs = list__condense(tree__flatten(
> +			tree(AllocComment,
> +			tree(AllocInstrsTree, StoreAllocTree)))) },
> +			% Add a load before the init instrs (if necessary)
> +		{ InitInstrs = list__condense(tree__flatten(
> +			tree(InitComment,
> +			tree(LoadTree, tree(InitInstrTree, StoreInitTree))))) },
> +		il_info_add_alloc_instructions(AllocInstrs),
> +		il_info_add_init_instructions(InitInstrs),
> +		{ Field = field([public, static], il_array_type,
> +			FieldName, no, none) },
> +		{ ClassDecls = [comment_term(MLDSDefnTerm), Field] }
> +	;
> +		{ ClassDecls = [comment_term(MLDSDefnTerm),
> +			comment("This type unimplemented.")] }
> +	),
> +	il_info_add_classdecls(ClassDecls).

It would be better to use a variable rather than duplicating
the defn(...) term in the call to term__type_to_term.

It would also be worth while explaining in comments what the generated
code will look like.  (See export.m for some examples of the kind
of comments I mean.)  This point applies through-out this module, really.

I think rather than generating a "this type unimplemented" comment,
you should call error/1.  It's an invariant of the MLDS data structure
(currently not documented, but you could fix that ;) that the category
of the name (type/function/data) matches the category
of the definition (class/function/data).

> +	% Generate top level declarations for "other" things (e.g.
> +	% anything that is not a methods in the main class).

s/methods/method/

> +	% XXX Really, this should be integrated with the other pass
> +	% (generate_method_defn), and we can generate them all at once.
> +	% This would involve adding the top-level decls list to il_info too.
> +:- pred generate_other_decls(mlds_module_name, mlds__defn, list(ilasm__decl)).
> +:- mode generate_other_decls(in, in, out) is det.
> +generate_other_decls(ModuleName, MLDSDefn, Decls) :-
> +	ModuleStructuredName = mlds_module_name_to_structured_name(ModuleName),
> +	MLDSDefn = mlds__defn(EntityName, _Context, _DeclFlags, Entity), 
> +	term__type_to_term(MLDSDefn, MLDSDefnTerm),
> +	( EntityName = type(TypeName, _Arity),
> +		list__append(ModuleStructuredName, [TypeName],
> +			FullClassName),
> +		( Entity = mlds__class(ClassDefn),
> +			ClassDefn = mlds__class_defn(mlds__class, _Imports, 
> +				_Inherits, _Implements, Defns) ->
> +			list__map(defn_to_class_decl, Defns, ILDefns),
> +			make_constructor(FullClassName, ClassDefn, 
> +				ConstructorILDefn),
> +			Decls = [comment_term(MLDSDefnTerm),
> +				class([public], TypeName, noextend,
> +				noimplement, [ConstructorILDefn | ILDefns])]
> +		; Entity = mlds__class(ClassDefn),
> +			ClassDefn = mlds__class_defn(mlds__struct, _Imports, 
> +				_Inherits, _Implements, Defns) ->
> +			list__map(defn_to_class_decl, Defns, ILDefns),
> +			make_constructor(FullClassName, ClassDefn, 
> +				ConstructorILDefn),
> +			Decls = [comment_term(MLDSDefnTerm),
> +				class([public], TypeName, 
> +				extends(il_envptr_class_name), 
> +				noimplement, [ConstructorILDefn | ILDefns])]
> +		;
> +			Decls = [comment_term(MLDSDefnTerm),
> +				comment("This type unimplemented.")]
> +		)

The indentation here is very poor -- on a first reading, I didn't notice
the `->'s.  They should be on a line of their own.

Also the code to deconstruct ClassDefn = mlds__class_defn(...)
is duplicated.

> +	; EntityName = function(_PredLabel, _ProcId, _MaybeFn, _PredId),
> +		Decls = []
> +			% XXX we don't handle export
> +	; EntityName = export(_),
> +		Decls = []
> +	; EntityName = data(_),
> +		Decls = []
> +	).

It would be clearer to place the comment after the EntityName = export(...)
line (and hence in front of the `Decls = []' line to which it applies).

> +	% initialize this value, leave it on the stack.
> +	% XXX the code generator doesn't box these values
> +	% we need to look ahead at them and box them appropriately.
> +:- pred data_initializer_to_instrs(mlds__initializer::in,
> +	instr_tree::out, instr_tree::out, il_info::in, il_info::out) is det.
> +data_initializer_to_instrs(init_obj(Rval), node([]), InitInstrs) --> 
> +	load(Rval, InitInstrs).

Is the XXX comment here still correct?

> +	% Put the array allocation in AllocInstrs.
> +	% For sub-initializations, we don't worry about keeping AllocInstrs
> +	% and InitInstrs apart, since we are only interested in top level
> +	% allocations.
> +data_initializer_to_instrs(init_array(InitList), AllocInstrs, InitInstrs) -->
> +	{ AllocInstrs = node([ldc(int32, i(list__length(InitList))), 
> +		newarr(il_generic_type)]) },
> +	{ AddInitializer = 
> +		(pred(Init0::in, X0 - Tree0::in, (X0 + 1) - Tree::out,
> +				in, out) is det -->
> +			maybe_box_initializer(Init0, Init),
> +			data_initializer_to_instrs(Init, ATree1, ITree1),
> +			{ Tree = tree(tree(Tree0, node(
> +					[dup, ldc(int32, i(X0))])), 
> +				tree(tree(ATree1, ITree1), 
> +					node([stelem(il_generic_simple_type)]
> +				))) }
> +		) },
> +	list__foldl2(AddInitializer, InitList, 0 - empty, _ - InitInstrs).

This is another example of a place where it would help if you had
some comments explaining what the generated code is supposed to look like.

> +	% XXX shouldn't we re-use the code for creating fieldrefs here?
> +defn_to_class_decl(mlds__defn(Name, _Context, _DeclFlags, 
> +	mlds__data(Type, _Initializer)), ILClassDecl) :-
> +	mlds_type_to_ilds_type(Type, ILType0),

The second line of the clause head should be indented two tabs.

> +	% XXX this needs to be implemented
> +defn_to_class_decl(mlds__defn(_Name, _Context, _DeclFlags,
> +	mlds__function(_PredProcId, _Params, _MaybeStatements)), ILClassDecl) :-
> +		ILClassDecl = comment("unimplemented: functions in classes").
> +
> +	% XXX this might not need to be implemented (nested classes)
> +	% since it will probably be flattened earlier.
> +defn_to_class_decl(mlds__defn(_Name, _Context, _DeclFlags,
> +	mlds__class(_)), _ILClassDecl) :-
> +		error("nested data definition not expected here").

Likewise here.  And the code should only be indented one tab.

> +%-----------------------------------------------------------------------------%
> +%
> +% Convert basic MLDS statements into IL.
> +%
> +
> +:- pred statements_to_il(list(mlds__statement), instr_tree, il_info, il_info).
> +:- mode statements_to_il(in, out, in, out) is det.
> +statements_to_il([], empty, Info, Info).
> +statements_to_il([ S | Statements], tree(Instrs0, Instrs1)) -->
> +	statement_to_il(S, Instrs0),
> +	statements_to_il(Statements, Instrs1).

It's bad style to mix DCG clauses and ordinary clauses or facts
for the same predicate.  Change the first clause to

	statements_to_il([], empty) --> [].

> +statement_to_il(statement(while(Condition, Body, AtLeastOnce), 
> +		_Context), Instrs) -->
> +	generate_condition(Condition, ConditionInstrs, EndLabel),
> +	il_info_make_next_label(StartLabel),
> +	statement_to_il(Body, BodyInstrs),
> +	{ AtLeastOnce = no,
> +		Instrs = tree__list([
> +			comment_node("while"),
> +			instr_node(label(StartLabel)),
> +			ConditionInstrs,
> +			BodyInstrs,
> +			instr_node(br(label_target(StartLabel))),
> +			instr_node(label(EndLabel))
> +		])
> +			% XXX this generates a branch over branch which
> +			% is suboptimal.
> +	; AtLeastOnce = yes, 
> +		Instrs = tree__list([
> +			comment_node("while (actually do ... while)"),
> +			instr_node(label(StartLabel)),
> +			BodyInstrs,
> +			ConditionInstrs,
> +			instr_node(br(label_target(StartLabel))),
> +			instr_node(label(EndLabel))
> +		])

It's not clear whether the comment applies to the `AtLeastOnce = no'
case or to the `AtLeastOnce = yes' case.  If the latter, the comment
should be after the `AtLeastOnce = yes' unification.  If the former,
it should be before the `Instrs = ...'

> +statement_to_il(statement(return(Rvals), _Context), Instrs) -->
> +	( { Rvals = [Rval] } ->
> +		load(Rval, LoadInstrs),
> +		{ Instrs = tree__list([
> +			LoadInstrs,
> +			instr_node(ret)]) }
> +	;
> +		{ sorry("multiple return values") }
> +	).

I suggest adding a comment here

		% MS IL doesn't support multiple return values

before the call to sorry/1, to make it clear that this is due to a
limitation in MS IL, not just because you haven't gotten around to
implementing it.

> +statement_to_il(statement(do_commit(Ref), _Context), Instrs) -->
> +	load(Ref, RefLoadInstrs),
> +	{ Instrs = tree__list([
> +		comment_node("do_commit/1"),
> +		RefLoadInstrs,
> +		instr_node(throw)
> +		]) }.
> +
> +statement_to_il(statement(try_commit(Ref, GoalToTry, CommitHandlerGoal), 
> +		_Context), Instrs) -->
> +
> +	% For commits, we use exception handling.

It would be helpful to put that comment also in the code for do_commit.

> +	%
> +	% .try {	
> +	%	GoalToTry
> +	%	leave label1
> +	% } catch commit_type {
> +	%	pop
> +	% 	CommitHandlerGoal
> +	%	leave label1
> +	% }
> +	% label1:
> +	% 

I suggest adding

	% We generate code of the following form:

before that comment (and then indenting the commented IL psuedo-code).
Also you should add "// discard the exception object"
before (or on the RHS of) the `pop' instruction.

> +atomic_statement_to_il(new_object(Target, _MaybeTag, Type, Size, _CtorName,
> +		Args, ArgTypes), Instrs) -->
> +	% If this is an env_ptr we should call the constructor...
> +	% This is how we will handle high-level data
> +    ( { Type = mlds__generic_env_ptr_type 
> +      ; Type = mlds__class_type(_, _, _) } ->
> +	{ mlds_type_to_ilds_type(Type, ILType) },

The `->' should be on a line of its own, and the condition should be
indented.  Also you're mixing 4- and 8-character indentation.

A comment here explaining what kind of code you generate would be very
helpful.

> +load(mkword(_Tag, _Rval), Instrs, Info, Info) :- 
> +		Instrs = comment_node("unimplemented load rval mkword").

That should only be indented one tab.

> +	% XXX check these, what should we do about multi strings, 
> +	% characters, etc.
> +load(const(Const), Instrs, Info, Info) :- 
> +	( Const = true,
> +		Instrs = instr_node(ldc(int32, i(1)))
> +	; Const = false,
> +		Instrs = instr_node(ldc(int32, i(0)))

Doesn't IL have a bool type?

> +	; Const = data_addr_const(DataAddr),
> +		data_addr_constant_to_fieldref(DataAddr, FieldRef),
> +		Instrs = instr_node(ldsfld(FieldRef))
> +			% We might consider loading an integer for 
> +			% null function types.
> +	; Const = null(_MLDSType),
> +		Instrs = instr_node(ldnull)

The comment here should go after the `Const = null(...)' line,
so (a) it is immediately before the `ldnull' instruction that it is
referring to and (b) it is clear that it applies to the code below
rather than the code above.

> +load(mem_addr(Lval), Instrs, Info0, Info) :- 
> +	( Lval = var(Var),
> +		mangle_mlds_var(Var, MangledVarStr),
> +		Info0 = Info,
> +		( is_local(MangledVarStr, Info) ->
> +			Instrs = instr_node(ldloca(name(MangledVarStr)))
> +		;
> +			Instrs = instr_node(ldarga(name(MangledVarStr)))
> +		)
> +	; Lval = field(_MaybeTag, Rval, FieldNum, FieldType, ClassType),
> +		get_fieldref(FieldNum, FieldType, ClassType, FieldRef),
> +		load(Rval, RvalLoadInstrs, Info0, Info),
> +		Instrs = tree__list([
> +			RvalLoadInstrs, 
> +			instr_node(ldflda(FieldRef))
> +			])
> +	; Lval = mem_ref(_, _),
> +		Info0 = Info,
> +		Instrs = throw_unimplemented("load mem_addr lval mem_ref")
> +	).

Hmm, should there be an XXX comment above the call to throw_unimplemented?
Or is there some reason why this case won't arise?

> +store(mem_ref(_Rval, _Type), _Instrs, Info, Info) :- 
> +			% you always need load the reference first, then
> +			% the value, then stind it.  There's no swap
> +			% instruction.  Annoying, eh?
> +	unexpected("store into mem_ref").

The comment is too far indented.

> +unaryop_to_il(std_unop(hash_string), _,
> +	throw_unimplemented("unimplemented hash_string unop")) --> [].

That one will need to be implemented sometime,
since I plan to make use of it for string switches...
The code for this can just be a call to string__hash in library/string.m.

> +:- pred binaryop_to_il(binary_op, instr_tree, il_info,
> +	il_info) is det.
> +:- mode binaryop_to_il(in, out, in, out) is det.
> +
> +binaryop_to_il((+), instr_node(I)) -->
> +	{ I = add(nocheckoverflow, signed) }.
> +
> +binaryop_to_il((-), instr_node(I)) -->
> +	{ I = sub(nocheckoverflow, signed) }.
> +
> +binaryop_to_il((*), instr_node(I)) -->
> +	{ I = mul(nocheckoverflow, signed) }.

It would be great to add an option to do integer overflow checking...
with the IL back-end this would be quite easy to implement.

> +	% Integer comparison
> +binaryop_to_il((<), node([clt(signed)])) --> [].
> +binaryop_to_il((>), node([cgt(signed)])) --> [].
> +binaryop_to_il((<=), node([cgt(signed), ldc(int32, i(0)), ceq])) --> [].
> +binaryop_to_il((>=), node([clt(signed), ldc(int32, i(0)), ceq])) --> [].

All of those hard-coded `int32's here and elsewhere are probably
not a good idea.  For most of these, it would be better to use
a more abstract name, e.g. `mercury_int_il_type'.
On a 64-bit architecture it might make more sense to map Mercury ints
to int64 rather than int32.

> +%-----------------------------------------------------------------------------
> +%
> +% Class constructors (.cctors) are used to fill in the RTTI information
> +% needed for any types defined in the module.  The RTTI is stored in
> +% static fields of the class.
> +
> +	% .cctors can be called at practically any time by the runtime
> +	% system, but must be called before a static field is loaded
> +	% (the runtime will ensure this happens).
> +	% Since all the static fields in RTTI reference other RTTI static
> +	% fields, we could run into problems if we load a field from another
> +	% class before we initialize it.  Often the RTTI in one module will
> +	% refer to another, creating exactly this cross-referencing problem.
> +	% To avoid problems, we initialize them in 3 passes.
> +	%
> +	% 1. We allocate all the RTTI data structures but leave them blank.
> +	% When this is complete we set a flag to say we have completed this
> +	% pass.  After this pass is complete, it is safe for any other module
> +	% to reference our data structures.
> +	%
> +	% 2. We call all the .cctors for RTTI data structures that we
> +	% import.  We do this because we can't load fields from them until we
> +	% know they have been allocated.
> +	%
> +	% 3. We fill in the RTTI info in the already allocated structures.
> +	%
> +	% To ensure that pass 2 doesn't cause looping, the first thing done
> +	% in all .cctors is a check to see if the flag is set.  If it is, we
> +	% return immediately (we have already been called and our
> +	% initialization is either complete or at pass 2).
> +
> +
> +	% Generate a classdecl for a .cctor, including a test to see if
> +	% we have already initialized.
> +	%

The comments above are good, but it would be helpful to spell out the
exact form of the generated code here, e.g.

	%	// if (rtti_initialized) return;
	%	ldsfld rtti_initialized
	%	brfalse done_label
	%	ret
	%     done_label:
	%
	%	// rtti_initialized = true
	%	ldc.i4.1
	%	stsfld rtti_initialized
	%
	%	// allocate RTTI data structures.
	%	...
	%
	%	// call .cctors
	%	...
	%
	%	// fill in fields of RTTI data structures
	%	...

(with more details in the ... parts if possible).

Comments like this make it much easier to debug things if something
goes wrong and easier to understand in case the code needs modifying in
future.

> +:- pred mlds_signature_to_il_return_param(mlds__func_signature, ret_type).
> +:- mode mlds_signature_to_il_return_param(in, out) is det.
> +mlds_signature_to_il_return_param(func_signature(_, Returns), Param) :-
> +	( Returns = [] ->
> +		Param = void
> +	; Returns = [ReturnType] ->
> +		mlds_type_to_ilds_type(ReturnType, ReturnParam),
> +		ReturnParam = ilds__type(_, SimpleType),
> +		Param = simple_type(SimpleType)
> +	;
> +		error("cannot handle multiple return values")
> +	).

Earlier you called sorry/1, here it calls error/1.
Probably sorry/1 is better.

My earlier point about commenting this applies here too.

> +params_to_il_signature(ModuleName, mlds__func_params(Inputs, Outputs),
> +		 ILSignature) :-
> +	list__map(input_param_to_ilds_type(ModuleName), Inputs, ILInputTypes),
> +	( Outputs = [] ->
> +		Param = void
> +	; Outputs = [ReturnType] ->
> +		mlds_type_to_ilds_type(ReturnType, ReturnParam),
> +		ReturnParam = ilds__type(_, SimpleType),
> +		Param = simple_type(SimpleType)
> +	;
> +		sorry("multiple return values")

Likewise here.

> +:- pred input_param_to_ilds_type(mlds_module_name, 
> +		pair(entity_name, mlds__type), ilds__param).
> +:- mode input_param_to_ilds_type(in, in, out) is det.
> +input_param_to_ilds_type(ModuleName, EntityName - MldsType, 
> +		ILType - yes(Id)) :-
> +	mangle_entity_name(EntityName, VarName),
> +	mangle_mlds_var(qual(ModuleName, VarName), Id),
> +	mlds_type_to_ilds_type(MldsType, ILType).
> +	
> +
> +:- pred output_param_to_ilds_type(mlds__type, ilds__param).
> +:- mode output_param_to_ilds_type(in, out) is det.
> +output_param_to_ilds_type(MldsType, ILType - no) :-
> +	mlds_type_to_ilds_type(MldsType, ILType0),
> +	make_reference(ILType0, ILType).
> +
> +:- pred make_reference(ilds__type, ilds__type).
> +:- mode make_reference(in, out) is det.
> +make_reference(ILType0, ILType) :-
> +	ILType = ilds__type([], '&'(ILType0)).
> +
> +	% XXX make sure all the types are converted correctly
> +
> +mlds_type_to_ilds_type(mlds__rtti_type(_RttiName), ILType) :-
> +	ILType = il_array_type.

These preds could all be functions.


This pred should definitely be a function.

> +mlds_type_to_ilds_type(mlds__native_int_type, ILType) :-
> +	ILType = ilds__type([], int32).

There should be some comments here explaining why we use
`int32' rather than the IL `native_int' type.

> +mangle_mlds_var(qual(_ModuleName, VarName), Str) :-
> +	string__format("%s", [s(VarName)], Str).

Why not just `Str = VarName'?

A comment here explaining why no mangling is needed would be helpful.

> +%
> +% Preds and funcs to find the types of rvals.
> +%
> +
> +	% This gives us the type of an rval. 
> +	% This type is with respect to IL (for example we map code
> +	% address and data address constants to the MLDS version of our
> +	% representation).  This is so you can generate appropriate
> +	% box rvals for rval_consts.

I found that comment a bit confusing.
s/our representation/their IL representation/?

> +rval_to_type(mkword(_Tag, _Rval), Type, I, I) :- 
> +	ModuleName = mercury_module_name_to_mlds(unqualified("mercury")),
> +	Type = mlds__class_type(qual(ModuleName, "incorrect"), 0, mlds__class).
> +rval_to_type(unop(_, _), Type, I, I) :- 
> +	ModuleName = mercury_module_name_to_mlds(unqualified("mercury")),
> +	Type = mlds__class_type(qual(ModuleName, "incorrect"), 0, mlds__class).
> +rval_to_type(binop(_, _, _), Type, I, I) :- 
> +	ModuleName = mercury_module_name_to_mlds(unqualified("mercury")),
> +	Type = mlds__class_type(qual(ModuleName, "incorrect"), 0, mlds__class).
> +rval_to_type(mem_addr(_), Type, I, I) :-
> +	ModuleName = mercury_module_name_to_mlds(unqualified("mercury")),
> +	Type = mlds__class_type(qual(ModuleName, "incorrect"), 0, mlds__class).

That code is duplicated four times.  Also I'm not sure what it is
trying to do.  What's this "mercury.incorrect" class type for?

> +:- func convert_to_object(ilds__type) = methodref.
> +
> +convert_to_object(Type) = methoddef(call_conv(no, default), 
> +		simple_type(il_generic_simple_type),
> +		member_name(ConvertClass, id("ToObject")), [Type]) :-
> +	ConvertClass = ["mercury", "mr_convert"].

"mr_" should be "MR_" (throughout).

> +:- func simple_type_to_value_class_name(simple_type) = string.
> +simple_type_to_value_class_name(int8) = "Int8".
> +simple_type_to_value_class_name(int16) = "Int16".
> +simple_type_to_value_class_name(int32) = "Int32".
> +simple_type_to_value_class_name(int64) = "Int64".
> +simple_type_to_value_class_name(uint8) = "Int8".
> +simple_type_to_value_class_name(uint16) = "UInt16".
> +simple_type_to_value_class_name(uint32) = "UInt32".
> +simple_type_to_value_class_name(uint64) = "UInt64".
> +simple_type_to_value_class_name(float32) = "Single".
> +simple_type_to_value_class_name(float64) = "Double".
> +simple_type_to_value_class_name(bool) = "Bool".
> +simple_type_to_value_class_name(char) = "Char".
> +simple_type_to_value_class_name(refany) = _ :-
> +	error("no value class name for refany").
> +simple_type_to_value_class_name(class(Name)) = VCName :-
> +	( Name = ["mscorlib", "System", "String"] ->
> +		VCName = "String"
> +	;
> +		error("unknown class name")
> +	).

Is `String' really a value class??

> +:- func il_string_class_name = structured_name.
> +il_string_class_name = ["mscorlib", "System", "String"].

I think it would be a good idea to abstract out all the occurrences
of "mscorlib" and put them in a function -- MS might change the name
at some point.

> +:- func make_methoddecls(instr_tree) = list(methoddecl).
> +make_methoddecls(InstrTree) = MethodDecls :-
> +	Instrs = list__condense(flatten(tree(InstrTree, instr_node(ret)))),
> +	MethodDecls = [
> +		maxstack(100),
> +		instrs(Instrs)
> +		].

There's that magic number 100 again, this time without even an XXX...
Why does this code occur twice?  Do you need a .zeroinit here too?

> +	% XXX This should really be generated at a higher level	
> +	% XXX For now we only call the constructor if it is an env_ptr.
...
> +		( 
> +			ILType = il_envptr_type, 
> +			ClassName = il_envptr_class_name
> +		->
> +			Instrs = [ldarg(index(0)),
> +				newobj_constructor(ClassName),
> +				stfld(FieldRef)]
> +		;
> +			ILType = il_commit_type,
> +			ClassName = il_commit_class_name
> +		->
> +			Instrs = [ldarg(index(0)),
> +				newobj_constructor(ClassName),
> +				stfld(FieldRef)]

The comment is out-of-date -- it should say envptr or commit type.

> +il_info_new_method(ILArgs, ILSignature, MethodName,
> +	il_info(ModuleName, Imports, FileCCode,
> +		AllocInstrs, InitInstrs, ClassDecls, HasMain, ClassCCode,
> +		__Locals, _InstrTree, _LabelCounter, _BlockCounter, MethodCCode,
> +		_Args, _Name, _Signature),
> +	il_info(ModuleName, Imports, NewFileCCode,
> +		AllocInstrs, InitInstrs, ClassDecls, HasMain, NewClassCCode,
> +		map__init, empty, counter__init(1), counter__init(1), no,
> +		ILArgs, MethodName, ILSignature)) :-
> +	or(ClassCCode, MethodCCode, NewClassCCode),
> +	or(FileCCode, MethodCCode, NewFileCCode).

IMHO the `or's would look better as infix and/or module qualified.

> +:- pred il_info_get_mlds_type(ilds__id, mlds__type, il_info, il_info).
> +:- mode il_info_get_mlds_type(in, out, in, out) is det.
> +il_info_get_mlds_type(Id, Type, Info0, Info0) :- 
> +	( 
> +		map__search(Info0 ^ locals, Id, Type0)
> +	->
> +		Type = Type0
> +	;
> +		assoc_list__search(Info0 ^ arguments, Id, Type0)
> +	->
> +		Type = Type0
> +	;
> +		% If it isn't a local or an argument, it can only be a
> +		% "global variable" -- used by RTTI.  We will assume this
> +		% is an integer for now.
> +		Type = native_int_type
> +	).

You should add an XXX here.

Also it would be helpful if this code was linked with the
other occurrence of this, e.g. by abstracting the use of
native_int_type here into a separate function called from
both places.

> Index: compiler/options.m

[... to be continued ...]

-- 
Fergus Henderson <fjh at cs.mu.oz.au>  |  "I have always known that the pursuit
WWW: <http://www.cs.mu.oz.au/~fjh>  |  of excellence is a lethal habit"
PGP: finger fjh at 128.250.37.3        |     -- the last words of T. S. Garp.
--------------------------------------------------------------------------
mercury-developers mailing list
Post messages to:       mercury-developers at cs.mu.oz.au
Administrative Queries: owner-mercury-developers at cs.mu.oz.au
Subscriptions:          mercury-developers-request at cs.mu.oz.au
--------------------------------------------------------------------------



More information about the developers mailing list