[m-rev.] for review: IL back-end: options for verifiable code

Fergus Henderson fjh at cs.mu.OZ.AU
Thu Aug 2 06:17:39 AEST 2001


This patch is basically a revised and extended version of the one that I posted
earlier for putting nondet environments on the stack rather than the heap.
It addresses Tyson's concerns about keeping things verifiable by introducing
a bunch of options related to generating verifiable code.

There's a couple of things I still need to fix in mlds_to_il.m
(I've done them before in different patches but haven't yet
merged them into this patch) which are marked with XXXs below,
but I'd appreciate any review comments at this stage.

----------

Improve the support for generating verifiable code and/or efficient but
unverifiable code for the IL back-end.

We now allocate nondet environments on the stack by default, but they will get
allocated on the heap if you specify --verifiable-code.

compiler/options.m:
	Add several new options:
		--verifiable-code
		--il-funcptr-types
		--il-refany-fields
		--il-byref-tailcalls
		--put-nondet-env-on-heap
	Currently these are all not officially documented, for various reasons,
	so I have not yet added grade components for any of these options.

compiler/handle_options.m:
	Set --put-nondet-env-on-heap if --verifiable-code is specified,
	unless both --il-funcptr-types and --il-refany-fields are specified.

compiler/ml_elim_nested.m:
	Support the --put-nondet-env-on-heap option:
	allocate the environment on the stack by default.

compiler/mlds_to_il.m:
	Add a new field to the il_data_rep_type to handle the different
	representations for the environment pointer types.
	Add a new procedure get_il_data_rep for computing it.

	XXX TODO: handle casts to/from refany
	XXX TODO: handle casts to/from unmanaged pointers (don't cast)

compiler/mlds_to_csharp.m
compiler/mlds_to_mcpp.m
	Use get_il_data_rep rather than constructing it manually.

Workspace: /mnt/mars/home/mars/fjh/ws3/mercury
Index: compiler/options.m
===================================================================
RCS file: /home/mercury1/repository/mercury/compiler/options.m,v
retrieving revision 1.330
diff -u -d -r1.330 options.m
--- compiler/options.m	2001/08/01 15:46:30	1.330
+++ compiler/options.m	2001/08/01 18:04:39
@@ -215,6 +215,21 @@
 		;	det_copy_out
 		;	nondet_copy_out
 		;	put_commit_in_own_func
+		;	put_nondet_env_on_heap
+			% IL back-end compilation model options
+		;	verifiable_code
+		;	il_refany_fields
+		;	il_funcptr_types
+		;	il_byref_tailcalls % Currently this is not really a
+					   % compilation model option,
+					   % i.e. it doesn't affect the ABI.
+					   % In future it might become one,
+					   % though -- we should return
+					   % multiple values in value types,
+					   % rather than using byrefs.
+					   % Also it's nicer to keep it with
+					   % the other IL back-end options here.
+
 
 	% Options for internal use only
 	% (the values of these options are implied by the
@@ -656,7 +671,14 @@
 	gcc_nested_functions	-	bool(no),
 	det_copy_out		-	bool(no),
 	nondet_copy_out		-	bool(no),
-	put_commit_in_own_func	-	bool(no)
+	put_commit_in_own_func	-	bool(no),
+	put_nondet_env_on_heap	-	bool(no),
+
+		% IL back-end compilation model options
+	verifiable_code		-	bool(no),
+	il_funcptr_types	- 	bool(no),
+	il_refany_fields	- 	bool(no),
+	il_byref_tailcalls	- 	bool(no)
 ]).
 option_defaults_2(internal_use_option, [
 		% Options for internal use only
@@ -1083,6 +1105,16 @@
 long_option("det-copy-out",		det_copy_out).
 long_option("nondet-copy-out",		nondet_copy_out).
 long_option("put-commit-in-own-func",	put_commit_in_own_func).
+long_option("put-nondet-env-on-heap",	put_nondet_env_on_heap).
+	% IL back-end compilation model options
+long_option("verifiable-code",		verifiable_code).
+long_option("verifiable",		verifiable_code).
+long_option("il-funcptr-types",		il_funcptr_types).
+long_option("IL-funcptr-types",		il_funcptr_types).
+long_option("il-refany-fields",		il_refany_fields).
+long_option("IL-refany-fields",		il_refany_fields).
+long_option("il-byref-tailcalls",	il_byref_tailcalls).
+long_option("IL-byref-tailcalls",	il_byref_tailcalls).
 
 % internal use options
 long_option("backend-foreign-languages",
@@ -2190,6 +2222,47 @@
 %		"\twhere commits are implemented via setjmp()/longjmp(),",
 %		"\tsince longjmp() may clobber any non-volatile local vars",
 %		"\tin the function that called setjmp().",
+% The --put-nondet-env-on-heap option is not documented because
+% it is enabled automatically (by handle_options) in the situations
+% where it is needed; the user should never need to set it.
+%		"--put-nondet-env-on-heap",
+%		"\tAllocate the environment structures used for",
+%		"\tnondeterministic Mercury procedures on the heap,",
+%		"\trather than on the stack.",
+%
+%	]),
+%	io__write_string("\n      IL back-end compilation model options:\n"),
+%	write_tabbed_lines([
+%
+% The --verifiable-code option is not yet documented because it is not yet fully
+% implemented.
+%		"--verifiable, --verifiable-code\t\t\t",
+%		"\tEnsure that the generated IL code is verifiable.",
+%
+% The --il-refany-fields option is not documented because currently there
+% are no IL implementations for which it is useful.
+%		"--il-refany-fields",
+%		"\tGenerate IL code that assumes that the CLI implementation",
+%		"\tsupports value types with fields of type `refany'.",
+%		"\tUsing this option could in theory allow more efficient",
+%		"\tverifiable IL code for nondeterministic Mercury procedures,",
+%		"\tif the CLI implementation supported it."
+%		"\tHowever, the current Microsoft CLR does not support it."
+%
+% The --il-byref-tailcalls option is not documented because currently there
+% are no IL implementations for which it is useful.
+%		"--il-byref-tailcalls",
+%		"\tGenerate IL code that assumes that the CLI verifier",
+%		"\tsupports tail calls with byref arguments."
+%
+% The --il-funcptr-types option is not documented because currently there
+% are no IL implementations for which it is useful.
+%		"--il-funcptr-types",
+%		"\tGenerate IL code that assumes that the IL assembler",
+%		"\tsupports function pointer types."
+%		"\tThe ECMA CLI specification allows function pointer types,"
+%		"\tbut the current (Beta 2) Microsoft CLR implementation"
+%		"\tdoes not support them."
 	]),
 
 	io__write_string("\n    Developer compilation model options:\n"),
Index: compiler/handle_options.m
===================================================================
RCS file: /home/mercury1/repository/mercury/compiler/handle_options.m,v
retrieving revision 1.113
diff -u -d -r1.113 handle_options.m
--- compiler/handle_options.m	2001/07/20 14:13:24	1.113
+++ compiler/handle_options.m	2001/08/01 18:23:27
@@ -294,6 +294,18 @@
 	;
 		[]
 	),
+
+	% Set --put-nondet-env-on-heap if --verifiable-code is specified,
+	% unless both --il-funcptr-types and --il-refany-fields
+	% are specified.
+	globals__io_lookup_bool_option(il_funcptr_types, ILFuncPtrTypes),
+	globals__io_lookup_bool_option(il_refany_fields, ILRefAnyFields),
+	( { ILFuncPtrTypes = yes, ILRefAnyFields = yes } ->
+		[]
+	;
+		option_implies(verifiable_code, put_nondet_env_on_heap, bool(yes))
+	),
+
 	% Generating Java implies high-level code, turning off nested functions,
 	% using copy-out for both det and nondet output arguments,
 	% using no tags, not optimizing tailcalls and no static ground terms.
Index: compiler/mlds_to_il.m
===================================================================
RCS file: /home/mercury1/repository/mercury/compiler/mlds_to_il.m,v
retrieving revision 1.56
diff -u -d -r1.56 mlds_to_il.m
--- compiler/mlds_to_il.m	2001/07/26 10:58:35	1.56
+++ compiler/mlds_to_il.m	2001/08/01 19:52:39
@@ -104,9 +104,13 @@
 :- pred mangle_mlds_var(mlds__var, ilds__id).
 :- mode mangle_mlds_var(in, out) is det.
 
+	% This type stores information affecting our IL data representation.
 :- type il_data_rep ---> il_data_rep(
-	highlevel_data	:: bool		% do we use high level data?
+		highlevel_data	:: bool,	% do we use high-level data?
+		il_envptr_type :: ilds__type	% what IL type do we use for
+						% mlds__generic_env_ptr_type?
 	).
+:- pred get_il_data_rep(il_data_rep::out, io__state::di, io__state::uo) is det.
 
 	% Get the corresponding ILDS type for an MLDS type 
 	% (this depends on which representation you happen to be using).
@@ -195,11 +199,11 @@
 	ModuleName = mercury_module_name_to_mlds(MercuryModuleName),
 	prog_out__sym_name_to_string(mlds_module_name_to_sym_name(ModuleName),
 			".", AssemblyName),
-	globals__io_lookup_bool_option(highlevel_data, HighLevelData, IO0, IO1),
+	get_il_data_rep(ILDataRep, IO0, IO1),
 	globals__io_lookup_bool_option(debug_il_asm, DebugIlAsm, IO1, IO),
 
 	IlInfo0 = il_info_init(ModuleName, AssemblyName, Imports,
-			il_data_rep(HighLevelData), DebugIlAsm),
+			ILDataRep, DebugIlAsm),
 
 	list__map_foldl(mlds_defn_to_ilasm_decl, Defns, ILDecls,
 			IlInfo0, IlInfo),
@@ -233,6 +237,12 @@
 	Namespace = [namespace(NamespaceName, ILDecls)],
 	ILAsm = list__condense([ThisAssembly, ExternAssemblies, Namespace]).
 
+get_il_data_rep(ILDataRep, IO0, IO) :-
+	globals__io_get_globals(Globals, IO0, IO),
+	globals__lookup_bool_option(Globals, highlevel_data, HighLevelData),
+	ILEnvPtrType = choose_il_envptr_type(Globals),
+	ILDataRep = il_data_rep(HighLevelData, ILEnvPtrType).
+
 %-----------------------------------------------------------------------------%
 %-----------------------------------------------------------------------------%
 
@@ -2468,7 +2478,8 @@
 
 mlds_type_to_ilds_type(_, mlds__commit_type) = il_commit_type.
 
-mlds_type_to_ilds_type(_, mlds__generic_env_ptr_type) = il_envptr_type.
+mlds_type_to_ilds_type(ILDataRep, mlds__generic_env_ptr_type) =
+	ILDataRep^il_envptr_type.
 
 	% XXX we ought to use the IL bool type
 mlds_type_to_ilds_type(_, mlds__native_bool_type) = ilds__type([], int32).
@@ -3241,18 +3252,44 @@
 
 %-----------------------------------------------------------------------------%
 %
-% The mapping to the environment type.
+% The mapping to the generic environment pointer type.
 %
 
-:- func il_envptr_type = ilds__type.
-il_envptr_type = ilds__type([], il_envptr_simple_type).
+% Unfortunately the .NET CLR doesn't have any verifiable way of creating a
+% generic pointer to an environment, unless you allocate them on the heap.
+% Using "refany" (a.k.a. "typedref") *almost* works, except that we need
+% to be able to put these pointers in environment structs, and the CLR
+% doesn't allow that (see ECMA CLI Partition 1, 8.6.1.3 "Local Signatures").
+% So we only do that if the --il-refany-fields option is set.
+% If it is not set, then handle_options.m will ensure that we allocate
+% the environments on the heap if verifiable code is requested.
 
-:- func il_envptr_simple_type = simple_type.
-il_envptr_simple_type = class(il_envptr_class_name).
+% For unverifiable code we allocate environments on the stack and use
+% unmanaged pointers.
 
-:- func il_envptr_class_name = ilds__class_name.
-il_envptr_class_name = mercury_runtime_name(["Environment"]).
+:- func choose_il_envptr_type(globals) = ilds__type.
+choose_il_envptr_type(Globals) = ILType :-
+	globals__lookup_bool_option(Globals, put_nondet_env_on_heap, OnHeap),
+	globals__lookup_bool_option(Globals, verifiable_code, Verifiable),
+	( OnHeap = yes ->
+		% Use an object reference type
+		ILType = il_heap_envptr_type
+	; Verifiable = yes ->
+		% Use "refany", the generic managed pointer type
+		ILType = ilds__type([], refany)
+	;
+		% Use unmanaged pointers
+		ILType = ilds__type([], int32)  % XXX we should use native_int
+	).
+
+:- func il_heap_envptr_type = ilds__type.
+il_heap_envptr_type = ilds__type([], il_heap_envptr_simple_type).
 
+:- func il_heap_envptr_simple_type = simple_type.
+il_heap_envptr_simple_type = class(il_heap_envptr_class_name).
+ 
+:- func il_heap_envptr_class_name = ilds__class_name.
+il_heap_envptr_class_name = mercury_runtime_name(["Environment"]).
 
 %-----------------------------------------------------------------------------%
 %
Index: compiler/ml_elim_nested.m
===================================================================
RCS file: /home/mercury1/repository/mercury/compiler/ml_elim_nested.m,v
retrieving revision 1.39
diff -u -d -r1.39 ml_elim_nested.m
--- compiler/ml_elim_nested.m	2001/07/18 10:20:53	1.39
+++ compiler/ml_elim_nested.m	2001/08/01 18:57:06
@@ -376,12 +376,11 @@
 	%		<LocalVars>
 	%	};
 	%
-		% IL uses classes instead of structs, so the code
-		% generated needs to be a little different.
-		% XXX Perhaps if we used value classes this could go
-		% away.
-	globals__get_target(Globals, Target),
-	( Target = il ->
+		% If we're allocating it on the heap, then we need to use 
+		% a class type rather than a struct (value type).
+		% This is needed for the IL back-end.
+	globals__lookup_bool_option(Globals, put_nondet_env_on_heap, OnHeap),
+	( OnHeap = yes ->
 		EnvTypeKind = mlds__class
 	;
 		EnvTypeKind = mlds__struct
@@ -392,11 +391,10 @@
 	EnvTypeFlags = env_type_decl_flags,
 	Fields = list__map(convert_local_to_field, LocalVars),
 
-		% IL uses classes instead of structs, so the code
-		% generated needs to be a little different.
-		% XXX Perhaps if we used value classes this could go
-		% away.
-	( Target = il ->
+		% If we're allocating it on the heap, then we're using
+		% a class type, and (for some back-ends, at least, e.g. IL)
+		% that means we need to define a constructor for it.
+	( OnHeap = yes ->
 			% Generate a ctor for the class.
 
 			% We generate an empty block for the body of the
@@ -412,12 +410,14 @@
 			% determined by the backend convention.
 		CtorDefn = mlds__defn(export("<constructor>"), Context,
 				CtorFlags, Ctor),
-		Ctors = [CtorDefn]
+		Ctors = [CtorDefn],
+		BaseClasses = [mlds__generic_env_ptr_type]
 	;
-		Ctors = []
+		Ctors = [],
+		BaseClasses = []
 	),
 	EnvTypeDefnBody = mlds__class(mlds__class_defn(EnvTypeKind, [], 
-		[mlds__generic_env_ptr_type], [], Ctors, Fields)),
+		BaseClasses, [], Ctors, Fields)),
 	EnvTypeDefn = mlds__defn(EnvTypeEntityName, Context, EnvTypeFlags,
 		EnvTypeDefnBody),
 
@@ -438,11 +438,12 @@
 	%
 	EnvVar = qual(ModuleName, mlds__var_name("env", no)),
 
-		% IL uses classes instead of structs, so the code
-		% generated needs to be a little different.
-		% XXX Perhaps if we used value classes this could go
-		% away.
-	( Target = il ->
+	%
+	% generate code to initialize the environment pointer,
+	% either by allocating an object on the heap, or by
+	% just taking the address of the struct we put on the stack
+	%
+	( OnHeap = yes ->
 		EnvVarAddr = lval(var(EnvVar, EnvTypeName)),
 		ml_init_env(EnvTypeName, EnvVarAddr, Context, ModuleName,
 			 Globals, EnvPtrVarDecl, InitEnv0),
@@ -502,7 +503,7 @@
 	% to
 	%	<Ret> <Func>(<Args>) {
 	%		struct <EnvClassName> *env_ptr;
-	%		env_ptr = &env_ptr_arg;
+	%		env_ptr = (<EnvClassName> *) env_ptr_arg;
 	%		<Body>
 	%	}
 	%
@@ -524,7 +525,12 @@
 				mlds__var_name("env_ptr_arg", no)),
 				mlds__generic_env_ptr_type)),
 		EnvPtrVarType = ml_make_env_ptr_type(Globals, TypeName),
+
+			% Insert a cast, to downcast from
+			% mlds__generic_env_ptr_type to the specific
+			% environment type for this procedure.
 		CastEnvPtrVal = unop(cast(EnvPtrVarType), EnvPtrVal),
+
 		ml_init_env(TypeName, CastEnvPtrVal, Context, ModuleName,
 			Globals, EnvPtrDecl, InitEnvPtr),
 		FuncBody = mlds__statement(block([EnvPtrDecl],
@@ -539,13 +545,12 @@
 	).
 
 :- func ml_make_env_ptr_type(globals, mlds__type) = mlds__type.
-ml_make_env_ptr_type(Globals, EnvType)  = EnvPtrType :-
+ml_make_env_ptr_type(Globals, EnvType) = EnvPtrType :-
+	globals__lookup_bool_option(Globals, put_nondet_env_on_heap, OnHeap),
+	globals__get_target(Globals, Target),
+	( Target = il, OnHeap = yes ->
 		% IL uses classes instead of structs, so the type
 		% is a little different.
-		% XXX Perhaps if we used value classes this could go
-		% away.
-	globals__get_target(Globals, Target),
-	( Target = il ->
 		EnvPtrType = EnvType
 	;
 		EnvPtrType = mlds__ptr_type(EnvType)

-- 
Fergus Henderson <fjh at cs.mu.oz.au>  |  "I have always known that the pursuit
The University of Melbourne         |  of excellence is a lethal habit"
WWW: <http://www.cs.mu.oz.au/~fjh>  |     -- the last words of T. S. Garp.
--------------------------------------------------------------------------
mercury-reviews mailing list
post:  mercury-reviews at cs.mu.oz.au
administrative address: owner-mercury-reviews at cs.mu.oz.au
unsubscribe: Address: mercury-reviews-request at cs.mu.oz.au Message: unsubscribe
subscribe:   Address: mercury-reviews-request at cs.mu.oz.au Message: subscribe
--------------------------------------------------------------------------



More information about the reviews mailing list