[m-dev.] For Review: Bytecode interpreter

Fergus Henderson fjh at cs.mu.OZ.AU
Mon Jan 29 19:33:13 AEDT 2001


On 24-Jan-2001, Levi Cameron <l.cameron2 at ugrad.unimelb.edu.au> wrote:
> Index: mb_mem.c
...
> +void
> +MB_GC_free(void *mem)
> +{
> +	return MB_free(mem);
> +}

`return <expr>' in a void-valued function is non-portable.

> Index: mb_stack.c
> +typedef struct MB_Stack_Tag {
>  	MB_Word*data;

s/_Tag/_Struct/
s/*data/ *data/

> Index: mb_util.c
> +MB_CString
> +MB_str_new(MB_Word len)
> +{
> +	MB_CString c = MB_GC_new_array_atomic(char, len+1);

s/+/ + /

> +MB_CString
> +MB_str_new_cat(MB_CString_Const a, MB_CString_Const b)
> +{
> +	memcpy(new_str, a, len_a);
> +	memcpy(new_str+len_a, b, len_b);
> +
> +	new_str[len_a+len_b] = 0;

s/+/ + /g

> +MB_CString
> +MB_str_dup(MB_CString_Const str)
> +{
> +	MB_CString c = MB_str_new(strlen(str));

s/strlen(str)/strlen(str) + 1/

> Index: mb_util.h
> @@ -31,12 +31,23 @@
>  void
>  MB_util_error(const char *fmt, ...);
>  
> +void SAY(const char* fmr, ...);

s/fmr/fmt/ ?

> +/* allocate space for a new string*/
> +MB_CString	MB_str_new(MB_Word len);	/* len is w/o null terminator */

s at string*/@string */@

> +++ mb_interface.h	Wed Jan 24 18:44:34 2001
> +/* Entry point for a native code call to det bytecode */
> +MB_Word*	MB_bytecode_call_entry(MB_Call* bytecode_call);
> +
> +/* Return to deterministic code after call to native code */
> +MB_Word*	MB_bytecode_return_det(void);

What's the meaning of the return type here?

> +/* Returns pointer to the stub that calls bytecode_return_det */
> +MB_Word*	MB_native_get_return_det(void);
>
> +/* Find the native code entry point for a procedure */
> +MB_Word*	MB_code_find_proc_native(MB_CString_Const module,
> +			MB_CString_Const pred, MB_Word proc,
> +			MB_Word arity, MB_Byte is_func);

The return type of those two functions should be `MR_Code *',
I think.

Also the argument types could be more informative.
Generally you should use MB_Word only for uninterpreted raw memory.
If you're giving that memory some specific interpretetation, e.g.
as an integer, then use some more specific type, e.g. MB_Unsigned.

> +++ mb_machine_def.h	Wed Jan 24 18:44:34 2001
> +struct MB_Machine_State_Tag {
...
> +	struct {
> +		/* The determinism of the currently executing function */
> +		/* (set to a return value from MB_code_get_det) */

Fix the comment layout.

> +/* Return the number  of bytecodes there are */
> +MB_Word		MB_code_size(void);
> +/* Read the bytecode at a given address */
> +/*MB_Bytecode	MB_code_get(MB_Word*adr);*/
> +/* Get the bytecode type at a given address */
> +MB_Byte		MB_code_get_id(MB_Word*adr);

s/*adr/ *adr/g

Some more blank lines would help.

> +++ mb_module.c	Wed Jan 24 18:44:34 2001
> +/* XXX: no fixed limits */
> +#define MAX_CODE_COUNT		10000
> +#define MAX_CODE_DATA_COUNT	160000
> +#define MAX_MODULES		64

The comment here would be better phrased as
"XXX we should avoid these fixed limits".

> +#define FILEVERSION	9

What's that for?

> +/*
> +** Translates calls from a predicate name/procedure to an actual code address
> +** Translates call & higher_or
> +*/

That comment is incomplete.

> +static MB_Bool
> +translate_calls(MB_Word*bc, MB_Word number_codes)

s/*bc/ *bc/

> +{
> +	/* XXX: temporarily table the procs, instead of re-searching
> +	** each time, but since there is usually only one proc per predicate,
> +	** don't bother for now
> +	*/

The comment would be clearer if it started with "We should", I think.

Fix the comment layout.

> +	MB_Word i;
> +	for (i = 0; i < number_codes; i++, bc++) {

It would be better to use `MR_Unsigned', or just `int',
rather than `MB_Word' here.

> +			target_is_native=&call_arg->call.is_native;

s/=/ = /

> +		/* Find the predicate start */
> +		if (pred_name != NULL) {
> +			/* First check if we can find it in the bytecode */
> +			MB_Word* adr = MB_code_find_proc(module_name,
> +				pred_name, proc_id,
> +				arity, is_func);
> +
> +			if (adr == MB_CODE_INVALID_ADR) {
> +				SAY(" Not found in bytecode");
> +				/* Otherwise look in the native code */
> +				adr = MB_code_find_proc_native(module_name,
> +					pred_name, proc_id, arity, is_func);
> +
> +				SAY(" Address from native: %08x", adr);
> +				if (adr == NULL) {
> +					MB_util_error(
> +						"Reference in bytecode %08x"
> +							" to unknown"
> +							" %s %s__%s/%d (%d)",
> +						(int)i,
> +						is_func ? "func" : "pred",
> +						module_name,
> +						pred_name,
> +						(int)arity,
> +						(int)proc_id);
> +					MB_fatal("(Are you sure the module"
> +						" was compiled with trace"
> +						" information enabled?)");
> +				} else {
> +					*target_is_native = FALSE;

Why do you set *target_is_native to FALSE in this case ...

> +				}
> +
> +			} else {
> +				*target_is_native = TRUE;

... and to TRUE here?  Isn't that backwards?

> +/* Translates labels to code addresses for those instructions that need it
> +** those translated are:
> +** enter_if, endof_then, enter_disjunction, enter_disjunct, endof_disjunct
> +** enter_switch, enter_switch_arm, endof_switch_arm, enter_negation, enter_proc
> +*/
> +static MB_Bool
> +translate_labels(MB_Word* bc, MB_Word number_codes, MB_Stack* label_stack)
> +{

Fix the comment layout.

You should document the meaning of the return value.

> +	MB_Word i;
> +	MB_Bytecode_Arg* cur_proc_arg = NULL;
> +
> +	for (i = 0; i < number_codes; i++, bc++) {
> +		MB_Bytecode_Arg* cur_arg =
> +			MB_code_get_arg(bc);
> +		#define XLATLABEL(bytecodetype, lbl) \
> +			cur_arg->bytecodetype.lbl.adr = \
> +				((cur_arg->bytecodetype.lbl.index < \
> +					cur_proc_arg->enter_proc.label_count) \
> +				 && (cur_arg->bytecodetype.lbl.index > 0) \
> +				? (MB_Word*)MB_stack_peek(label_stack, \
> +					cur_proc_arg->enter_proc.label_index + \
> +					cur_arg->bytecodetype.lbl.index) \
> +				: MB_CODE_INVALID_ADR)

You should document what that macro does, and the meanings of its
arguments.

It think it would be clearer if you made that macro a separate
function, with arguments `cur_arg', `cur_proc_arg', and
`label', which would be `&cur_arg->bytecodetype.lbl'.

Then instead of
				XLATLABEL(enter_negation, end_label);
you'd have
				translate_label(cur_arg, cur_proc_arg,
					&cur_arg.enter_negation.end_label);

> +		switch (MB_code_get_id(bc)) {
...
> +			case MB_BC_enter_negation:
> +				XLATLABEL(enter_negation, end_label);
> +				break;
> +
> +			default:
> +			
> +		}

Is the default case supposed to be empty?
A brief comment would be helpful here.

> +/* Store the procedure's determinism that each instruction is executing
> under */
> +static MB_Bool
> +translate_detism(MB_Word* bc, MB_Word number_codes)
> +{

You should document the meaning of the return value.

The comment here is not very clear; it would help
if you could elaborate a bit.

> +	MB_Word i;
> +	MB_Byte bc_id;
> +	MB_Byte cur_detism = MB_BCID_ISDET;
> +	
> +	for (i = 0; i < number_codes; i++, bc++) {
> +		bc_id = MB_code_get_id(bc);
> +		if (bc_id == MB_BC_enter_proc) {
> +			switch (MB_code_get_arg(bc)->enter_proc.det) {
> +				case MB_DET_DET:
> +				case MB_DET_SEMIDET:
> +					cur_detism = MB_BCID_ISDET;
> +					break;
> +				case MB_DET_MULTIDET:
> +				case MB_DET_NONDET:
> +					cur_detism = 0;
> +					break;

Why `= 0' here?  Why not `= MB_BCID_IS_NONDET' or something like that?

> +				case MB_DET_INVALID:
> +					cur_detism = 0;
> +					break;

What's MB_DET_INVALID for?

> +				default:
> +					assert(FALSE);

You should handle the other determinisms (erroneous,
failure, cc_nondet, cc_multi) here.  They should be
treated the same as det, semidet, semidet, det respectively.
See compiler/code_model.m.

> +		if (cur_detism) {
> +			*bc |= cur_detism;
> +		}

It would be nicer to hide the bit manipulation here behind an access
macro, or to use a bitfield...

> +/*
> +** A native code procedure wishes to call a deterministic bytecode procedure
> +*/
> +MB_Module*
> +MB_module_load_name(MB_CString_Const module_name)

The comment here looks like a cut-and-paste error.

> +{
> +	MB_Module* module;
> +	MB_CString filename = MB_str_new_cat(module_name, ".mbc");
> +
> +	FILE* fp = fopen(filename, "rb+");

Why "rb+"?  Why not just "rb"?
You don't need to write to the file, do you?

> +#define ARGSIZE(name)	(sizeof(((MB_Bytecode*)NULL)->opt.name) + \
> +				sizeof(MB_Word)-1) \
> +				/ sizeof(MB_Word)

Use the `MR_bytes_to_words' macro, or something similar.

> +/*
> +** Load a module
> +** If fp is NULL then that means there is no bytecode information
> +** for this module -- revert to native code.
> +*/
> +MB_Module* MB_module_load(MB_CString_Const module_name, FILE* fp) {

The function name and the { should each be at the start of a new line.

> +	MB_Short version;
> +	MB_Word module_code_count = 0;
> +	MB_Word*module_start = code_id + code_count;

s/*/ */

> +		if (bc.id == MB_BC_label) {
> +			/* XXX: we strictly don't actually need to save the
> +			** labels but it makes label translations a lot faster.
> +			** After translation, the label stack is deleted
> +			*/

Fix the comment layout.

> +		/* copy the bytecode arguments into the code.data
> +		** structure, save the index & increment code.data
> +		** counters
> +		*/

Likewise.

> +		if (bc.id < sizeof(argument_size)/sizeof(argument_size[0]))
> +		{

The { should be on the same line as the if.

> +void
> +MB_module_unload(MB_Module* module)
> +{
> +	if (module != NULL) {
> +		/* the stacks will always be allocated since it will
> +		** have aborted if their allocation failed
> +		*/

Fix comment layout.

> +/* Get a bytecode's procedure's determinism */
> +MB_Byte
> +MB_code_get_det(MB_Word* adr)
> +{
> +	assert(MB_ip_normal(adr));
> +	
> +	/* return the determinism flag */
> +	return MB_BCID_DET(*adr) ? MB_ISDET_YES : MB_ISDET_NO;
> +}

Any reason why you used `MB_Byte' as the return type here,
rather than an enumeration type?

> +/*
> +** Returns true if a given instruction pointer points to a normal
> +** address
> +*/
> +MB_Bool
> +MB_ip_normal(MB_Word* ip)
> +{
> +	return ((ip >= code_id) && (ip < code_id+MAX_CODE_COUNT));
> +}

s/+/ + /

> +MB_Bool
> +MB_ip_special(MB_Word* ip)
> +{
> +	return ((MB_Unsigned)ip > (MB_Unsigned)MB_CODE_INVALID_ADR);
> +}

You should put a space after the `)' in type casts.


That's it for this review.  I'd like to see both full and relative
diffs when you've addressed my review comments.

Cheers,
	Fergus.

-- 
Fergus Henderson <fjh at cs.mu.oz.au>  |  "I have always known that the pursuit
                                    |  of excellence is a lethal habit"
WWW: <http://www.cs.mu.oz.au/~fjh>  |     -- the last words of T. S. Garp.
--------------------------------------------------------------------------
mercury-developers mailing list
Post messages to:       mercury-developers at cs.mu.oz.au
Administrative Queries: owner-mercury-developers at cs.mu.oz.au
Subscriptions:          mercury-developers-request at cs.mu.oz.au
--------------------------------------------------------------------------



More information about the developers mailing list