[m-dev.] For Review: Bytecode interpreter

Fergus Henderson fjh at cs.mu.OZ.AU
Thu Jan 25 13:59:17 AEDT 2001


On 24-Jan-2001, Levi Cameron <l.cameron2 at ugrad.unimelb.edu.au> wrote:
> 
> bytecode/mb_bytecode.c
> bytecode/mb_bytecode.h
...
> 	MB_read_cstring changed (see comments for new arguments)

The log message here is somewhat misleading, since
the change to MB_read_cstring didn't add any new arguments.

Also the log message doesn't explain how or (more importantly) why
it was changed.

> 	Added distinction between functions and predicates
> 	Added enter_else
> 	Code addresses are all pointers rather than simple ints
> 
> bytecode/mb_disasm.c
> bytecode/mb_disasm.h
> 	Added endof_negation_goal & enter_else
> 	Output strings are now easier to read
> 	MB_listing does not display anything for invalid addresses
> 	MB_listing takes line length argument
> 
> bytecode/mb_interface.c
> bytecode/mb_interface.h
> bytecode/mb_interface_stub.m
> 	Interfacing between native/bytecode
> 
> bytecode/mb_machine.c
> bytecode/mb_machine.h
> bytecode/mb_machine_def.h
> 	Large sections of code branched off into mb_module.?
> 	Most instructions completed, but not integrated with native
> 	code.
> 	Most of mb_machine_def has been removed as the native
> 	code functions provide the same functionality.
> 
> bytecode/mb_machine_show.c
> bytecode/mb_machine_show.h
> 	Completely changed. Less information now as a lot of what
> 	was being displayed before cannot be determined as easily
> 	now that it is stored in the mercury runtime.
> 
> bytecode/mb_mem.c
> bytecode/mb_mem.h
> 	Added routines for garbage collected memory
> 
> bytecode/mb_module.c
> bytecode/mb_module.h
> 	Loading & accessing bytecode. Argument data indexes & id are now
> 	stored in a single word. (see MB_BCID_xxx macros).
> 	Call & label addresses are now calculated at load time.
> 
> bytecode/mb_stack.c
> bytecode/mb_stack.h
> 	Added options for garbage collection of MB_Stack memory
> 
> bytecode/mb_util.c
> bytecode/mb_util.h
> 	Miscellaneous string functions added and SAY() for debugging
> 
> bytecode/simple01.m
>         Added. Simple test program. (replace with whatever
>         program is being tested at the time).
...
> Index: mb_bytecode.c
> @@ -540,10 +597,14 @@
>  				bc_p->opt.call.pred_id =
>  					pred_id;
>  				bc_p->opt.call.arity = arity;
> +				bc_p->opt.call.is_func = is_func;
>  				bc_p->opt.call.proc_id = proc_id;
> +
> +				bc_p->opt.call.is_native = TRUE;
> +				bc_p->opt.call.adr = NULL;
>  				return TRUE;

Why do you set is_native = TRUE here?

That won't always be the case, so there should at least be a comment
explaining why you set it to TRUE here.

> @@ -845,50 +903,55 @@
>  }
>  
>  /*
> -** MB_read_cstring MB_mallocs a string each time. The caller MB_frees
> it.
> -** Starts assuming string won't be more than a certain length,
> -** reallocates if it gets too long
> +** Returned string is allocated with string routines MB_str_xxx
> +** It is the responsibility of the caller to free it using
> MB_str_delete
> +**
> +** Will read from the file a complete string, but will only store in
> +** memory string of maximum size of buffer below
>  static MB_Bool
>  MB_read_cstring(FILE *fp, MB_CString *str_p)

Why does it only save up the the maximum size of the buffer?
Why not handle arbitrary-length strings?

The old code, which handled arbitrary length strings,
seems much nicer than the new code.

> Index: mb_bytecode.h
> ===================================================================
> RCS file: /home/mercury1/repository/mercury/bytecode/mb_bytecode.h,v
> retrieving revision 1.1
> diff -u -r1.1 mb_bytecode.h
> --- mb_bytecode.h	2001/01/24 07:42:22	1.1
> +++ mb_bytecode.h	2001/01/24 07:44:34
> @@ -1,49 +1,26 @@
>  
>  /*
> -** Copyright (C) 1997 The University of Melbourne.
> +** Copyright (C) 1997-2001 The University of Melbourne.

That should be

	** Copyright (C) 1997, 2000-2001 The University of Melbourne.

since that file wasn't touched in 1998 or 1999.
Likewise for everything else in this directly, I imagine.

>  #include <stdio.h>
>  
> -#include "mercury_conf.h"
> -#include "mercury_types.h"
> -#include "mercury_float.h"
> +#include <mercury_conf.h>
> +#include <mercury_types.h>
> +#include <mercury_float.h>

You should use "..." rather than <...> for non-system headers.
<...> should only be used for system headers.

> @@ -78,6 +55,12 @@
>  #define	MB_DET_CC_NONDET		5
>  #define	MB_DET_ERRONEOUS		6
>  #define	MB_DET_FAILURE			7
> +	/*
> +	** Invalid is used to indicate that there is something wrong with
> +	** this predicate. (Probably contains foreign code) and the bytecode
> +	** version should not be used
> +	*/
> +#define	MB_DET_INVALID			99

Please try to use grammatically correct English in your comments.

>  typedef struct MB_Op_arg_struct {
>  	MB_Byte	id;
> @@ -128,7 +111,13 @@
>  			MB_CString	module_id;
>  			MB_CString	pred_id;
>  			MB_Short	arity;
> +			MB_Byte		is_func;
>  			MB_Byte		proc_id;
> +
> +			/* Whether call is a native code call */
> +			MB_Byte		is_native;

Why is that MB_Byte rather than `bool'?

> +			/* code address to call */
> +			MB_Word*	adr;

I suggest s/adr/addr/g

>  		} pred_const;
>  		struct {
>  			MB_CString	module_id;
> @@ -159,9 +148,18 @@
>  #define	MB_CONSID_BASE_TYPE_INFO_CONST	6
>  #define	MB_CONSID_CHAR_CONST		7
>  
> +/*
> +** Internal label structure. index is read from the file and translated
> +** into adr, which is the actual code address of a label
> +*/
> +typedef union {
> +	MB_Short	index;
> +	MB_Word*	adr;
> +} MB_Label;

I notice that this is an untagged union.
The documentation should make it clearer
which of these two fields is valid when.

> -		MB_CString	*var_info_list; /* XXX: malloc */
> +		MB_CString*	var_info;

Our convention is that put the `*' next to the name
of the variable or type being declared.
(I don't like that one much, but it's better to be consistent.)

>  	struct {
> +		/* start of proc (not in file) */
> +		MB_Word*	proc_start;

Likewise here.  And in lots of other places.
I suggested doing 

	grep '[a-zA-Z]\*' *.h *.c

> Index: mb_disasm.c
> +/* XXX: the format strings used below assume that MB_Word is an int and
> +** that casting MB_Word to int for printing will produce no strange effects
> +*/

The layout for comments is

	/*
	** ...
	** ...
	*/

not

	/* ...
	** ...
	*/

> +** and calculated arguments (ie not in the bytecode file) in round
> brackets

Your mailer seems to be wrapping long lines.
You should turn that off when posting diffs.

> +	switch (bc_id) {
>  		case MB_BC_enter_pred:
>  			Print()
>  				" %s %s/%d (%d procs)",
> -				bc.opt.enter_pred.is_func ? "func" : "pred",
> -				bc.opt.enter_pred.pred_name,
> -				bc.opt.enter_pred.pred_arity,
> -				bc.opt.enter_pred.proc_count
> +				bca->enter_pred.is_func ? "func" : "pred",
> +				bca->enter_pred.pred_name,
> +				(int)bca->enter_pred.pred_arity,
> +				(int)bca->enter_pred.proc_count

s/(int)/(int) /g
             ^

Likewise elsewhere.

> +			len = bca->enter_proc.list_length;
>  			for (i = 0; i < len; i++) {
>  				Print()
>  					" %s",
> -					bc.opt.enter_proc.var_info_list[i]
> +					bca->enter_proc.var_info[i]
> +
>  				EndPrint()

It looks those Print() and EndPrint() must be macros.  If so, they
should be in uppercase, i.e. PRINT and END_PRINT respectively.

>  		case MB_BC_construct: {
>  			MB_Short	len;
>  			MB_Short	i;
>  
>  			Print()
> -				" %d ",
> -				bc.opt.construct.to_var
> +				" [var %d] <= ",
> +				(int)bca->construct.to_var
>  			EndPrint()
>  
> -			PrintCall(str_cons_id,bc.opt.construct.consid)
> +			PrintCall(str_cons_id,bca->construct.consid)

Likewise for PrintCall.

>  		case MB_CONSID_INT_CONST:
> -			/*
> -			** (This comment is labelled "CAST COMMENT".
> -			** If you remove this comment, also
> -			** remove references to it in this file.
> -			** Search for "CAST COMMENT".)
> -			**
> -			** XXX: The cast to `long' in the following code
> -			** is needed to remove a warning. `int_const' has
> -			** type `Integer', but Integer may be typedef'ed
> -			** to `int', `long', `long long' or whatever.
> -			** The correct solution may be to define a
> -			** format string for Integer in conf.h.
> -			*/
>  			Print()
> -				"int_const %ld",
> -				(long) cons_id.opt.int_const
> +				"int_const %x",
> +				(int)cons_id.opt.int_const

Why did you change the cast from int to long?
And why did you remove the XXX comment?  It still applies, I think.

Why do you print int constants in hex rather than decimal?

If you are going to print in hex, you should definitely
precede it with "0x" to make it clear that the output is
in hex.  Otherwise output like "10" would be ambiguous.

If you think hex output would be useful, it may be better
to output both decimal _and_ hex, e.g. "int_const 16 (0x10)".

>  			EndPrint()
>  			break;
>  		case MB_CONSID_STRING_CONST: {
>  			Print()
> -				"string_const "
> +				"string_const"
>  			EndPrint()
>  			buffer[buffer_len-1] = 0; /* snprintf may not do it */
>  
> @@ -609,12 +701,14 @@
>  			break;
>  		case MB_CONSID_PRED_CONST:
>  			Print()
> -				"%s %s %s %d %d",
> -				"pred_const ",
> +				"%s %s %s__%s/%d proc %d (%p)",

I suggest s/proc/mode/

>  		case MB_CONSID_CHAR_CONST:
>  			if (isprint(cons_id.opt.char_const.ch)) {
>  				Print()
>  					"%s '%c'",
> -					"char_const ",
> +					"char_const",
>  					cons_id.opt.char_const.ch
>  				EndPrint()
>  			} else {
>  				Print()
>  					"%s %2X",
> -					"char_const ",
> +					"char_const",
>  					(int)cons_id.opt.char_const.ch

I suggest prefixing the hex value with 0x and preceding it with
the decimal value, i.e. "%s %d (0x%2X)".

> Index: mb_machine.c
>  /* Imports */
> +#include	<mercury_imp.h>

Use "..." rather than <...>.

> +	if (MB_ip_special(new_ip)) {
> +		switch ((MB_Word)new_ip) {
> +			case (MB_Word)MB_CODE_DO_FAIL:
> +				instr_do_fail(ms, NULL);
> +				break;
> +			case (MB_Word)MB_CODE_DO_REDO:
> +				instr_do_redo(ms, NULL);
> +				break;

Why do you need the casts to MB_Word here?

s/(MB_Word)/(MB_Word) /g
                     ^

Better do a grep for it:

	grep ')[A-Za-z]' *.c *.h

or

	grep -n ')[A-Za-z]' *.c *.h | sed "s/:[0-9]*:/& /" | error -v


> +/* Check which function we are in & save pointers to temp & var slots
> */
> +void
> +MB_func_type_check(MB_Machine_State* ms)

I think "function" is not the right word.
Perhaps you mean "check what code_model the procedure are in has, ..."?

Also the function name doesn't match the comment.

>  {
> -	MB_Bytecode bc;
> +	/*
> +	** The old method (based on stack frame sizes) wasn't as good as
> +	** this one because a det function can contain commits within which

s/function/procedure/

> +	** temporary nondet stack frames are on top of the nondet stack,
> +	** So you can't tell whether a nondet or det function was really

Likewise.

> +	/* Check that we are actually in a function and not just entering one */

Likewise.

> +	if (MB_code_get_id(ip) != MB_BC_enter_proc) {
...
> +	} else {
> +		SAY(" not getting func type of unentered function");

Likewise.

> +/* Get a variable */
> +/* XXX no range check (even when debugging) */
> +MB_Word
> +MB_var_get(MB_Machine_State* ms, MB_Word idx)
>  {
> -	if (adr < 0 || adr >= ms->code.count)
> -		return MB_BC_debug_invalid;
> -	return ms->code.id[adr];
> +	return ms->cur_proc.var[-idx];
>  }

The comment "Get a variable" is not much more informative
than the function name "MB_var_get".

Why `-idx' rather than `idx'?
That should be explained in comments.

> -/* Get the bytecode argument at a given address */
> -MB_Bytecode_Arg*
> -MB_code_get_arg(MB_Machine_State* ms, MB_Word adr)
> +/* Set a variable on the det stack */
> +void
> +MB_var_set(MB_Machine_State* ms, MB_Word idx, MB_Word value)
>  {
> -	MB_Word data_index;
> +	ms->cur_proc.var[-idx] = value;
> +}

Likewise for -idx here.

> +static void instr_invalid	(MB_Machine_State*ms,const MB_Bytecode_Arg*bca);

s/,/, /g

Another one to grep for:

	grep ',[^ ]' *.h *.c

> +static void (*instruction_table[])(MB_Machine_State*, const
> MB_Bytecode_Arg*) = {

It would be clearer to use a typedef.

> +	instr_builtin_binop,
> +	instr_builtin_unop,	/**** unop */
> +	instr_builtin_bintest,
> +	instr_builtin_untest,	/**** unop test */

`untest' is not a very good name.
I think it would be better to rename untest as unop_test
and then delete those two comments.

> +static void
> +instr_invalid(MB_Machine_State* ms, const MB_Bytecode_Arg* bca)
> +{
> +	assert(FALSE);
> +}

It would be better to call MB_fatal() here.

> +/* Enter/exit procedure */
> +static void
> +instr_enter_proc(MB_Machine_State* ms, const MB_Bytecode_Arg* bca)
> +{
> +	switch (bca->enter_proc.det) {
> +		case MB_DET_SEMIDET:
> +			MB_fatal("semidet");
> +		case MB_DET_DET: {
> +			MB_Word detframe_size = 
> +					bca->enter_proc.temp_count+
> +					bca->enter_proc.list_length+
> +					MB_DETFRAME_SIZE;
> +			MB_incr_sp(detframe_size);
> +
> +			/* save succip */
> +			MB_stackitem(MB_DETFRAME_SUCCIP) = (MB_Word)MB_succip;
> +			MB_stackitem(MB_DETFRAME_BC_SUCCIP) = (MB_Word)NULL;

s/(MB_Word)/(MB_Word) /g

> +			MB_ip_set(ms, MB_ip_get(ms)+1);

s/+/ + /

> +		case MB_DET_MULTIDET:
> +		case MB_DET_NONDET: {
>  
> +			#if 0
> +			MB_fatal("enter_proc/multidet,nondet");

The `#if 0' should be after the call to MB_fatal(),
shouldn't it?

> +	if (bca->enter_proc.det == MB_DET_SEMIDET) {
> +		/*
> +		** If a semidet procedure then mark our success slot as failure
> +		** until we know otherwise.
> +		*/
> +		MB_temp_set(ms, MB_SEMIDET_SUCCESS_SLOT, MB_SEMIDET_FAILURE);

Rather than checking for MB_DET_SEMIDET (determinism semidet)
you should be checking that it has a model_semi code model.
In other words, `MB_DET_CC_NONDET' and `MB_DET_FAILURE'
should be treated the same as MB_DET_SEMIDET.
See compiler/code_model.m (although you'll have to write
your own code to do it, rather than reusing the code there).

> +				/* allocate heap memory */
> +				heap_data = (MB_Word*)MB_GC_malloc(
> +					sizeof(MB_Word) * (list_length + extra),
> +					MB_GC_NOT_ATOMIC);

Please define an MB_GC_NEW_ARRAY() macro, like MR_GC_NEW_ARRAY(), and
use that.

> +				for (i = 0; i < list_length; i++)
> +				{

The `{' should be on the same line as the `for'.

> +					heap_data[i+extra] =

s/+/ + /

> +instr_construct(MB_Machine_State* ms, const MB_Bytecode_Arg* bca)
>  {
> +	MB_Word val;
> +	/* construct a variable into a slot */
> +	/* XXX */

There should be a comment explaining what the XXX is for.

>  
>  /*
> -**	XXX: Currently we depend on the order of elements in the table.
> +** XXX ORDER Currently we depend on the order of elements in the table.
>  */
> -static MB_Word (*binop_table[])(MB_Machine_State* ms, const
> MB_Bytecode_Arg* bca) = {
> +static MB_Word (*binop_table[])(MB_Machine_State* ms,
> +			const MB_Bytecode_Arg* bca) =
> +{

Please use a typedef.

> +/* Execute the current instruction. Returns false if instruction could
> */
> +/* not be executed */

Use

	/*
	** ...
	** ...
	*/

not

	/* ... */
	/* ... */

>  /* Single step execute */
>  void
>  MB_step(MB_Machine_State* ms)
>  {
> +#if 0
>  	MB_Word ip = MB_ip_get(ms);
>  
>  	MB_Byte bc_id = MB_code_get_id(ms, ip);
> -	if (bc_id >= sizeof(instruction_table) / sizeof(instruction_table[0]))
> {
> +	if (!dispatch(bc_id, ms)) {
> +		MB_fatal("Invalid instruction encountered\n");
>  		instr_noop(ms, NULL);
> -	} else {
> -		instruction_table[bc_id](ms, MB_code_get_arg(ms, ip));
>  	}
> +#endif
>  }

Why is that code commented out (with `#if 0')?

If it is commented out because it is not yet implemented
properly, shouldn't there be a call to MB_fatal()?

>  void
>  MB_step_over(MB_Machine_State* ms)
>  {
> +#if 0

Likewise.

>  /* Run until invalid instruction or debug_trap bytecode encountered */
>  void
>  MB_run(MB_Machine_State* ms)
>  {
> +#if 0

Likewise.

[to be continued]

-- 
Fergus Henderson <fjh at cs.mu.oz.au>  |  "I have always known that the pursuit
                                    |  of excellence is a lethal habit"
WWW: <http://www.cs.mu.oz.au/~fjh>  |     -- the last words of T. S. Garp.
--------------------------------------------------------------------------
mercury-developers mailing list
Post messages to:       mercury-developers at cs.mu.oz.au
Administrative Queries: owner-mercury-developers at cs.mu.oz.au
Subscriptions:          mercury-developers-request at cs.mu.oz.au
--------------------------------------------------------------------------



More information about the developers mailing list