[m-dev.] Mercury's non-local gotos and gcc 4

Peter Hawkins peter at hawkins.emu.id.au
Fri Jul 8 13:21:16 AEST 2005


Hi...

I've been delving into why Mercury's asm_fast grade and gcc 4.0 aren't 
happy together. Consider the following module:
:- module hello.
:- interface.
:- import_module io.
:- pred main(io.state::di, io.state::uo) is det.
:- implementation.
:- import_module int.

:- func fn(int, int) = int.
fn(X, Y) = X + Y.

main(!IO) :-
    X = apply(fn,2,3),
    io.print(X, !IO).

I compiled this module to C using the ROTD from the 7th of July 2005 
(the compiler was built with gcc 3.4).
When the resulting C code was built with gcc 3.4 or gcc 4.0 with -O0, 
the module worked fine. When built with gcc 4.0 with -O1 the module 
crashed with a segmentation fault. An examination of the generated C and 
assembly reveals why...

The relevant C code is this:
MR_BEGIN_MODULE(hello_module1)
    MR_init_entry1(fn__hello__fn_2_0);
MR_BEGIN_CODE

MR_def_static(fn__hello__fn_2_0)
    MR_r1 = ((MR_Integer) MR_r1 + (MR_Integer) MR_r2);
    MR_proceed();
MR_END_MODULE

When preprocessed and run through indent, this turns into this:
static void
hello_module1 (void)
{
  __asm__ __volatile__ (""::"g" (hello_module1));
  __asm__ __volatile__ (""::"g" (&&hello_module1_dummy_label));
  goto *MR_dummy_identify_function (&&hello_module1_dummy_label);
hello_module1_dummy_label:{
    __asm__ __volatile__ (""::"g" (&&mercury__fn__hello__fn_2_0));;;
  }
  return;
  {
  _entry_mercury__fn__hello__fn_2_0:__asm__ __volatile__ (" .type _entry_" "mercury__fn__hello__fn_2_0"
              ",@function\n" "_entry_"
              "mercury__fn__hello__fn_2_0" ":\n");
  skip_mercury__fn__hello__fn_2_0:;
  }
mercury__fn__hello__fn_2_0: __asm__ __volatile__ ("": :"g" (&&_entry_mercury__fn__hello__fn_2_0));
  {;
    (MR_mr2) =
      ((MR_Integer) (MR_mr2) +
       (MR_Integer) (((MR_engine_base.MR_eng_fake_reg))[3]));

... followed by more irrelevant junk.

Here is the resulting (working) i386 assembly produced by gcc-3.4 with -O1:
.LFE30:
    .size   hello_module0, .-hello_module0
    .type   hello_module1, @function
hello_module1:
.LFB31:
    .loc 1 148 0
    pushl   %ebp
.LCFI4:
    movl    %esp, %ebp
.LCFI5:
    subl    $20, %esp
.LCFI6:
    .loc 1 148 0
    pushl   $.L16
.LCFI7:
    call    MR_dummy_identify_function
    addl    $16, %esp
.L17:
    jmp *%eax
.L16:
    .loc 1 149 0
    .loc 1 155 0
    leave
    ret
.L19:
    .loc 1 152 0
#APP
        .type _entry_mercury__fn__hello__fn_2_0,@function
_entry_mercury__fn__hello__fn_2_0:

.L20:
#NO_APP
.L18:
    .loc 1 153 0
    addl    MR_engine_base+12, %ebx
.LBB4:
    .loc 1 154 0
.LBE4:
    movl    %edi, %eax
    jmp .L17

... plus a bit more irrelevant stuff.

Here is the same code (non-working) as produced by gcc-4.0 with -O1.
.LFE30:
    .size   hello_module0, .-hello_module0
    .type   hello_module1, @function
hello_module1:
.LFB31:
    .loc 1 148 0
    pushl   %ebp
.LCFI4:
    movl    %esp, %ebp
.LCFI5:
    subl    $20, %esp
.LCFI6:
    .loc 1 148 0
    pushl   $.L14
.LCFI7:
    call    MR_dummy_identify_function
    addl    $16, %esp
    .loc 1 153 0
    movl    MR_engine_base+12, %edx
    movl    %edx, -8(%ebp)
    .loc 1 154 0
    movl    %edi, -4(%ebp)
.LVL6:
.L15:
    jmp *%eax
.L14:
    .loc 1 149 0
    .loc 1 155 0
    leave
.LVL7:
    ret
.LVL8:
.L17:
    .loc 1 152 0
#APP
        .type _entry_mercury__fn__hello__fn_2_0,@function
_entry_mercury__fn__hello__fn_2_0:

#NO_APP
.L16:
    .loc 1 153 0
    addl    -8(%ebp), %ebx
.LBB4:
    .loc 1 154 0
    movl    -4(%ebp), %eax
    jmp .L15

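In other words, gcc-4.0 reorders some of the calculations (the load of
MR_engine_base+12 and the copy of %edi into stack slots) to occur
before the Mercury function entry point, so they are skipped when the
entry point is jumped to directly, thus breaking the non-local gotos.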

I assume that the 'return' that Mercury generates before the function
entry point is supposed to stop this sort of thing happening, but
apparently it no longer works. Perhaps gcc knows that there is only one
possible target of a computed goto in the function and therefore decides
it can hoist code from the target block to before the goto?

Turning off optimization for gcc 4 seems to fix the problem, but it's 
hardly ideal. I don't think the optimization causing the problem is 
controlled by a '-f' flag, since the combination of '-O1' with 
'-fno-...' for each flag implied by '-O1' still produces buggy code.

Any suggestions as to how to convince gcc not to do this?

=)
Peter
--------------------------------------------------------------------------
mercury-developers mailing list
Post messages to:       mercury-developers at cs.mu.oz.au
Administrative Queries: owner-mercury-developers at cs.mu.oz.au
Subscriptions:          mercury-developers-request at cs.mu.oz.au
--------------------------------------------------------------------------



More information about the developers mailing list