[m-dev.] Tool for generating DU types in C

Ralph Becket rafe at csse.unimelb.edu.au
Sun Mar 21 22:58:09 AEDT 2010


Is there a sensible place to put this?

Following a conversation with Andrew last week, I've spent some time
this weekend putting the attached awk script together to generate all
the boilerplate for discriminated union types in C.

This is the sort of thing I find myself churning out all too often
when hacking C.

As an example, the following file, 'a_b_tree.du':

    @ type a_b_tree
    @ constructor a
    @ constructor b
    @ constructor branch
    l : a_b_tree
    r : a_b_tree
    @{
        if (l == NULL || r == NULL) {
        fprintf(stderr, "a_b_tree: branch cannot have NULL arguments.\n");
        }
    }@
    @ end

will, when run through 'gen-du-type a_b_tree.du', produce 'a_b_tree.h':

    /* Automatically generated from a_b_tree.du. */

    #if !defined(__a_b_tree_h__)
    #define __a_b_tree_h__

    typedef struct _a_b_tree *a_b_tree;

    struct a_b_tree_a {
        int _tag; /* Private. */
    };

    /* Construct a 'a' value. */
    extern a_b_tree
    a();

    /* Returns NULL on a mismatch. */
    extern struct a_b_tree_a *
    is_a(a_b_tree this);

    struct a_b_tree_b {
        int _tag; /* Private. */
    };

    /* Construct a 'b' value. */
    extern a_b_tree
    b();

    /* Returns NULL on a mismatch. */
    extern struct a_b_tree_b *
    is_b(a_b_tree this);

    struct a_b_tree_branch {
        int _tag; /* Private. */
        a_b_tree l;
        a_b_tree r;
    };

    /* Construct a 'branch' value. */
    extern a_b_tree
    branch(a_b_tree l, a_b_tree r);

    /* Returns NULL on a mismatch. */
    extern struct a_b_tree_branch *
    is_branch(a_b_tree this);

    #endif

and 'a_b_tree.c':

    /* Automatically generated from a_b_tree.du. */

    #include <stdio.h>
    #include <stdlib.h>
    #include "a_b_tree.h"

    struct _a_b_tree {};

    #define _a_b_tree_a_tag 0
    #define _a_b_tree_b_tag 1
    #define _a_b_tree_branch_tag 2

    struct a_b_tree_a _a_b_tree_a_sentinel =
        { _a_b_tree_a_tag };

    a_b_tree
    a()
    {
        return (a_b_tree)&_a_b_tree_a_sentinel;
    }

    struct a_b_tree_a *
    is_a(a_b_tree this)
    {
        struct a_b_tree_a *ctor =
            (struct a_b_tree_a *)this;
        return (ctor == NULL || ctor->_tag != _a_b_tree_a_tag)
             ? NULL
             : ctor;
    }

    struct a_b_tree_b _a_b_tree_b_sentinel =
        { _a_b_tree_b_tag };

    a_b_tree
    b()
    {
        return (a_b_tree)&_a_b_tree_b_sentinel;
    }

    struct a_b_tree_b *
    is_b(a_b_tree this)
    {
        struct a_b_tree_b *ctor =
            (struct a_b_tree_b *)this;
         return (ctor == NULL || ctor->_tag != _a_b_tree_b_tag)
              ? NULL
              : ctor;
    }

    a_b_tree
    branch(a_b_tree l, a_b_tree r)
    {
        struct a_b_tree_branch *this =
        (struct a_b_tree_branch *)
            malloc(sizeof(struct a_b_tree_branch));
        if (this == NULL) {
            perror("a_b_tree:branch");
            exit(1);
        }
        this->_tag = _a_b_tree_branch_tag;
        this->l = l;
        this->r = r;
        if (l == NULL || r == NULL) {
            fprintf(stderr, "a_b_tree: branch cannot have NULL arguments.\n");
        }
        return (a_b_tree)this;
    }

    struct a_b_tree_branch *
    is_branch(a_b_tree this)
    {
        struct a_b_tree_branch *ctor =
            (struct a_b_tree_branch *)this;
        return (ctor == NULL || ctor->_tag != _a_b_tree_branch_tag)
             ? NULL
             : ctor;
    }

-------------- next part --------------
#!/usr/bin/awk -f
# gen-du-type
# Ralph Becket <rafe at csse.unimelb.edu.au>
# Thu Mar 18 14:17:45 EST 2010
# vim: ft=awk ts=4 sw=4 et
#
# Automatically generate constructors and deconstructors for discriminated
# union types (DUs) from a specification.
#
#
# TODO
# - Optimise the case where there is only a single data constructor
#   (we don't need the tag or the auxiliary struct).
# - Provide support for using non-malloc allocators.
#
#
# DONE
# - Support @{ ... }@ extensions to constructors
#   (e.g., to support validity testing of arguments).
# - Optimise for empty constructors
#   (we can simply use a statically allocated var in the .c file).
#
#
# A specification file for a DU looks like this (lines starting with
# '//', '/*', or '**' or ending with '*/' are taken to be comments and
# are ignored).
#
#   @ type type_name
#
#   @ public_header
#   ... code that will appear at the top of the .h file
#
#   @ private_header
#   ... code that will appear at the top of the .c file
#
#   @ constructor ctor_name
#   field_name : c_type
#   ...
#   @{
#       ... optional initialisation or checking code
#       ... ('this' points to the new, populated structure)
#   }@
#
#   ...
#
#   @ public_footer
#   ... code that will appear at the end of the .h file
#
#   @ private_footer
#   ... code that will appear at the end of the .c file
#
#   @ end
#
#
# Example 1: a singly linked list of ints:
#
#   @ type int_list
#   @ constructor cons
#   head : int
#   tail : int_list
#   @ end
#
# This will generate in int_list.h:
#
#   #if !defined(__int_list_h__)
#   #define __int_list_h__
#
#   typedef struct _int_list *int_list;
#
#   struct int_list_cons {
#       int _tag /* Private. */;
#       int head;
#       int_list tail;
#   };
#
#   extern int_list
#   cons(int head, int_list tail);
#
#   extern struct int_list_cons *
#   is_cons(int_list this);
#
#   #endif
#
# and in int_list.c:
#
#   #include <stdio.h>
#   #include "int_list.h"
#
#   struct _int_list {};
#
#   #define _int_list_cons_tag 0;
#
#   extern int_list
#   cons(int head, int_list tail)
#   {
#       struct _int_list_cons *this =
#           (struct _int_list_cons *)malloc(sizeof(struct _int_list_cons));
#
#       if (this == NULL) {
#           perror("int_list:cons");
#           exit(1);
#       }
#
#       this->_tag = _int_list_cons_id;
#       this->head = head;
#       this->tail = tail;
#
#       return (int_list)this;
#   }
#
#   extern struct int_list_cons *
#   is_cons(int_list this)
#   {
#       struct int_list_cons *ctor = (struct _int_list_cons *)this;
#
#       return (ctor == NULL || ctor->_tag != _int_list_cons_tag)
#            ? NULL
#            : ctor;
#   }
#
#
# Example 2: a tree of ints:
#
#   @ type int_tree
#   @ constructor leaf
#   data : int
#   @ constructor branch
#   l : int_tree
#   r : int_tree
#   @{
#       if (l == NULL || r == NULL) {
#           fprintf(stderr, "branch arguments cannot be NULL\n");
#           exit(1);
#       }
#   }@
#   @ end

BEGIN {
    comment_regex = "(^[[:space:]]*[*/][*/])|([*][/][[:space:]]*$)";

    looking_for_type_name           = 1;
    looking_for_public_header       = 1 + looking_for_type_name;
    scanning_public_header          = 1 + looking_for_public_header;
    looking_for_private_header      = 1 + scanning_public_header;
    scanning_private_header         = 1 + looking_for_private_header;
    looking_for_constructor         = 1 + scanning_private_header;
    scanning_constructor            = 1 + looking_for_constructor;
    scanning_constructor_code       = 1 + scanning_constructor;
    looking_for_public_footer       = 1 + scanning_constructor_code;
    scanning_public_footer          = 1 + looking_for_public_footer;
    looking_for_private_footer      = 1 + scanning_public_footer;
    scanning_private_footer         = 1 + looking_for_private_footer;
    looking_for_end                 = 1 + scanning_private_code;

    n_ctors = 0;
    ctor_no = 0;
    ctor_field_no = 0;

    state = looking_for_type_name;
    aborted = 0;
}

#{
# print state, $0;
#}

# Skip over blank lines and comments.
#
NF == 0 || $0 ~ comment_regex {
    next;
}

state == looking_for_type_name {

    # Otherwise expect '@ type type_name' and start the .h and .c files.
    #
    if (NF == 3 && $1 == "@" && $2 == "type") {

        type_name = $3;
        h_file = type_name".h";
        c_file = type_name".c";
        state = looking_for_public_header;

        provenance = sprintf("/* Automatically generated from %s. */\n\n",
            FILENAME);

        printf(provenance) > h_file;
        printf("#if !defined(__%s_h__)\n", type_name) >> h_file;
        printf("#define __%s_h__\n", type_name) >> h_file;
        printf("\n") >> h_file;

        printf(provenance) > c_file;
        printf("#include <stdio.h>\n") >> c_file;
        printf("#include <stdlib.h>\n") >> c_file;
        printf("#include \"%s\"\n", h_file) >> c_file;
        printf("\n") >> c_file;

        next;

    } else {

        abort("syntax error while looking for '@ type <type_name>'.");

    }
}

state == looking_for_public_header {
    if ($1 == "@") {
        if (NF == 2 && $2 == "public_header") {
            state = scanning_public_header;
            next;
        } else {
            state = looking_for_private_header;
        }
    } else {
        abort("syntax error while looking for\n" \
            "  '@ public_header',\n" \
            "  '@ private_header', or\n" \
            "  '@ constructor <constructor_name>'.");
    }
}

state == scanning_public_header {
    if ($1 == "@") {
        printf("\n") >> h_file;
        state = looking_for_private_header;
    } else {
        printf("%s\n", $0) >> h_file;
        next;
    }
}

state == looking_for_private_header {
    if ($1 == "@") {
        if (NF == 2 && $2 == "private_header") {
            state = scanning_private_header;
            next;
        } else {
            state = looking_for_constructor;
        }
    } else {
        abort("syntax error while looking for\n" \
            "  '@ public_header',\n" \
            "  '@ private_header', or\n" \
            "  '@ constructor <constructor_name>'.");
    }
}

state == scanning_private_header {
    if ($1 == "@") {
        printf("\n") >> c_file;
        state = looking_for_constructor;
    } else {
        printf("%s\n", $0) >> c_file;
        next;
    }
}

state == looking_for_constructor {
    if ($1 == "@") {
        if (NF == 3 && $2 == "constructor") {
            new_ctor($3);
            state = scanning_constructor;
            next;
        } else {
            output_du_type();
            state = looking_for_public_footer;
        }
    } else {
        abort("syntax error while looking for\n" \
            "  '@ constructor <constructor_name>',\n" \
            "  '@ public_footer', or" \
            "  '@ private_footer', or" \
            "  '@ end'.");
    }
}

state == scanning_constructor {

    if ($1 == "@{") {

        ctor_n_fields[ctor_no] = ctor_field_no;

        if (NF == 1) {
            state = scanning_constructor_code;
            next;
        } else {
            abort("syntax error while scanning constructor:\n" \
                "  '@{' must appear on a line on its own.");
        }

    } else if (NF >= 3 && $1 != "@" && $2 == ":") {

# print "read field", $1
        ctor_field_name[ctor_no, ctor_field_no] = $1;
        field_type = $3;
        for(i = 4; i <= NF; i++) {
            field_type = field_type" "$i;
        }
        ctor_field_type[ctor_no, ctor_field_no] = field_type;
        n_code_lines = 0;
        ctor_field_no++;
        next;

    } else {

        ctor_n_fields[ctor_no] = ctor_field_no;

        if ($1 == "@") {
            if (NF == 3 && $2 == "constructor") {
                new_ctor($3);
                next;
            } else {
                output_du_type();
                state = looking_for_public_footer;
            }
        } else {
            abort("syntax error while looking for\n" \
                "  '<field_name> : <field_type>',\n" \
                "  '@{ ... }@',\n" \
                "  '@ constructor <constructor_name>',\n" \
                "  '@ public_footer', or\n" \
                "  '@ private_footer', or\n" \
                "  '@ end'.");
        }

    }
}

state == scanning_constructor_code {
    if ($1 == "@}") {
        abort("syntax error while scanning constructor code:\n" \
            "  terminator is '}@', not '@}'.");
    } else if ($1 == "}@") {
        if (NF == 1) {
            n_ctor_code_lines[ctor_no] = n_code_lines;
            state = looking_for_constructor;
            next;
        } else {
            abort("syntax error while scanning constructor code:\n" \
                "  '}@' must appear on a line on its own.");
        }
    } else if ($1 == "@") {
        abort("syntax error while scanning constructor code:\n" \
            "  expected '}@' before next '@' directive.");
    } else {
        ctor_code_line[ctor_no, n_code_lines++] = $0;
        next;
    }
}

state == looking_for_public_footer {
    if ($1 == "@") {
        if (NF == 2 && $2 == "public_footer") {
            state = scanning_public_footer;
            next;
        } else {
            state = looking_for_private_footer;
        }
    }
}

state == scanning_public_footer {
    if ($1 == "@") {
        printf("\n") >> h_file;
        state = looking_for_private_footer;
    } else {
        printf("%s\n", $0) >> h_file;
        next;
    }
}

state == looking_for_private_footer {
    if ($1 == "@") {
        if (NF == 2 && $2 == "private_footer") {
            state = scanning_private_footer;
            next;
        } else {
            state = looking_for_end;
        }
    }
}

state == scanning_private_footer {
    if ($1 == "@") {
        printf("\n") >> c_file;
        state = looking_for_end;
    } else {
        printf("%s\n", $0) >> c_file;
        next;
    }
}

state == looking_for_end {
    if ($1 == "@" && NF == 2 && $2 == "end") {
        end();
        state = -1; # Dummy state - ignore anything else.
    } else {
        abort("syntax error while looking for '@ end'.");
    }
}

{
    abort("internal error!");
}

END {
# print "END", aborted
    if (!aborted && state != looking_for_end) {
        abort("syntax error: missing '@ end' terminator.");
    }
}

function new_ctor(name) {
    ctor_no = n_ctors++;
    ctor_name[ctor_no] = name;
    ctor_field_no = 0;
# print "found constructor", ctor_no, "named", name
}

function output_du_type() {

# print n_ctors, "constructors"

    tab = "    ";

    tn = type_name;

    # FILL IN THE .h FILE

    # Declare a typedef as a name for the hidden DU type representation.
    #
    printf("typedef struct _%s *%s;\n", tn, tn) >> h_file;
    printf("\n") >> h_file;

    # Write the structs, constructors, and deconstructors for each
    # constructor to the .h file.
    #
    for(c = 0; c < n_ctors; c++) {

        cn = ctor_name[c];
        cnf = ctor_n_fields[c];
 print "constructor", c, "name", cn, "fields", cnf

        # The struct.
        #
        #   struct type_name_ctor_name {
        #       int _tag;
        #       field_1_type field_1_name;
        #       ...
        #       field_n_type field_n_name;
        #   };
        #
        printf("struct %s_%s {\n", tn, cn) >> h_file;
        # Don't forget the tag field.
        printf("%sint _tag; /* Private. */\n", tab) >> h_file;
        for (f = 0; f < cnf; f++) {
            cfn = ctor_field_name[c, f];
            cft = ctor_field_type[c, f];
            printf("%s%s %s;\n", tab, cft, cfn) >> h_file;
        }
        printf("};\n") >> h_file;
        printf("\n") >> h_file;

        # The constructor.
        #
        #   extern type_name
        #   ctor_name(field_1_type field_1_name, ...);
        #
        printf("/* Construct a '%s' value. */\n", cn) >> h_file;
        printf("extern %s\n", tn) >> h_file;
        printf("%s", ctor_name[c]) >> h_file;
        output_ctor_args(h_file, c);
        printf(";\n") >> h_file;
        printf("\n") >> h_file;

        # The deconstructor.
        #
        #   extern struct type_name_ctor_name *
        #   is_ctor_name(type_name this);
        #
        printf("/* Returns NULL on a mismatch. */\n") >> h_file;
        printf("extern struct %s_%s *\n", tn, cn) >> h_file;
        printf("is_%s(%s this);\n", cn, tn) >> h_file;
        printf("\n") >> h_file;

    }

    # FILL IN THE .c FILE

    # The abstract DU type struct is just a dummy.
    #
    printf("struct _%s {};\n", tn) >> c_file;
    printf("\n") >> c_file;

    # The constructor tags.
    #
    #   #define _type_name_ctor_1_name_tag 1
    #   ...
    #   #define _type_name_ctor_n_name_tag n
    #
    for(c = 0; c < n_ctors; c++) {
        cn = ctor_name[c];
        printf("#define _%s_%s_tag %d\n", tn, cn, c) >> c_file;
    }
    printf("\n") >> c_file;

    for(c = 0; c < n_ctors; c++) {

        cn = ctor_name[c];
        cnf = ctor_n_fields[c];
        sn = sprintf("struct %s_%s", tn, cn);

        if (cnf == 0) {
            # The constructor body (constructor has no arguments).
            #
            #   struct type_name_ctor_name _type_name_ctor_name_sentinel =
            #       { _type_name_ctor_name_tag };
            #
            #   type_name
            #   ctor_name()
            #   {
            #       return (type_name)&_type_name_ctor_name_sentinel;
            #   }
            #
            printf("%s _%s_%s_sentinel =\n", sn, tn, cn) >> c_file;
            printf("%s{ _%s_%s_tag };\n", tab, tn, cn) >> c_file;
            printf("\n") >> c_file;
            printf("%s\n", tn) >> c_file;
            printf("%s()", cn) >> c_file;
            printf("\n{\n") >> c_file;
            for(i = 0; i < 0 + n_ctor_code_lines[ctor_no]; i++) {
                printf("%s\n", ctor_code_line[ctor_no, i]) >> c_file;
            }
            printf("%sreturn (%s)&_%s_%s_sentinel;\n", \
                tab, tn, tn, cn) >> c_file;
            printf("}\n") >> c_file;
            printf("\n") >> c_file;

        } else{
            # The constructor body (constructor has some arguments).
            #
            #   type_name
            #   ctor_name(field_1_type field_1_name, ...)
            #   {
            #       struct type_name_ctor_name *this =
            #           (struct type_name_ctor_name *)
            #               malloc(sizeof(struct type_name_ctor_name));
            #       if (this == NULL) {
            #           perror("type_name:ctor_name");
            #           exit(1);
            #       }
            #       this->_tag = _type_name_ctor_tag;
            #       this->field_1_name = field_1_name;
            #       ...
            #       return (type_name)this;
            #   }
            #
            printf("%s\n", tn) >> c_file;
            printf("%s", cn) >> c_file;
            output_ctor_args(c_file, c);
            printf("\n{\n") >> c_file;
            printf("%s%s *this =\n", tab, sn) >> c_file;
            printf("%s%s(%s *)\n", tab, tab, sn) >> c_file;
            printf("%s%s%smalloc(sizeof(%s));\n", tab, tab, tab, sn) >> c_file
            printf("%sif (this == NULL) {\n", tab) >> c_file;
            printf("%s%sperror(\"%s:%s\");\n", tab, tab, tn, cn) >> c_file;
            printf("%s%sexit(1);\n", tab, tab) >> c_file;
            printf("%s}\n", tab) >> c_file;
            printf("%sthis->_tag = _%s_%s_tag;\n", tab, tn, cn) >> c_file;
            for (f = 0; f < cnf; f++) {
                cfn = ctor_field_name[c, f];
                printf("%sthis->%s = %s;\n", tab, cfn, cfn) >> c_file;
            }
            for(i = 0; i < 0 + n_ctor_code_lines[ctor_no]; i++) {
                printf("%s\n", ctor_code_line[ctor_no, i]) >> c_file;
            }
            printf("%sreturn (%s)this;\n", tab, tn) >> c_file;
            printf("}\n") >> c_file;
            printf("\n") >> c_file;
        }

        # The deconstructor body.
        #
        #   struct type_name_ctor_name *
        #   is_ctor_name(type_name this)
        #   {
        #       struct type_name_ctor_name *ctor =
        #           (struct type_name_ctor_name *)this;
        #       return (ctor == NULL || ctor->_tag != _type_name_ctor_name_tag)
        #            ? NULL
        #            : ctor;
        #   }
        #
        printf("%s *\n", sn) >> c_file;
        printf("is_%s(%s this)\n", cn, tn) >> c_file;
        printf("{\n") >> c_file;
        printf("%s%s *ctor =\n", tab, sn) >> c_file;
        printf("%s%s(%s *)this;\n", tab, tab, sn) >> c_file;
        printf("%s return (ctor == NULL || ctor->_tag != _%s_%s_tag)\n", \
            tab, tn, cn) >> c_file
        printf("%s      ? NULL\n", tab) >> c_file;
        printf("%s      : ctor;\n", tab) >> c_file;
        printf("}\n") >> c_file;
        printf("\n") >> c_file;
    }

}

function output_ctor_args(file, c,    f) {
# print "constructor", c, "has", ctor_n_fields[c], "fields"
    printf("(") >> file;
    for(f = 0; f < ctor_n_fields[c]; f++) {
# print "field", f, "is named", ctor_field_name[c, f]
        if (f > 0) {
            printf(", ") >> file;
        }
        printf("%s %s", ctor_field_type[c, f], ctor_field_name[c, f]) >> file;
    }
    printf(")") >> file;
}

function end() {
    printf("#endif\n") >> h_file;
    close(h_file);
    close(c_file);
    exit 0;
}

function abort(msg) {
 # printf("%s:%d: %s\n", FILENAME, NR, msg);
    printf("%s:%d: %s\n", FILENAME, NR, msg) > "/dev/stderr";
    aborted = 1;
    exit 1;
}


More information about the developers mailing list