[m-rev.] Re: for review: parsing_utils improvements

Ian MacLarty maclarty at csse.unimelb.edu.au
Tue Sep 29 15:02:59 AEST 2009


On Tue, Sep 29, 2009 at 02:32:21PM +1000, Ralph Becket wrote:
> Ralph Becket, Tuesday, 29 September 2009:
> > (3) Why are errors not part of the ordinary parser result?  If you want
> > a "quick bail out" error scheme, you can throw an exception.
> 

I don't want a "quick bail out" error scheme.

> I guess what I'm looking for here is a demonstration that your changes
> make it noticably easier to handle errors than with the existing
> interface.

Consider this partial sparql (http://www.w3.org/TR/rdf-sparql-query/)
parser that I wrote.  I've attached a copy of the old version which
uses a result type to keep track of errors (sparql.m).  I've also
attached a reworked version (sparql.m.new) using the new error handling.
It is considerably shorter and would have been much less of a pain to
write.

If you can tell me a better way to implement the error handling using
the old system, then please let me know.

Ian.
-------------- next part --------------
%----------------------------------------------------------------------------%
:- module sparql.
% A parser for a subset of SPARQL, with some custom extensions (in particular
% you can use ~"val" for literal_like expressions).
%----------------------------------------------------------------------------%

:- interface.

:- import_module flat_terms.
:- import_module namespace.
:- import_module rdf.
:- import_module result.

:- import_module list.

%----------------------------------------------------------------------------%

:- type var == string.
:- type sparql_term(T) == flat_terms.term(var, T).
:- type prefixes == namespace.namespace_abbreviations.

:- type triple_pattern
    --->    triple_pattern(
                subj_term       :: sparql_term(rdf.subject),
                pred_term       :: sparql_term(rdf.predicate),
                obj_term        :: sparql_term(object_pattern)
            ).

:- type object_pattern
    --->    object(rdf.object)
    ;       literal_like(string).

:- type sparql_command
    --->    select(list(var), list(triple_pattern))
    ;       delete(list(triple_pattern), list(triple_pattern))
    ;       insert(list(triple_pattern), list(triple_pattern)).

:- type parse_error
    --->    parse_error(
                line        :: int,
                position    :: int,
                message     :: string
            ).

:- pred parse_sparql_command(prefixes::in, string::in, 
    result(sparql_command, parse_error)::out) is det.

%----------------------------------------------------------------------------%
:- implementation.

:- import_module language_tag.
:- import_module special_uri.
:- import_module uri.

:- import_module char.
:- import_module maybe.
:- import_module parsing_utils.
:- import_module string.

%----------------------------------------------------------------------------%

parse_sparql_command(Prefixes, Str, Result) :-
    some [!PS] (
        new_src_and_ps(Str, consume_whitespace_and_comments, Src, !:PS),
        ( sparql_command(Prefixes, Src, Result0, !.PS, PS1) ->
            !:PS = PS1,
            ( Result0 = ok(_),
                ( eof(Src, _, !.PS, _) ->
                    Result1 = Result0
                ;
                    current_offset(Src, OS1, !PS),
                    Result1 = error({OS1, "trailing characters"})
                )
            ; Result0 = error(_),
                Result1 = Result0
            )
        ;
            current_offset(Src, OS2, !PS),
            Result1 = error({OS2, "unrecognised command"})
        ),
        !.PS = _,
        ( Result1 = ok(Cmd),
            Result = ok(Cmd)
        ; Result1 = error({Offset, Msg}),
            offset_to_line_number_and_position(src_to_line_numbers(Src),
                Offset, Line, Pos),
            Result = error(parse_error(Line, Pos, Msg))
        )
    ).
 
%----------------------------------------------------------------------------%

:- type pres(T) == result(T, {int, string}).

:- pred sparql_command(prefixes::in, src::in, pres(sparql_command)::out,
    ps::in, ps::out) is semidet.

sparql_command(Prefixes, Src, Res) -->
    identifier(init_id_chars ++ "@", id_chars, Src, Word0),
    { Word = string.to_lower(Word0) },
    ( { Word = "select" },
        select(Prefixes, Src, Res)
    ; { Word = "insert" },
        modify_template(Prefixes, Src, TempRes),
        ( { TempRes = ok({Template, Where}) },
            { Res = ok(insert(Template, Where)) }
        ; { TempRes = error(Err) },
            { Res = error(Err) }
        )
    ; { Word = "delete" },
        modify_template(Prefixes, Src, TempRes),
        ( { TempRes = ok({Template, Where}) },
            { Res = ok(delete(Template, Where)) }
        ; { TempRes = error(Err) },
            { Res = error(Err) }
        )
    ).

:- pred select(prefixes::in, src::in, pres(sparql_command)::out,
    ps::in, ps::out) is semidet.

select(Prefixes, Src, Res) -->
    one_or_more(variable, Src, Vars),
    ( identifier(init_id_chars, id_chars, Src, "where") ->
        group_graph_pattern(Prefixes, Src, PatternRes),
        ( { PatternRes = ok(Where) },
            { Res = ok(select(Vars, Where)) }
        ; { PatternRes = error(Err) },
            { Res = error(Err) }
        )
    ;
        current_offset(Src, Offset),
        { Res = error({Offset, "expecting where clause"}) }
    ).

:- pred modify_template(prefixes::in, src::in,
    pres({list(triple_pattern), list(triple_pattern)})::out,
    ps::in, ps::out) is semidet.

modify_template(Prefixes, Src, Res) -->
    group_graph_pattern(Prefixes, Src, TemplateRes),
    ( { TemplateRes = ok(Template) },
        (
            identifier(init_id_chars, id_chars, Src, WhereID),
            { string.to_lower(WhereID, "where") }
        ->
            group_graph_pattern(Prefixes, Src, WhereRes),
            ( { WhereRes = ok(Where) },
                { Res = ok({Template, Where}) }
            ; { WhereRes = error(Err) },
                { Res = error(Err) }
            )
        ;
            { Res = ok({Template, []}) }
        )
    ; { TemplateRes = error(Err) },
        { Res = error(Err) }
    ).

:- pred variable(src::in, var::out, ps::in, ps::out) is semidet.

variable(Src, Var) -->
    next_char(Src, C),
    { C = '?' ; C = '$' },
    identifier(init_id_chars, id_chars, Src, Var).

:- pred group_graph_pattern(prefixes::in, src::in,
    pres(list(triple_pattern))::out, ps::in, ps::out) is semidet.

group_graph_pattern(Prefixes, Src, Res) -->
    ( punct("{", Src, _) ->
        ( punct("}", Src, _) ->
            { Res = ok([]) }
        ;
            triples_block(Prefixes, Src, BlockRes),
            ( { BlockRes = ok(Block) },
                ( punct("}", Src, _) ->
                    { Res = ok(Block) }
                ;
                    current_offset(Src, OS),
                    { Res = error({OS, "expecting '}'"}) }
                )
            ; { BlockRes = error(Err) },
                { Res = error(Err) }
            )
        )
    ;
        current_offset(Src, OS),
        { Res = error({OS, "expecting '{'"}) }
    ).

:- pred triples_block(prefixes::in, src::in, pres(list(triple_pattern))::out,
    ps::in, ps::out) is semidet.

triples_block(Prefixes, Src, Res) -->
    triples_same_subject(Prefixes, Src, SameSubjRes),
    ( { SameSubjRes = ok(Block0) },
        ( punct(".", Src, _) ->
            ( peek_punct("}", Src) ->
                { Res = ok(Block0) }
            ;
                triples_block(Prefixes, Src, Res1),
                ( { Res1 = ok(Block1) },
                    { Res = ok(Block0 ++ Block1) }
                ; { Res1 = error(Err) },
                    { Res = error(Err) }
                )
            )
        ;
            { Res = ok(Block0) }
        )
    ; { SameSubjRes = error(Err) },
        { Res = error(Err) }
    ).

:- pred triples_same_subject(prefixes::in, src::in, pres(list(triple_pattern))::out,
    ps::in, ps::out) is semidet.

triples_same_subject(Prefixes, Src, Res) -->
    subj_term(Prefixes, Src, SubjTermRes),
    ( { SubjTermRes = ok(SubjTerm) },
        property_list_not_empty(Prefixes, SubjTerm, Src, Res)
    ; { SubjTermRes = error(Err) },
        { Res = error(Err) }
    ).

:- pred property_list_not_empty(prefixes::in, sparql_term(rdf.subject)::in, src::in,
    pres(list(triple_pattern))::out, ps::in, ps::out) is semidet.

property_list_not_empty(Prefixes, Subj, Src, Res) -->
    verb(Prefixes, Src, VerbRes),
    ( { VerbRes = ok(Verb) },
        object_list(Prefixes, Src, ObjListRes),
        ( { ObjListRes = ok(Objs) },
            { Triples0 = list.map(func(O) = triple_pattern(Subj, Verb, O),
                Objs) },
            (
                punct(";", Src, _),
                property_list_not_empty(Prefixes, Subj, Src, PLRes)
            ->
                ( { PLRes = ok(Triples1) },
                    { Res = ok(Triples0 ++ Triples1) }
                ; { PLRes = error(Err) },
                    { Res = error(Err) }
                )
            ;
                optional(punct(";"), Src, _),
                { Res = ok(Triples0) }
            )
        ; { ObjListRes = error(Err) },
            { Res = error(Err) }
        )
    ; { VerbRes = error(Err) },
        { Res = error(Err) }
    ).

:- pred object_list(prefixes::in, src::in,
    pres(list(sparql_term(object_pattern)))::out, ps::in, ps::out) is det.

object_list(Prefixes, Src, Res) -->
    object(Prefixes, Src, ObjRes),
    ( { ObjRes = ok(Obj0) },
        (
            punct(",", Src, _),
            object_list(Prefixes, Src, Res1)
        ->
            ( { Res1 = ok(Objs1) },
                { Res = ok([Obj0 | Objs1]) }
            ; { Res1 = error(Err) },
                { Res = error(Err) }
            )
        ;
            { Res = ok([Obj0]) }
        )
    ; { ObjRes = error(Err) },
        { Res = error(Err) }
    ).

:- pred subj_term(prefixes::in, src::in, pres(sparql_term(rdf.subject))::out,
    ps::in, ps::out) is det.

subj_term(Prefixes, Src, Res) -->
    ( variable(Src, Var) ->
        { Res = ok(term_var(Var)) }
    ; iri_ref(Prefixes, Src, RefRes) ->
        ( { RefRes = ok(URI) },
            { Res = ok(term_value(uri(URI))) }
        ; { RefRes = error(Err) },
            { Res = error(Err) }
        )
    ; blank_node_label(Src, BlankRes) ->
        ( { BlankRes = ok(Label) },
            { Res = ok(term_value(anon(Label))) }
        ; { BlankRes = error(Err) },
            { Res = error(Err) }
        )
    ;
        current_offset(Src, OS),
        { Res = error({OS, "expecting a variable, URI or blank"}) }
    ).

:- pred iri_ref(prefixes::in, src::in, pres(uri)::out,
    ps::in, ps::out) is semidet.

iri_ref(Prefixes, Src, Res) -->
    ( next_char(Src, '<') ->
        (
            identifier(init_id_chars, uri_chars, Src, URI)
        ->
            ( punct(">", Src, _) ->
                { Res = ok(URI) }
            ;
                current_offset(Src, OS),
                { Res = error({OS, "expecting '>'"}) }
            )
        ;
            current_offset(Src, OS),
            { Res = error({OS, "illegal characters in URI"}) }
        )
    ;
        identifier(init_id_chars, id_chars, Src, Prefix),
        next_char(Src, ':'),
        identifier(init_id_chars, id_chars, Src, LocalName),
        (
            { namespace.search_namespace(Prefixes, prefix(Prefix),
                namespace_name(Namespace)) }
        ->
            { Res = ok(Namespace ++ LocalName) }
        ;
            current_offset(Src, OS),
            { Res = error({OS, "prefix '" ++ Prefix ++ "' is not defined"}) }
        )
    ).

:- pred blank_node_label(src::in, pres(string)::out,
    ps::in, ps::out) is semidet.

blank_node_label(Src, Res) -->
    next_char(Src, '_'),
    next_char(Src, ':'),
    ( identifier(init_id_chars, id_chars, Src, Id) ->
        { Res = ok(Id) }
    ;
        current_offset(Src, OS),
        { Res = error({OS, "illegal characters in blank node label"}) }
    ).

:- pred verb(prefixes::in, src::in, pres(sparql_term(rdf.predicate))::out,
    ps::in, ps::out) is det.

verb(Prefixes, Src, Res) -->
    ( variable(Src, Var) ->
        { Res = ok(term_var(Var)) }
    ; iri_ref(Prefixes, Src, RefRes) ->
        ( { RefRes = ok(URI) },
            { Res = ok(term_value(uri(URI))) }
        ; { RefRes = error(Err) },
            { Res = error(Err) }
        )
    ; keyword(id_chars, "a", Src, _) ->
        { Res = ok(term_value(uri(special_uri.rdf_type))) }
    ;
        current_offset(Src, OS),
        { Res = error({OS, "expecting a variable or URI"}) }
    ).

:- pred object(prefixes::in, src::in, pres(sparql_term(object_pattern))::out,
    ps::in, ps::out) is det.

object(Prefixes, Src, Res) -->
    ( variable(Src, Var) ->
        { Res = ok(term_var(Var)) }
    ; iri_ref(Prefixes, Src, RefRes) ->
        ( { RefRes = ok(URI) },
            { Res = ok(term_value(object(resource(uri(URI))))) }
        ; { RefRes = error(Err) },
            { Res = error(Err) }
        )
    ; blank_node_label(Src, BlankRes) ->
        ( { BlankRes = ok(Label) },
            { Res = ok(term_value(object(resource(anon(Label))))) }
        ; { BlankRes = error(Err) },
            { Res = error(Err) }
        )
    ; rdf_literal(Prefixes, Src, LitRes) ->
        ( { LitRes = ok(Lit) },
            { Res = ok(term_value(object(literal(Lit)))) }
        ; { LitRes = error(Err) },
            { Res = error(Err) }
        )
    ; literal_like(Src, LikeRes) ->
        ( { LikeRes = ok(Str) },
            { Res = ok(term_value(literal_like(Str))) }
        ; { LikeRes = error(Err) },
            { Res = error(Err) }
        )
    ;
        current_offset(Src, OS),
        { Res = error({OS, "expecting a variable, URI, blank, literal or ~ expression"}) }
    ).

:- pred rdf_literal(prefixes::in, src::in, pres(rdf.literal)::out,
    ps::in, ps::out) is semidet.

rdf_literal(Prefixes, Src, Res) -->
    string(Src, Val),    
    ( punct("^^", Src, _) ->
        ( iri_ref(Prefixes, Src, RefRes) ->
            ( { RefRes = ok(URI) },
                { Res = ok(literal(Val, typed(URI))) }
            ; { RefRes = error(Err) },
                { Res = error(Err) }
            )
        ;
            current_offset(Src, OS),
            { Res = error({OS, "expecting a type URI"}) }
        )
    ; next_char(Src, '@') ->
        ( identifier(init_id_chars, id_chars, Src, Lang) ->
            { Res = ok(literal(Val, plain(yes(lt(Lang))))) }
        ;
            current_offset(Src, OS),
            { Res = error({OS, "expecting a language tag"}) }
        )
    ;
        { Res = ok(literal(Val, plain(no))) }
    ).

:- pred literal_like(src::in, pres(string)::out,
    ps::in, ps::out) is semidet.

literal_like(Src, Res) -->
    punct("~", Src, _),
    ( string(Src, Str) ->
        { Res = ok(Str) }
    ;
        current_offset(Src, OS),
        { Res = error({OS, "expecting a string"}) }
    ).

:- pred string(src::in, string::out, ps::in, ps::out) is semidet.

string(Src, Str) -->
    some [!Str] (
        string_literal('"', Src, !:Str),
        {
            string.replace_all(!.Str, "\\t", "\t", !:Str),
            string.replace_all(!.Str, "\\n", "\n", !:Str),
            string.replace_all(!.Str, "\\r", "\r", !:Str),
            string.replace_all(!.Str, "\\\"", "\"", !:Str),
            Str = !.Str
        }
    ).

%----------------------------------------------------------------------------%

:- func id_chars = string.

id_chars = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ1234567890_-".

:- func init_id_chars = string.

init_id_chars = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ".

:- func uri_chars = string.

uri_chars = id_chars ++ "/:@&?=%~#+.;".

:- func consume_whitespace_and_comments(src, ps) = ps.

consume_whitespace_and_comments(Src, PS0) = PS :-
    ( next_char(Src, C, PS0, PS1) ->
        ( char.is_whitespace(C) ->
            PS = consume_whitespace_and_comments(Src, PS1)
        ; C = '#' ->
            ( skip_to_eol(Src, _, PS1, PS2) ->
                PS = consume_whitespace_and_comments(Src, PS2)
            ;
                PS = PS0
            )
        ;
            PS = PS0
        )
    ;
        PS = PS0
    ).

:- pred peek_punct(string::in, src::in, ps::in, ps::out) is semidet.

peek_punct(Punct, Src, PS, PS) :-
    punct(Punct, Src, _, PS, _).

%----------------------------------------------------------------------------%
:- end_module sparql.
%----------------------------------------------------------------------------%
-------------- next part --------------
%----------------------------------------------------------------------------%
:- module sparql.
% A parser for a subset of SPARQL, with some custom extensions (in particular
% you can use ~"val" for literal_like expressions).
%----------------------------------------------------------------------------%

:- interface.

:- import_module flat_terms.
:- import_module namespace.
:- import_module rdf.

:- import_module list.
:- import_module parsing_utils.

%----------------------------------------------------------------------------%

:- type var == string.
:- type sparql_term(T) == flat_terms.term(var, T).
:- type prefixes == namespace.namespace_abbreviations.

:- type triple_pattern
    --->    triple_pattern(
                subj_term       :: sparql_term(rdf.subject),
                pred_term       :: sparql_term(rdf.predicate),
                obj_term        :: sparql_term(object_pattern)
            ).

:- type object_pattern
    --->    object(rdf.object)
    ;       literal_like(string).

:- type sparql_command
    --->    select(list(var), list(triple_pattern))
    ;       delete(list(triple_pattern), list(triple_pattern))
    ;       insert(list(triple_pattern), list(triple_pattern)).

:- pred parse_sparql_command(prefixes::in, string::in, 
    parsing_utils.parse_result(sparql_command)::out) is cc_multi.

%----------------------------------------------------------------------------%
:- implementation.

:- import_module language_tag.
:- import_module rdf.parse.
:- import_module special_uri.
:- import_module uri.

:- import_module char.
:- import_module maybe.
:- import_module string.
:- import_module unit.

%----------------------------------------------------------------------------%

parse_sparql_command(Prefixes, Str, Result) :-
    parse(Str, consume_whitespace_and_comments,
        sparql_command(Prefixes), Result).
 
%----------------------------------------------------------------------------%

:- pred sparql_command(prefixes::in, src::in, sparql_command::out,
    ps::in, ps::out) is semidet.

sparql_command(Prefixes, Src, Command) -->
    ( ikeyword(id_chars, "select", Src, _) ->
        select(Prefixes, Src, Command)
    ; ikeyword(id_chars, "insert", Src, _) ->
        modify_template(Prefixes, Src, {Template, Where}),
        { Command = insert(Template, Where) }
    ; ikeyword(id_chars, "delete", Src, _) ->
        modify_template(Prefixes, Src, {Template, Where}),
        { Command = delete(Template, Where) }
    ;
        fail_with_message("unknown command", Src, Command)
    ).

:- pred select(prefixes::in, src::in, sparql_command::out,
    ps::in, ps::out) is semidet.

select(Prefixes, Src, Select) -->
    one_or_more(variable, Src, Vars),
    ikeyword(id_chars, "where", Src, _),
    group_graph_pattern(Prefixes, Src, Where),
    { Select = select(Vars, Where) }.

:- pred modify_template(prefixes::in, src::in,
    {list(triple_pattern), list(triple_pattern)}::out,
    ps::in, ps::out) is semidet.

modify_template(Prefixes, Src, {Template, Where}) -->
    group_graph_pattern(Prefixes, Src, Template),
    ( 
        ikeyword(id_chars, "where", Src, _)
    ->
        group_graph_pattern(Prefixes, Src, Where)
    ;
        { Where = [] }
    ).

:- pred variable(src::in, var::out, ps::in, ps::out) is semidet.

variable(Src, Var) -->
    next_char(Src, C),
    { ( C = '?' ; C = '$' ) },
    identifier(init_id_chars, id_chars, Src, Var).

:- pred group_graph_pattern(prefixes::in, src::in,
    list(triple_pattern)::out, ps::in, ps::out) is semidet.

group_graph_pattern(Prefixes, Src, Block) -->
    punct("{", Src, _),
    ( punct("}", Src, _) ->
        { Block = [] }
    ;
        triples_block(Prefixes, Src, Block),
        punct("}", Src, _)
    ).

:- pred triples_block(prefixes::in, src::in, list(triple_pattern)::out,
    ps::in, ps::out) is semidet.

triples_block(Prefixes, Src, Block) -->
    triples_same_subject(Prefixes, Src, Block0),
    ( punct(".", Src, _) ->
        ( peek_punct("}", Src) ->
            { Block = Block0 }
        ;
            triples_block(Prefixes, Src, Block1),
            { Block = Block0 ++ Block1 }
        )
    ;
        { Block = Block0 }
    ).

:- pred triples_same_subject(prefixes::in, src::in, list(triple_pattern)::out,
    ps::in, ps::out) is semidet.

triples_same_subject(Prefixes, Src, Triples) -->
    subj_term(Prefixes, Src, SubjTerm),
    property_list_not_empty(Prefixes, SubjTerm, Src, Triples).

:- pred property_list_not_empty(prefixes::in, sparql_term(rdf.subject)::in, src::in,
    list(triple_pattern)::out, ps::in, ps::out) is semidet.

property_list_not_empty(Prefixes, Subj, Src, Triples) -->
    verb(Prefixes, Src, Verb),
    object_list(Prefixes, Src, Objs),
    { Triples0 = list.map(func(O) = triple_pattern(Subj, Verb, O),
        Objs) },
    (
        punct(";", Src, _),
        property_list_not_empty(Prefixes, Subj, Src, Triples1)
    ->
        { Triples = Triples0 ++ Triples1 }
    ;
        optional(punct(";"), Src, _),
        { Triples = Triples0 }
    ).

:- pred object_list(prefixes::in, src::in,
    list(sparql_term(object_pattern))::out, ps::in, ps::out) is semidet.

object_list(Prefixes, Src, Objs) -->
    object(Prefixes, Src, Obj0),
    (
        punct(",", Src, _),
        object_list(Prefixes, Src, Objs1)
    ->
        { Objs = [Obj0 | Objs1] }
    ;
        { Objs = [Obj0] }
    ).

:- pred subj_term(prefixes::in, src::in, sparql_term(rdf.subject)::out,
    ps::in, ps::out) is semidet.

subj_term(Prefixes, Src, Term) -->
    ( variable(Src, Var) ->
        { Term = term_var(Var) }
    ; parse_turtle_subject(Prefixes, Src, Subj) ->
        { Term = term_value(Subj) }
    ;
        { fail }
    ).

:- pred verb(prefixes::in, src::in, sparql_term(rdf.predicate)::out,
    ps::in, ps::out) is semidet.

verb(Prefixes, Src, Term) -->
    ( variable(Src, Var) ->
        { Term = term_var(Var) }
    ; parse_turtle_uri(Prefixes, Src, URI) ->
        { Term = term_value(uri(URI)) }
    ; keyword(id_chars, "a", Src, _) ->
        { Term = term_value(uri(special_uri.rdf_type)) }
    ;
        { fail }
    ).

:- pred object(prefixes::in, src::in, sparql_term(object_pattern)::out,
    ps::in, ps::out) is semidet.

object(Prefixes, Src, Term) -->
    ( variable(Src, Var) ->
        { Term = term_var(Var) }
    ; parse_turtle_object(Prefixes, Src, Obj) ->
        { Term = term_value(object(Obj)) }
    ; literal_like(Src, Like) ->
        { Term = term_value(literal_like(Like)) }
    ;
        { fail }
    ).


:- pred literal_like(src::in, string::out,
    ps::in, ps::out) is semidet.

literal_like(Src, Like) -->
    punct("~", Src, _),
    string(Src, Like).

:- pred string(src::in, string::out, ps::in, ps::out) is semidet.

string(Src, Str) -->
    some [!Str] (
        string_literal('"', Src, !:Str),
        {
            string.replace_all(!.Str, "\\t", "\t", !:Str),
            string.replace_all(!.Str, "\\n", "\n", !:Str),
            string.replace_all(!.Str, "\\r", "\r", !:Str),
            string.replace_all(!.Str, "\\\"", "\"", !:Str),
            Str = !.Str
        }
    ).

:- pred parse_turtle_subject(namespace.namespace_abbreviations::in, parsing_utils.src::in,
    rdf.subject::out, ps::in, ps::out) is semidet.

parse_turtle_subject(Prefixes, Src, Subj) -->
    ( parse_turtle_uri(Prefixes, Src, URI) ->
        { Subj = uri(URI) }
    ; 
        parse_blank_node_label(Src, Label),
        { Subj = anon(Label) }
    ).

:- pred parse_turtle_predicate(namespace.namespace_abbreviations::in,
    parsing_utils.src::in, rdf.predicate::out, ps::in, ps::out) is semidet.

parse_turtle_predicate(Prefixes, Src, uri(URI)) -->
    parse_turtle_uri(Prefixes, Src, URI).

:- pred parse_turtle_uri(namespace_abbreviations::in, src::in, uri::out,
    ps::in, ps::out) is semidet.

parse_turtle_uri(Prefixes, Src, URI) -->
    current_offset(Src, Start),
    ( next_char(Src, '<') ->
        identifier(init_id_chars, uri_chars, Src, URI),
        punct(">", Src, _)
    ;
        identifier(init_id_chars, id_chars, Src, Prefix),
        next_char(Src, ':'),
        identifier(init_id_chars, id_chars, Src, LocalName),
        (
            { namespace.search_namespace(Prefixes, prefix(Prefix),
                namespace_name(Namespace)) }
        ->
            { URI = Namespace ++ LocalName }
        ;
            fail_with_message("unknown prefix: " ++ Prefix, Start, Src, URI)
        )
    ).

:- pred parse_blank_node_label(src::in, string::out,
    ps::in, ps::out) is semidet.

parse_blank_node_label(Src, Label) -->
    next_char(Src, '_'),
    next_char(Src, ':'),
    identifier(init_id_chars, id_chars, Src, Label).

:- pred parse_turtle_literal(namespace_abbreviations::in, src::in, rdf.literal::out,
    ps::in, ps::out) is semidet.

parse_turtle_literal(Prefixes, Src, Lit) -->
    string(Src, Val),    
    ( punct("^^", Src, _) ->
        parse_turtle_uri(Prefixes, Src, URI),
        { Lit = literal(Val, typed(URI)) }
    ; next_char(Src, '@') ->
        identifier(init_id_chars, id_chars, Src, Lang),
        { Lit = literal(Val, plain(yes(lt(Lang)))) }
    ;
        { Lit = literal(Val, plain(no)) }
    ).

:- pred parse_turtle_object(namespace_abbreviations::in, src::in, rdf.object::out,
    ps::in, ps::out) is semidet.

parse_turtle_object(Prefixes, Src, Obj) -->
    ( parse_turtle_uri(Prefixes, Src, URI) ->
        { Obj = resource(uri(URI)) }
    ; parse_blank_node_label(Src, Label) ->
        { Obj = resource(anon(Label)) }
    ; 
        parse_turtle_literal(Prefixes, Src, Lit),
        { Obj = literal(Lit) }
    ).

%----------------------------------------------------------------------------%

:- func id_chars = string.

id_chars = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ1234567890_-".

:- func init_id_chars = string.

init_id_chars = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ".

:- func uri_chars = string.

uri_chars = id_chars ++ "/:@&?=%~#+.;".

:- func consume_whitespace_and_comments(src, ps) = ps.

consume_whitespace_and_comments(Src, PS0) = PS :-
    ( next_char(Src, C, PS0, PS1) ->
        ( char.is_whitespace(C) ->
            PS = consume_whitespace_and_comments(Src, PS1)
        ; C = '#' ->
            ( skip_to_eol(Src, _, PS1, PS2) ->
                PS = consume_whitespace_and_comments(Src, PS2)
            ;
                PS = consume_whitespace_and_comments(Src, PS1)
            )
        ;
            PS = PS0
        )
    ;
        PS = PS0
    ).

:- pred peek_punct(string::in, src::in, ps::in, ps::out) is semidet.

peek_punct(Punct, Src, PS, PS) :-
    punct(Punct, Src, _, PS, _).

%----------------------------------------------------------------------------%
:- end_module sparql.
%----------------------------------------------------------------------------%


More information about the reviews mailing list