[m-dev.] for review: namespaces in xml interface

Ina Cheng inch at students.cs.mu.oz.au
Thu Jan 4 13:25:56 AEDT 2001


Hi,

Here is the new cvs diff after addressing Tyson's comment.
Thank you.

Ina

======================================================================== 

Estimated hours taken: 44 

Add a module to turn an xml document into a namespace-aware
xml document. 

extras/xml/tryit.m 
	change the sample program to print out the new xml output 
extras/xml/xml.m 	
	add xml.ns.m to the wrapper module
extra/xml/xml.ns.m 
        the new module containing predicates to turn an xml document to a 
        namespace-aware xml document 


cvs diff: Diffing .
Index: tryit.m
===================================================================
RCS file: /home/mercury1/repository/mercury/extras/xml/tryit.m,v
retrieving revision 1.2
diff -u -r1.2 tryit.m
--- tryit.m     2000/12/01 06:07:37     1.2
+++ tryit.m     2001/01/01 23:39:23
@@ -17,7 +17,7 @@

 :- implementation.

-:- import_module parsing, xml, xml:cat, xml:encoding, xml:parse.
+:- import_module parsing, xml, xml:cat, xml:encoding, xml:parse, xml:ns.
 :- import_module char, list, map, std_util, string.

 main -->
@@ -29,7 +29,7 @@

 main([]) --> [].
 main([File|Files]) -->
-    see(File, Res0),
+    see(File, Res0),
     ( { Res0 = ok } ->
            io__read_file_as_string(_, Text),
            pstate(mkEntity(Text), mkEncoding(utf8), init),
@@ -69,13 +69,20 @@
            document,
            finish(Res),
            (
-               { Res = ok(_) }
+               { Res = ok((DTD, Doc)) },
+               { nsTranslate(Doc, NsDoc) },
+               { New = cat:ok((DTD, NsDoc)) },
+               write(New)
+               % if don't want to turn the doc to namespace awared,
+               % change the above three lines to
+               % write(Res)
            ;
-               { Res = error(Err) },
-               stderr_stream(StdErr),
-               format(StdErr, "%s: %s\n", [s(File), s(Err)])
+               { Res = error(Err) },
+               stderr_stream(StdErr),
+               format(StdErr, "%s: %s\n", [s(File), s(Err)]),
+               write(Res)
            ),
-           write(Res), nl,
+           nl,
            []
     ;
        []
Index: xml.m
===================================================================
RCS file: /home/mercury1/repository/mercury/extras/xml/xml.m,v
retrieving revision 1.1
diff -u -r1.1 xml.m
--- xml.m       2000/09/05 22:33:59     1.1
+++ xml.m       2000/12/22 02:05:53
@@ -16,4 +16,5 @@
 :- include_module xml:dtd.
 :- include_module xml:encoding.
 :- include_module xml:parse.
+:- include_module xml:ns.

Index: xml.ns.m
===================================================================
RCS file: xml.ns.m
diff -N xml.ns.m
--- /dev/null   Wed Nov 15 09:24:47 2000
+++ xml.ns.m    Thu Jan  4 13:07:52 2001
@@ -0,0 +1,276 @@
+%---------------------------------------------------------------------------%
+% Copyright (C) 2000, 2001 The University of Melbourne.
+% This file may only be copied under the terms of the GNU Library General
+% Public License - see the file COPYING.LIB in the Mercury distribution.
+%---------------------------------------------------------------------------%
+%
+% File: xml.ns.m
+% Main author: conway at cs.mu.oz.au, inch at students.cs.mu.oz.au
+%
+% This module provides predicates to turn an XML document into a namespace
+% aware XML document. A normal XML document containing multiple elements
+% and attributes may encounter problems of recognition and collision, ie.
+% same element type or attribute name may have different scope. A namespace
+% aware XML document solves this problem by using URI references to identify
+% elements and attributes.
+%
+% Reference:
+% <http://www.w3.org/TR-REC-xml-names>
+%
+%---------------------------------------------------------------------------%
+:- module xml:ns.
+
+:- interface.
+
+:- import_module list, array, string, std_util.
+:- import_module xml:doc.
+
+:- type nsDocument
+       --->    nsDoc(
+                   prestuff    :: list(ref(nsContent)),
+                                       % array index pointing to prestuff
+                   root        :: ref(nsContent),
+                                       % root of the document tree
+                   poststuff   :: list(ref(nsContent)),
+                                       % array index pointing to poststuff
+                   content     :: array(nsContent)
+                                       % array storing all document content
+               ).
+
+:- type nsContent
+       --->    nsElement(nsElement)    % element or attribute
+       ;       pi(string, string)      % processing instruction
+       ;       comment(string)         % comment
+       ;       data(string)            % data
+       .
+
+:- type nsElement
+       --->    nsElement(
+                   eName       :: qName,
+                                       % qualified name
+                   eAttrs      :: list(nsAttribute),
+                                       % list of attributes
+                   eContent    :: list(ref(content)),
+                                       % list of index pointing to children
+                                       % in the document tree
+                   eNamespaces :: nsList
+                                       % list of (Prefix - URI)
+               ).
+
+:- type nsAttribute
+       --->    nsAttribute(
+                   aName       :: qName,       % qualified name
+                   aValue      :: string       % attribute value
+               ).
+
+:- type qName
+       --->    qName(
+                   localName   :: string,      % local name without prefix
+                   nsURI       :: nsURI        % URI reference
+               ).
+
+:- type nsList == list(pair(string, string)).
+
+:- type nsURI == string.
+
+       % nsTranslate() takes in a normal XML document and returns a namespace
+       % aware XML document.
+:- pred nsTranslate((xml:doc):document::in, nsDocument::out) is det.
+
+%---------------------------------------------------------------------------%
+:- implementation.
+%---------------------------------------------------------------------------%
+
+:- import_module assoc_list, int, map, require.
+:- import_module xml:parse.
+
+:- type namespaces == map(string, string).
+
+nsTranslate(Doc, NsDoc) :-
+       traverse(Doc, [], NsDocContent),
+       NsDoc = nsDoc(Doc^prestuff, Doc^root, Doc^poststuff,
+                 array(NsDocContent)).
+
+       % traverse takes in a normal XML document and an accumulator,
+       % creates an empty tree, traverse and translate the document tree,
+       % and gives back a namespace aware document.
+:- pred traverse(document, list(nsContent), list(nsContent)).
+:- mode traverse(in, in, out) is det.
+
+traverse(Doc, Acc0, Acc) :-
+       traverse(Doc^content, map__init, "", Doc^root, Acc0, Acc).
+
+       % Carries out the actual traverse and transformation.
+       % If the content is an element, change it to a namespace aware
+       % element and visit its siblings, otherwise, convert the type.
+:- pred traverse(array(content), namespaces, string, ref(content),
+       list(nsContent), list(nsContent)).
+:- mode traverse(in, in, in, in, in, out) is det.
+
+traverse(ContentArray, Namespaces0, Default0, ContentRef, Acc0, Acc) :-
+       lookup(ContentArray, ContentRef, Content),
+       (
+               Content = element(Elem) ->
+
+               % examine the attributes to find any default namespaces
+               (
+                       defaultNamespace(Elem^eAttrs, Default1, Attrs0)
+               ->
+                       Default = Default1,
+                       Attrs1 = Attrs0
+               ;
+                       Default = Default0,
+                       Attrs1 = Elem^eAttrs
+               ),
+
+               % extract any namespace declaration and insert into tree
+               extractNamespaceDecls(Attrs1, NSList, Attrs2),
+               list__foldl((pred((Pref - URI)::in, NSs1::in,
+                       NSs2::out) is det :-
+                       map__set(NSs1, Pref, URI, NSs2)
+               ), NSList, Namespaces0, Namespaces),
+
+               % change element and attributes to namespace aware
+               namespaceizeName(Namespaces, Default, Elem^eName, Name),
+               map((pred(Attr0::in, Attr::out) is det :-
+                       Attr0 = attribute(AttrName0, Value),
+                       namespaceizeName(Namespaces, Default,
+                                       AttrName0, AttrName),
+                       Attr = nsAttribute(AttrName, Value)
+               ), Elem^eAttrs, Attrs),
+
+               % visit its siblings
+               Kids = Elem^eContent,
+               list__reverse(Kids, Kids0),
+               NsElem = nsElement(nsElement(Name, Attrs, Elem^eContent,
+                                       NSList)),
+               Acc1 = [NsElem|Acc0],
+               xml__ns__foldl(traverse, ContentArray, Namespaces, Default,
+                                      Kids0, Acc1, Acc)
+       ;
+               inst_cast_content(Content, Content1),
+               Acc = [convert_type(Content1) | Acc0]
+       ).
+
+       % Searches for any default namespaces.
+:- pred defaultNamespace(list(attribute), string, list(attribute)).
+:- mode defaultNamespace(in, out, out) is semidet.
+
+defaultNamespace([], _, _) :- fail.
+defaultNamespace([Attr|Attrs], Default, NewAttrs) :-
+       (
+               % If a default namespace is found, return the namespace
+               % and the list of attributes without the default namespace
+               is_xmlns(Attr^aName)
+       ->
+               Default = Attr^aValue,
+               NewAttrs = Attrs
+       ;
+               % Otherwise keep searching
+               Default = Default0,
+               NewAttrs = NewAttrs0,
+               defaultNamespace(Attrs, Default0, NewAttrs0)
+       ).
+
+
+       % Searches the list of attributes and extract any namespace
+       % declarations.
+:- pred        extractNamespaceDecls(list(attribute), nsList,
list(attribute)).+:- mode extractNamespaceDecls(in, out, out) is det.
+
+extractNamespaceDecls([], [], []).
+extractNamespaceDecls([Attr|Attrs], NSList, NewAttrs) :-
+       split_on_colon(Attr^aName, Prefix, Suffix),
+       (
+               % for case like < book xmlns:isbn="someURI" >
+               % Prefix = xmlns
+               % Suffix = isbn
+               is_xmlns(Prefix)
+       ->
+               NSList = [(Suffix - Attr^aValue) | NSList0],
+               NewAttrs = NewAttrs0
+       ;
+               NSList = NSList0,
+               NewAttrs = [Attr|NewAttrs0]
+       ),
+       extractNamespaceDecls(Attrs, NSList0, NewAttrs0).
+
+
+       % Change Name to QName by matching Name with the Namespaces list.
+       % If fails, applies default namespace.
+:- pred namespaceizeName(namespaces, string, string, qName).
+:- mode namespaceizeName(in, in, in, out) is det.
+
+namespaceizeName(Namespaces, Default, Name, QName) :-
+       split_on_colon(Name, Prefix, Suffix),
+       (
+               % for case when element name = prefix:suffix
+               map__search(Namespaces, Prefix, URI)
+       ->
+               QName = qName(Suffix, URI)
+       ;
+               % for case when attribute name = xmlns:suffix
+               is_xmlns(Prefix),
+               map__search(Namespaces, Suffix, URI)
+       ->
+               QName = qName(Suffix, URI)
+       ;
+               % for case when element name has no prefix
+               QName = qName(Suffix, Default)
+       ).
+
+
+       % Split a name into prefix and suffix.
+:- pred split_on_colon(string::in, string::out, string::out) is det.
+
+split_on_colon(Name, Prefix, Suffix) :-
+       (
+           string__sub_string_search(Name, ":", Index)
+       ->
+           string__length(Name, Length),
+           string__right(Name, Length-(Index+1), Suffix),
+           string__left(Name, Index, Prefix)
+       ;
+           Suffix = Name,
+           Prefix = ""
+       ).
+
+
+       % According to the namespaces specification `Namespaces in XML'
+       % <http://www.w3.org/TR-REC-xml-names>, a namespace is declared
+       % as an attribute with `xmlns' as a prefix.
+:- pred is_xmlns(string::in) is semidet.
+is_xmlns("xmlns").
+
+:- inst not_an_element ==
+       bound(
+                       comment(ground)
+               ;       data(ground)
+               ;       pi(ground, ground)
+
+       ).
+
+:- func convert_type(content) = nsContent.
+:- mode convert_type(in(not_an_element)) = out is det.
+
+convert_type(comment(S)) = comment(S).
+convert_type(data(S)) = data(S).
+convert_type(pi(S,S0)) = pi(S,S0).
+
+:- pred inst_cast_content(content, content).
+:- mode inst_cast_content(in, out(not_an_element)) is det.
+:- pragma c_code(inst_cast_content(F0::in, F1::out((not_an_element))),
+        [thread_safe, will_not_call_mercury],
+       "F1 = F0;").
+
+       % Traverse children in the document tree.
+:- pred xml__ns__foldl(pred(array(content), namespaces, string, ref(content),
+       T, T), array(content), namespaces, string, list(ref(content)), T, T).
+:- mode xml__ns__foldl(pred(in, in, in, in, in, out) is det, in, in, in, in,
+       in, out) is det.
+
+xml__ns__foldl(_Pred, _, _, _, [], Acc, Acc).
+xml__ns__foldl(Pred, Content, NameSpaces, Default, [Ref|Refs], Acc0, Acc) :-
+       foldl(Pred, Content, NameSpaces, Default, Refs, Acc1, Acc),
+       call(Pred, Content, NameSpaces, Default, Ref, Acc0, Acc1).
+


--------------------------------------------------------------------------
mercury-developers mailing list
Post messages to:       mercury-developers at cs.mu.oz.au
Administrative Queries: owner-mercury-developers at cs.mu.oz.au
Subscriptions:          mercury-developers-request at cs.mu.oz.au
--------------------------------------------------------------------------



More information about the developers mailing list