[m-dev.] for review: namespaces in xml interface
Ina Cheng
inch at students.cs.mu.oz.au
Thu Jan 4 13:25:56 AEDT 2001
Hi,
Here is the new cvs diff after addressing Tyson's comment.
Thank you.
Ina
========================================================================
Estimated hours taken: 44
Add a module to turn an xml document into a namespace-aware
xml document.
extras/xml/tryit.m
change the sample program to print out the new xml output
extras/xml/xml.m
add xml.ns.m to the wrapper module
extra/xml/xml.ns.m
the new module containing predicates to turn an xml document to a
namespace-aware xml document
cvs diff: Diffing .
Index: tryit.m
===================================================================
RCS file: /home/mercury1/repository/mercury/extras/xml/tryit.m,v
retrieving revision 1.2
diff -u -r1.2 tryit.m
--- tryit.m 2000/12/01 06:07:37 1.2
+++ tryit.m 2001/01/01 23:39:23
@@ -17,7 +17,7 @@
:- implementation.
-:- import_module parsing, xml, xml:cat, xml:encoding, xml:parse.
+:- import_module parsing, xml, xml:cat, xml:encoding, xml:parse, xml:ns.
:- import_module char, list, map, std_util, string.
main -->
@@ -29,7 +29,7 @@
main([]) --> [].
main([File|Files]) -->
- see(File, Res0),
+ see(File, Res0),
( { Res0 = ok } ->
io__read_file_as_string(_, Text),
pstate(mkEntity(Text), mkEncoding(utf8), init),
@@ -69,13 +69,20 @@
document,
finish(Res),
(
- { Res = ok(_) }
+ { Res = ok((DTD, Doc)) },
+ { nsTranslate(Doc, NsDoc) },
+ { New = cat:ok((DTD, NsDoc)) },
+ write(New)
+ % if don't want to turn the doc to namespace awared,
+ % change the above three lines to
+ % write(Res)
;
- { Res = error(Err) },
- stderr_stream(StdErr),
- format(StdErr, "%s: %s\n", [s(File), s(Err)])
+ { Res = error(Err) },
+ stderr_stream(StdErr),
+ format(StdErr, "%s: %s\n", [s(File), s(Err)]),
+ write(Res)
),
- write(Res), nl,
+ nl,
[]
;
[]
Index: xml.m
===================================================================
RCS file: /home/mercury1/repository/mercury/extras/xml/xml.m,v
retrieving revision 1.1
diff -u -r1.1 xml.m
--- xml.m 2000/09/05 22:33:59 1.1
+++ xml.m 2000/12/22 02:05:53
@@ -16,4 +16,5 @@
:- include_module xml:dtd.
:- include_module xml:encoding.
:- include_module xml:parse.
+:- include_module xml:ns.
Index: xml.ns.m
===================================================================
RCS file: xml.ns.m
diff -N xml.ns.m
--- /dev/null Wed Nov 15 09:24:47 2000
+++ xml.ns.m Thu Jan 4 13:07:52 2001
@@ -0,0 +1,276 @@
+%---------------------------------------------------------------------------%
+% Copyright (C) 2000, 2001 The University of Melbourne.
+% This file may only be copied under the terms of the GNU Library General
+% Public License - see the file COPYING.LIB in the Mercury distribution.
+%---------------------------------------------------------------------------%
+%
+% File: xml.ns.m
+% Main author: conway at cs.mu.oz.au, inch at students.cs.mu.oz.au
+%
+% This module provides predicates to turn an XML document into a namespace
+% aware XML document. A normal XML document containing multiple elements
+% and attributes may encounter problems of recognition and collision, ie.
+% same element type or attribute name may have different scope. A namespace
+% aware XML document solves this problem by using URI references to identify
+% elements and attributes.
+%
+% Reference:
+% <http://www.w3.org/TR-REC-xml-names>
+%
+%---------------------------------------------------------------------------%
+:- module xml:ns.
+
+:- interface.
+
+:- import_module list, array, string, std_util.
+:- import_module xml:doc.
+
+:- type nsDocument
+ ---> nsDoc(
+ prestuff :: list(ref(nsContent)),
+ % array index pointing to prestuff
+ root :: ref(nsContent),
+ % root of the document tree
+ poststuff :: list(ref(nsContent)),
+ % array index pointing to poststuff
+ content :: array(nsContent)
+ % array storing all document content
+ ).
+
+:- type nsContent
+ ---> nsElement(nsElement) % element or attribute
+ ; pi(string, string) % processing instruction
+ ; comment(string) % comment
+ ; data(string) % data
+ .
+
+:- type nsElement
+ ---> nsElement(
+ eName :: qName,
+ % qualified name
+ eAttrs :: list(nsAttribute),
+ % list of attributes
+ eContent :: list(ref(content)),
+ % list of index pointing to children
+ % in the document tree
+ eNamespaces :: nsList
+ % list of (Prefix - URI)
+ ).
+
+:- type nsAttribute
+ ---> nsAttribute(
+ aName :: qName, % qualified name
+ aValue :: string % attribute value
+ ).
+
+:- type qName
+ ---> qName(
+ localName :: string, % local name without prefix
+ nsURI :: nsURI % URI reference
+ ).
+
+:- type nsList == list(pair(string, string)).
+
+:- type nsURI == string.
+
+ % nsTranslate() takes in a normal XML document and returns a namespace
+ % aware XML document.
+:- pred nsTranslate((xml:doc):document::in, nsDocument::out) is det.
+
+%---------------------------------------------------------------------------%
+:- implementation.
+%---------------------------------------------------------------------------%
+
+:- import_module assoc_list, int, map, require.
+:- import_module xml:parse.
+
+:- type namespaces == map(string, string).
+
+nsTranslate(Doc, NsDoc) :-
+ traverse(Doc, [], NsDocContent),
+ NsDoc = nsDoc(Doc^prestuff, Doc^root, Doc^poststuff,
+ array(NsDocContent)).
+
+ % traverse takes in a normal XML document and an accumulator,
+ % creates an empty tree, traverse and translate the document tree,
+ % and gives back a namespace aware document.
+:- pred traverse(document, list(nsContent), list(nsContent)).
+:- mode traverse(in, in, out) is det.
+
+traverse(Doc, Acc0, Acc) :-
+ traverse(Doc^content, map__init, "", Doc^root, Acc0, Acc).
+
+ % Carries out the actual traverse and transformation.
+ % If the content is an element, change it to a namespace aware
+ % element and visit its siblings, otherwise, convert the type.
+:- pred traverse(array(content), namespaces, string, ref(content),
+ list(nsContent), list(nsContent)).
+:- mode traverse(in, in, in, in, in, out) is det.
+
+traverse(ContentArray, Namespaces0, Default0, ContentRef, Acc0, Acc) :-
+ lookup(ContentArray, ContentRef, Content),
+ (
+ Content = element(Elem) ->
+
+ % examine the attributes to find any default namespaces
+ (
+ defaultNamespace(Elem^eAttrs, Default1, Attrs0)
+ ->
+ Default = Default1,
+ Attrs1 = Attrs0
+ ;
+ Default = Default0,
+ Attrs1 = Elem^eAttrs
+ ),
+
+ % extract any namespace declaration and insert into tree
+ extractNamespaceDecls(Attrs1, NSList, Attrs2),
+ list__foldl((pred((Pref - URI)::in, NSs1::in,
+ NSs2::out) is det :-
+ map__set(NSs1, Pref, URI, NSs2)
+ ), NSList, Namespaces0, Namespaces),
+
+ % change element and attributes to namespace aware
+ namespaceizeName(Namespaces, Default, Elem^eName, Name),
+ map((pred(Attr0::in, Attr::out) is det :-
+ Attr0 = attribute(AttrName0, Value),
+ namespaceizeName(Namespaces, Default,
+ AttrName0, AttrName),
+ Attr = nsAttribute(AttrName, Value)
+ ), Elem^eAttrs, Attrs),
+
+ % visit its siblings
+ Kids = Elem^eContent,
+ list__reverse(Kids, Kids0),
+ NsElem = nsElement(nsElement(Name, Attrs, Elem^eContent,
+ NSList)),
+ Acc1 = [NsElem|Acc0],
+ xml__ns__foldl(traverse, ContentArray, Namespaces, Default,
+ Kids0, Acc1, Acc)
+ ;
+ inst_cast_content(Content, Content1),
+ Acc = [convert_type(Content1) | Acc0]
+ ).
+
+ % Searches for any default namespaces.
+:- pred defaultNamespace(list(attribute), string, list(attribute)).
+:- mode defaultNamespace(in, out, out) is semidet.
+
+defaultNamespace([], _, _) :- fail.
+defaultNamespace([Attr|Attrs], Default, NewAttrs) :-
+ (
+ % If a default namespace is found, return the namespace
+ % and the list of attributes without the default namespace
+ is_xmlns(Attr^aName)
+ ->
+ Default = Attr^aValue,
+ NewAttrs = Attrs
+ ;
+ % Otherwise keep searching
+ Default = Default0,
+ NewAttrs = NewAttrs0,
+ defaultNamespace(Attrs, Default0, NewAttrs0)
+ ).
+
+
+ % Searches the list of attributes and extract any namespace
+ % declarations.
+:- pred extractNamespaceDecls(list(attribute), nsList,
list(attribute)).+:- mode extractNamespaceDecls(in, out, out) is det.
+
+extractNamespaceDecls([], [], []).
+extractNamespaceDecls([Attr|Attrs], NSList, NewAttrs) :-
+ split_on_colon(Attr^aName, Prefix, Suffix),
+ (
+ % for case like < book xmlns:isbn="someURI" >
+ % Prefix = xmlns
+ % Suffix = isbn
+ is_xmlns(Prefix)
+ ->
+ NSList = [(Suffix - Attr^aValue) | NSList0],
+ NewAttrs = NewAttrs0
+ ;
+ NSList = NSList0,
+ NewAttrs = [Attr|NewAttrs0]
+ ),
+ extractNamespaceDecls(Attrs, NSList0, NewAttrs0).
+
+
+ % Change Name to QName by matching Name with the Namespaces list.
+ % If fails, applies default namespace.
+:- pred namespaceizeName(namespaces, string, string, qName).
+:- mode namespaceizeName(in, in, in, out) is det.
+
+namespaceizeName(Namespaces, Default, Name, QName) :-
+ split_on_colon(Name, Prefix, Suffix),
+ (
+ % for case when element name = prefix:suffix
+ map__search(Namespaces, Prefix, URI)
+ ->
+ QName = qName(Suffix, URI)
+ ;
+ % for case when attribute name = xmlns:suffix
+ is_xmlns(Prefix),
+ map__search(Namespaces, Suffix, URI)
+ ->
+ QName = qName(Suffix, URI)
+ ;
+ % for case when element name has no prefix
+ QName = qName(Suffix, Default)
+ ).
+
+
+ % Split a name into prefix and suffix.
+:- pred split_on_colon(string::in, string::out, string::out) is det.
+
+split_on_colon(Name, Prefix, Suffix) :-
+ (
+ string__sub_string_search(Name, ":", Index)
+ ->
+ string__length(Name, Length),
+ string__right(Name, Length-(Index+1), Suffix),
+ string__left(Name, Index, Prefix)
+ ;
+ Suffix = Name,
+ Prefix = ""
+ ).
+
+
+ % According to the namespaces specification `Namespaces in XML'
+ % <http://www.w3.org/TR-REC-xml-names>, a namespace is declared
+ % as an attribute with `xmlns' as a prefix.
+:- pred is_xmlns(string::in) is semidet.
+is_xmlns("xmlns").
+
+:- inst not_an_element ==
+ bound(
+ comment(ground)
+ ; data(ground)
+ ; pi(ground, ground)
+
+ ).
+
+:- func convert_type(content) = nsContent.
+:- mode convert_type(in(not_an_element)) = out is det.
+
+convert_type(comment(S)) = comment(S).
+convert_type(data(S)) = data(S).
+convert_type(pi(S,S0)) = pi(S,S0).
+
+:- pred inst_cast_content(content, content).
+:- mode inst_cast_content(in, out(not_an_element)) is det.
+:- pragma c_code(inst_cast_content(F0::in, F1::out((not_an_element))),
+ [thread_safe, will_not_call_mercury],
+ "F1 = F0;").
+
+ % Traverse children in the document tree.
+:- pred xml__ns__foldl(pred(array(content), namespaces, string, ref(content),
+ T, T), array(content), namespaces, string, list(ref(content)), T, T).
+:- mode xml__ns__foldl(pred(in, in, in, in, in, out) is det, in, in, in, in,
+ in, out) is det.
+
+xml__ns__foldl(_Pred, _, _, _, [], Acc, Acc).
+xml__ns__foldl(Pred, Content, NameSpaces, Default, [Ref|Refs], Acc0, Acc) :-
+ foldl(Pred, Content, NameSpaces, Default, Refs, Acc1, Acc),
+ call(Pred, Content, NameSpaces, Default, Ref, Acc0, Acc1).
+
--------------------------------------------------------------------------
mercury-developers mailing list
Post messages to: mercury-developers at cs.mu.oz.au
Administrative Queries: owner-mercury-developers at cs.mu.oz.au
Subscriptions: mercury-developers-request at cs.mu.oz.au
--------------------------------------------------------------------------
More information about the developers
mailing list