[element('http://www.example.com/PO1':purchaseOrder, [xmlns:apo='http://www.example.com/PO1', orderDate='1999-10-20'], [element(shipTo, [country='US'], [element(name, [], ['Alice Smith']), element(street, [], ['123 Maple Street']), element(city, [], ['Mill Valley']), element(state, [], ['CA']), element(zip, [], ['90952']) ]), element(billTo, [country='US'], [element(name, [], ['Robert Smith']), element(street, [], ['8 Oak Avenue']), element(city, [], ['Old Town']), element(state, [], ['PA']), element(zip, [], ['95819']) ]), element('http://www.example.com/PO1':comment, [], ['Hurry, my lawn is going wild!'] ), element(items, [], [element(item, [partNum='872-AA'], [element(productName, [], ['Lawnmower']), element(quantity, [], ['1']), element('USPrice', [], ['148.95']), element('http://www.example.com/PO1':comment, [], ['Confirm this is electric'] ) ]), element(item, [partNum='926-AA'], [element(productName, [], ['Baby Monitor']), element(quantity, [], ['1']), element('USPrice', [], ['39.98']), element(shipDate, [], ['1999-05-21']) ]) ]) ]) ]
/* Definite-clause grammar for the XML Schema * represented in po.xsd. * * This DCG was generated by a literate programming * system; if maintenance is necessary, make changes * to the source (podcg.xml), not to this output file. */ purchaseOrder --> [element('http://www.example.com/PO1':purchaseOrder, Attributes,Content)], { purchaseOrderType_atts(Attributes,[]), purchaseOrderType_cont(Content,[]) }. shipTo --> [element(shipTo,Attributes,Content)], { usAddress_atts(Attributes,[]), usAddress_cont(Content,[]) }. billTo --> [element(billTo,Attributes,Content)], { usAddress_atts(Attributes,[]), usAddress_cont(Content,[]) }. items --> [element(items,Attributes,Content)], { t_items_atts(Attributes,[]), t_items_cont(Content,[]) }. item --> [element(item,Attributes,Content)], { t_item_atts(Attributes,[]), t_item_cont(Content,[]) }.
comment --> [element('http://www.example.com/PO1':comment, Attributes,Content)], { xsd_string_atts(Attributes,[]), xsd_string_cont(Content,[]) }.But we know that the xsd:string type does not have any attributes declared, so Attributes must be the empty list. We can therefore define these elements this way instead:
comment --> [element('http://www.example.com/PO1':comment, [],Content)], { xsd_string_cont(Content,[]) }. poname --> [element(name,[],Content)], { xsd_string_cont(Content,[]) }. street --> [element(street,[],Content)], { xsd_string_cont(Content,[]) }. city --> [element(city,[],Content)], { xsd_string_cont(Content,[]) }. state --> [element(state,[],Content)], { xsd_string_cont(Content,[]) }. zip --> [element(zip,[],Content)], { xsd_decimal_cont(Content,[]) }. productName --> [element(productName,[],Content)], { xsd_string_cont(Content,[]) }. quantity --> [element(quantity,[],Content)], { xsd_integer_cont(Content,[]) }. 'USPrice' --> [element('USPrice',[],Content)], { xsd_decimal_cont(Content,[]) }. shipDate --> [element(shipDate,[],Content)], { xsd_date_cont(Content,[]) }.
purchaseOrderType_atts --> []. purchaseOrderType_atts --> purchaseOrderType_att, purchaseOrderType_atts. purchaseOrderType_att --> [orderDate=Date]. usAddress_atts --> []. usAddress_atts --> usAddress_att, usAddress_atts. usAddress_att --> [country='US']. /* note: fixed value! */ t_items_atts --> []. t_items_atts --> t_items_att, t_items_atts. t_items_att --> no_such_attribute. /* note that t_items_att is undefined, since there * are no attributes for the Items type */ t_item_atts --> []. t_item_atts --> t_item_att, t_item_atts. t_item_att --> [partNum=SKU].
purchaseOrderType_cont --> shipTo, billTo, opt_comment, items. opt_comment --> []. opt_comment --> comment, opt_comment. usAddress_cont --> poname, street, city, state, zip. t_items_cont --> star_item. star_item --> []. star_item --> item, star_item. t_item_cont --> productName, quantity, 'USPrice', opt_comment, opt_shipDate. opt_shipDate --> []. opt_shipDate --> shipDate.
/* In our representation of XML, character data is represented * as atoms. See note on handling of non-ASCII characters */ xsd_string_cont([H|T],T) :- xsd_string_value(H). xsd_string_value(LF) :- atom(LF).
/* We'll accept anything as a decimal number if we can convert * the atom to a list of character codes which can in turn be * converted to a number. */ xsd_decimal_cont([H|T],T) :- xsd_decimal_value(H). xsd_decimal_value(LF) :- atom_codes(LF,L), number_codes(N,L). xsd_integer_cont([H|T],T) :- xsd_integer_value(H). xsd_integer_value(LF) :- atom_codes(LF,L), number_codes(N,L), integer(N).
xsd_date_cont([H|T],T) :- xsd_date_value(H). xsd_date_value(LF) :- atom(LF). /* well, we really need to check this ... */
purchaseOrderType_atts --> []. purchaseOrderType_atts --> purchaseOrderType_att, purchaseOrderType_atts. purchaseOrderType_att --> [orderDate=Date], { xsd_date_value(Date) }. usAddress_atts --> []. usAddress_atts --> usAddress_att, usAddress_atts. usAddress_att --> [country='US']. /* note: fixed value! */ t_items_atts --> []. t_items_atts --> t_items_att, t_items_atts. t_items_att --> [this_will_never_match]. /* Since it has no equal sign or value, the rule for * t_items_att will never match anything. * Alternatively we could leave t_items_att undefined, since * there are no attributes for the Items type. */ t_item_atts --> []. t_item_atts --> t_item_att, t_item_atts. t_item_att --> [partNum=SKU], { po_sku_value(SKU) }.
<xsd:simpleType name="SKU"> <xsd:restriction base="xsd:string"> <xsd:pattern value="\d{3}-[A-Z]{2}"/> </xsd:restriction> </xsd:simpleType>The pattern can be translated readily into the following grammar operating on the character sequence of the lexical form:
po_sku_value(LF) :- atom_chars(LF,Charseq), sku_value(Charseq,[]). sku_value --> sku_decimal_part, hyphen, sku_alpha_part. sku_decimal_part --> digit, digit, digit. sku_alpha_part --> cap_a_z, cap_a_z.
digit --> [Char], { char_type(Char,digit) }. hyphen --> ['-']. cap_a_z --> [Char], { char_type(Char,upper) }.
t_item_atts --> [partNum=SKU], { po_sku_value(SKU) }.Or we could even put the constraint into the top-level rule for the item element:
item --> [element(item,[partNum=SKU],Content)], { po_sku_value(SKU), t_item_cont(Content,[]) }.In the general case, however, it is going to be simpler to put checking for required, forbidden, and defaulted attributes into a separate rule, which we will here call t_item_att_check:
t_item_att_check(LAVS) :- atts_present(LAVS,[partNum]).
t_item_att_check(LAVS,AugmentedLAVS) :- atts_present(LAVS,[partNum]), atts_absent(LAVS,[]), atts_defaulted(LAVS,[],AugmentedLAVS).
atts_present(LAVS,[]).Continued in <Utilities for checking required attributes 14>, <Utilities for checking required attributes 15>, <Utilities for checking forbidden attributes 16>, <Utilities for checking forbidden attributes 17>, <Utilities for checking defaulted attributes 18>
atts_present(LAVS,[HRA|RequiredTail]) :- att_present(LAVS,HRA), atts_present(LAVS,RequiredTail).
att_present([Attname=Attval|Tail],Attname). att_present([AVS|Tail],Attname) :- att_present(Tail,Attname).
att_present([],Attname) :- ...We don't have a rule for the empty list, though, because if we have run through the list of attribute-value specifications without finding one for the attribute name we are seeking, we want the predicate to fail. If necessary to prevent later maintenance programmers, or ourselves, from supplying the ‘missing’ induction basis by mistake, we could write:
att_present([],Attname) :- !, fail.
atts_absent(LAVS,[]).
atts_absent(LAVS,[HFA|ForbiddenTail]) :- not(att_present(LAVS,HFA)), atts_absent(LAVS,ForbiddenTail).
atts_defaulted(LAVS,[],LAVS). atts_defaulted(LAVS,[AN=AV|Tail],AugmentedLAVS) :- att_present(LAVS,AN), atts_defaulted(LAVS,Tail,AugmentedLAVS). atts_defaulted(LAVS,[AN=AV|Tail],[AN=AV|AugmentedLAVS]) :- not(att_present(LAVS,AN)), atts_defaulted(LAVS,Tail,AugmentedLAVS).
purchaseOrderType_att --> magic_att. usAddress_att --> magic_att. t_items_att --> magic_att. t_item_att --> magic_att. magic_att --> [xmlns=NS]. magic_att --> [xmlns:Pre=NS]. magic_att --> ['http://www.w3.org/2001/XMLSchema-instance':Localname=Value].
<apo:comment>Hurry, my lawn is going wild!</apo:comment> <apo:comment>I don't know how much longer I can hold out.</apo:comment>This is an error in the grammar above, which has been retained to illustrate the risks of hand-translation. Instead of
opt_comment --> []. opt_comment --> comment, opt_comment.(which allows arbitrarily many comments), the rule for the optional comment element should read
opt_comment --> []. opt_comment --> comment.
<apo:purchaseOrder xmlns:apo="http://www.example.com/PO1" orderDate="1999-10-19" orderDate="1999-10-20">(this can be blamed on the upstream processor, which should probably have rejected the document without producing an infoset, since single occurrence of attribute names is a well-formedness constraint)
<item partNum="926-AA"> <productName>Baby Monitor</productName> <quantity>100</quantity> <USPrice>39.98</USPrice> <shipDate>1999-05-21</shipDate> </item>
<item> <productName>Baby Monitor</productName> <quantity>1</quantity> <USPrice>39.98</USPrice> <shipDate>1999-05-21</shipDate> </item>
<zip>OX2 6NN</zip>
<quantity>one</quantity>
<USPrice>USD 39.98</USPrice>
Grune, Dick, and Ceriel J. H. Jacobs. 1990. Parsing techniques: a practical guide. New York, London: Ellis Horwood, 1990. Postscript of the book is available from the first author's Web site at http://www.cs.vu.nl/~dick/PTAPG.html
Sperberg-McQueen, C. M. 2004a. “A brief introduction to definite clause grammars and definite clause translation grammars”. Working paper prepared for the W3C XML Schema Working Group. 18 July 2004.
Sperberg-McQueen, C. M. 2004b. “Representing an XSD schema as a definite-clause translation grammar”. Working paper prepared for the W3C XML Schema Working Group. July 2004.
W3C (World Wide Web Consortium). 2001a. “XML Schema Part 0: Primer”, ed. David Fallside. W3C Recommendation, 2 May 2001. [Cambridge, Sophia-Antipolis, Tokyo: W3C] http://www.w3.org/TR/xmlschema-0/.
W3C (World Wide Web Consortium). 2001b. XML Schema Part 1: Structures, ed. Henry S. Thompson, David Beech, Murray Maloney, and Noah Mendelsohn. W3C Recommendation 2 May 2001. [Cambridge, Sophia-Antipolis, and Tokyo]: World Wide Web Consortium. http://www.w3.org/TR/2001/REC-xmlschema-1-20010502/
W3C (World Wide Web Consortium). 2001c. XML Schema Part 2: Datatypes, ed. Biron, Paul V. and Ashok Malhotra. W3C Recommendation 2 May 2001. [Cambridge, Sophia-Antipolis, and Tokyo]: World Wide Web Consortium. http://www.w3.org/TR/2001/REC-xmlschema-2-20010502/
Wielemaker, Jan. “SWI-Prolog SGML/XML parser: Version 1.0.14, March 2001”. http://www.swi-prolog.org/packages/sgml2pl.html
/* potest: simple one-item test routine for SWI Prolog * * This DCG was generated by a literate programming * system; if maintenance is necessary, make changes * to the source (podcg.xml), not to this output file. */ grammar_version(current,dctg,'d:/home/cmsmcq/2003/schema/dctg/po4.pl'). grammar_version(old,dctg,'d:/home/cmsmcq/2003/schema/dctg/podctg.pl'). grammar_version(dcg,dcg,'d:/home/cmsmcq/2003/schema/dctg/podcg3.pl'). potest(Grammar,dcg,XMLDocument) :- write('Loading grammar '), write(Grammar), write('...'), nl, consult(Grammar), write('Loading document '), write(XMLDocument), write('...'), nl, load_structure(XMLDocument,Infoset,[dialect(xmlns),space(remove)]), (purchaseOrder(Infoset,[]) -> writeq('yes') ; writeq('no')), nl. potest(Grammar,dctg,XMLDocument) :- write('Loading grammar '), write(Grammar), write('...'), nl, consult('d:/usr/lib/prolog/msmdctg.pl'), dctg_reconsult(Grammar), write('Loading document '), write(XMLDocument), write('...'), nl, load_structure(XMLDocument,Infoset,[dialect(xmlns),space(remove)]), (e_purchaseOrder(_Structure,Infoset,[]) -> writeq('yes') ; writeq('no')), nl. potest2(XMLDocument,dcg) :- write('Loading document '), write(XMLDocument), write('...'), nl, load_structure(XMLDocument,Infoset,[dialect(xmlns),space(remove)]), (purchaseOrder(Infoset,[]) -> writeq('yes') ; writeq('no')), nl. potest2(XMLDocument,dctg) :- write('Loading document '), write(XMLDocument), write('...'), nl, load_structure(XMLDocument,Infoset,[dialect(xmlns),space(remove)]), (e_purchaseOrder(_Structure,Infoset,[]) -> writeq('yes') ; writeq('no')), nl. one(V) :- grammar_version(V,Syn,File), potest(File,Syn,'d:/usr/lib/xmlschema/po/tests/po1.xml'). two(V) :- grammar_version(V,Syn,File), potest(File,Syn,'d:/usr/lib/xmlschema/po/tests/po1.xml'), potest2('d:/usr/lib/xmlschema/po/tests/po1v10a.xml',Syn). /* Each of the following should be valid */ good(V) :- grammar_version(V,Syn,File), potest(File,Syn,'d:/usr/lib/xmlschema/po/tests/po1.xml'), potest2('d:/usr/lib/xmlschema/po/tests/po1v10a.xml',Syn), potest2('d:/usr/lib/xmlschema/po/tests/po1v25.xml',Syn), potest2('d:/usr/lib/xmlschema/po/tests/po1v33.xml',Syn), potest2('d:/usr/lib/xmlschema/po/tests/po1v38.xml',Syn), potest2('d:/usr/lib/xmlschema/po/tests/po1v62d.xml',Syn), potest2('d:/usr/lib/xmlschema/po/tests/po1v65.xml',Syn), potest2('d:/usr/lib/xmlschema/po/tests/po1v79.xml',Syn), potest2('d:/usr/lib/xmlschema/po/tests/po1v80.xml',Syn), potest2('d:/usr/lib/xmlschema/po/tests/po1v100a.xml',Syn), potest2('d:/usr/lib/xmlschema/po/tests/po1v100b.xml',Syn), potest2('d:/usr/lib/xmlschema/po/tests/po1v121.xml',Syn). /* Each of the following should raise an error */ bad(V) :- grammar_version(V,Syn,File), potest(File,Syn,'d:/usr/lib/xmlschema/po/tests/po1.xml'), potest2('d:/usr/lib/xmlschema/po/tests/po1e04.xml',Syn), potest2('d:/usr/lib/xmlschema/po/tests/po1e13.xml',Syn), potest2('d:/usr/lib/xmlschema/po/tests/po1e14.xml',Syn), potest2('d:/usr/lib/xmlschema/po/tests/po1e15.xml',Syn), potest2('d:/usr/lib/xmlschema/po/tests/po1e15a.xml',Syn), potest2('d:/usr/lib/xmlschema/po/tests/po1e15b.xml',Syn), potest2('d:/usr/lib/xmlschema/po/tests/po1e15c.xml',Syn), potest2('d:/usr/lib/xmlschema/po/tests/po1e16.xml',Syn), potest2('d:/usr/lib/xmlschema/po/tests/po1e16b.xml',Syn), potest2('d:/usr/lib/xmlschema/po/tests/po1e18.xml',Syn), potest2('d:/usr/lib/xmlschema/po/tests/po1e19.xml',Syn), potest2('d:/usr/lib/xmlschema/po/tests/po1e20.xml',Syn), potest2('d:/usr/lib/xmlschema/po/tests/po1e27.xml',Syn), potest2('d:/usr/lib/xmlschema/po/tests/po1e28.xml',Syn), potest2('d:/usr/lib/xmlschema/po/tests/po1e28b.xml',Syn), potest2('d:/usr/lib/xmlschema/po/tests/po1e30.xml',Syn), potest2('d:/usr/lib/xmlschema/po/tests/po1e31.xml',Syn), potest2('d:/usr/lib/xmlschema/po/tests/po1e32.xml',Syn), potest2('d:/usr/lib/xmlschema/po/tests/po1e35.xml',Syn), potest2('d:/usr/lib/xmlschema/po/tests/po1e36.xml',Syn), potest2('d:/usr/lib/xmlschema/po/tests/po1e41.xml',Syn), potest2('d:/usr/lib/xmlschema/po/tests/po1e42.xml',Syn), potest2('d:/usr/lib/xmlschema/po/tests/po1e43.xml',Syn), potest2('d:/usr/lib/xmlschema/po/tests/po1e44.xml',Syn), potest2('d:/usr/lib/xmlschema/po/tests/po1e46.xml',Syn), potest2('d:/usr/lib/xmlschema/po/tests/po1e47.xml',Syn), potest2('d:/usr/lib/xmlschema/po/tests/po1e48.xml',Syn), potest2('d:/usr/lib/xmlschema/po/tests/po1e50.xml',Syn), potest2('d:/usr/lib/xmlschema/po/tests/po1e51.xml',Syn), potest2('d:/usr/lib/xmlschema/po/tests/po1e52.xml',Syn), potest2('d:/usr/lib/xmlschema/po/tests/po1e55.xml',Syn), potest2('d:/usr/lib/xmlschema/po/tests/po1e56.xml',Syn), potest2('d:/usr/lib/xmlschema/po/tests/po1e62.xml',Syn), potest2('d:/usr/lib/xmlschema/po/tests/po1e62b.xml',Syn), /* potest2('d:/usr/lib/xmlschema/po/tests/po1e62c.xml',Syn), */ potest2('d:/usr/lib/xmlschema/po/tests/po1e63.xml',Syn), potest2('d:/usr/lib/xmlschema/po/tests/po1e64.xml',Syn), potest2('d:/usr/lib/xmlschema/po/tests/po1e68.xml',Syn), potest2('d:/usr/lib/xmlschema/po/tests/po1e70.xml',Syn), potest2('d:/usr/lib/xmlschema/po/tests/po1e70b.xml',Syn), potest2('d:/usr/lib/xmlschema/po/tests/po1e78.xml',Syn), potest2('d:/usr/lib/xmlschema/po/tests/po1e81.xml',Syn), potest2('d:/usr/lib/xmlschema/po/tests/po1e86.xml',Syn), potest2('d:/usr/lib/xmlschema/po/tests/po1e87.xml',Syn), potest2('d:/usr/lib/xmlschema/po/tests/po1e88.xml',Syn), potest2('d:/usr/lib/xmlschema/po/tests/po1e89.xml',Syn), potest2('d:/usr/lib/xmlschema/po/tests/po1e91.xml',Syn), potest2('d:/usr/lib/xmlschema/po/tests/po1e92.xml',Syn), potest2('d:/usr/lib/xmlschema/po/tests/po1e101a.xml',Syn), potest2('d:/usr/lib/xmlschema/po/tests/po1e101b.xml',Syn), /* potest2('d:/usr/lib/xmlschema/po/tests/po1e101c.xml',Syn), */ potest2('d:/usr/lib/xmlschema/po/tests/po1e101d.xml',Syn), /* potest2('d:/usr/lib/xmlschema/po/tests/po1e105bisa.xml',Syn), */ potest2('d:/usr/lib/xmlschema/po/tests/po1e105bisb.xml',Syn), potest2('d:/usr/lib/xmlschema/po/tests/po1e106.xml',Syn), potest2('d:/usr/lib/xmlschema/po/tests/po1e109.xml',Syn), potest2('d:/usr/lib/xmlschema/po/tests/po1e113.xml',Syn), potest2('d:/usr/lib/xmlschema/po/tests/po1e114.xml',Syn), potest2('d:/usr/lib/xmlschema/po/tests/po1e116.xml',Syn), potest2('d:/usr/lib/xmlschema/po/tests/po1e122a.xml',Syn), potest2('d:/usr/lib/xmlschema/po/tests/po1e122b.xml',Syn), potest2('d:/usr/lib/xmlschema/po/tests/po1e122c.xml',Syn). /* 62c, 101c, 105bisa are commented out because they blow up * number_chars/2. Need a more robust approach to checking * numbers, apparently. */ ugly(V) :- good(V), bad(V).
cd ~/2003/schema/dctg $ /cygdrive/d/usr/src/pl/bin/plcon.exe -f potests.pl -g 'one(current)' -t haltor
$ /cygdrive/d/usr/src/pl/bin/plcon.exe \ > -f d:/home/cmsmcq/2002/Prolog/potest.pl \ > -g 'good(current)' -t haltThe command line might be simpler if Prolog has a good search path, but I haven't gotten around to that yet.