/* * The Apache Software License, Version 1.1 * * Copyright (c) 1999-2001 The Apache Software Foundation. All rights * reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in * the documentation and/or other materials provided with the * distribution. * * 3. The end-user documentation included with the redistribution, * if any, must include the following acknowledgment: * "This product includes software developed by the * Apache Software Foundation (http://www.apache.org/)." * Alternately, this acknowledgment may appear in the software itself, * if and wherever such third-party acknowledgments normally appear. * * 4. The names "Xerces" and "Apache Software Foundation" must * not be used to endorse or promote products derived from this * software without prior written permission. For written * permission, please contact apache\@apache.org. * * 5. Products derived from this software may not be called "Apache", * nor may "Apache" appear in their name, without prior written * permission of the Apache Software Foundation. * * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * ==================================================================== * * This software consists of voluntary contributions made by many * individuals on behalf of the Apache Software Foundation, and was * originally based on software copyright (c) 1999, International * Business Machines, Inc., http://www.ibm.com . For more information * on the Apache Software Foundation, please see * . */ /* * $Id: DOMPrint.cpp,v 1.36 2002/02/01 22:35:33 peiyongz Exp $ */ // --------------------------------------------------------------------------- // This sample program which invokes the DOMParser to build a DOM tree for // the specified input file. It then walks the tree, and prints out the data // as an XML file. // // Limitations: // 1. The encoding="xxx" clause in the XML header should reflect // the system local code page, but does not. // 2. Cases where the XML data contains characters that can not // be represented in the system local code page are not handled. // 3. Enabled namespace processing won't affect the output, since // DOM doesn't do namespace yet. But it will confirm that all // prefixes are correctly mapped, else you'll get errors. // --------------------------------------------------------------------------- // --------------------------------------------------------------------------- // Includes // --------------------------------------------------------------------------- #include #include #include #include #include #include #include #include #include "DOMTreeErrorReporter.hpp" #include #include // --------------------------------------------------------------------------- // Local const data // // Note: This is the 'safe' way to do these strings. If you compiler supports // L"" style strings, and portability is not a concern, you can use // those types constants directly. // --------------------------------------------------------------------------- static const XMLCh gEndElement[] = { chOpenAngle, chForwardSlash, chNull }; static const XMLCh gEndPI[] = { chQuestion, chCloseAngle, chNull}; static const XMLCh gStartPI[] = { chOpenAngle, chQuestion, chNull }; static const XMLCh gXMLDecl1[] = { chOpenAngle, chQuestion, chLatin_x, chLatin_m, chLatin_l , chSpace, chLatin_v, chLatin_e, chLatin_r, chLatin_s, chLatin_i , chLatin_o, chLatin_n, chEqual, chDoubleQuote, chNull }; static const XMLCh gXMLDecl2[] = { chDoubleQuote, chSpace, chLatin_e, chLatin_n, chLatin_c , chLatin_o, chLatin_d, chLatin_i, chLatin_n, chLatin_g, chEqual , chDoubleQuote, chNull }; static const XMLCh gXMLDecl3[] = { chDoubleQuote, chSpace, chLatin_s, chLatin_t, chLatin_a , chLatin_n, chLatin_d, chLatin_a, chLatin_l, chLatin_o , chLatin_n, chLatin_e, chEqual, chDoubleQuote, chNull }; static const XMLCh gXMLDecl4[] = { chDoubleQuote, chQuestion, chCloseAngle , chLF, chNull }; static const XMLCh gStartCDATA[] = { chOpenAngle, chBang, chOpenSquare, chLatin_C, chLatin_D, chLatin_A, chLatin_T, chLatin_A, chOpenSquare, chNull }; static const XMLCh gEndCDATA[] = { chCloseSquare, chCloseSquare, chCloseAngle, chNull }; static const XMLCh gStartComment[] = { chOpenAngle, chBang, chDash, chDash, chNull }; static const XMLCh gEndComment[] = { chDash, chDash, chCloseAngle, chNull }; static const XMLCh gStartDoctype[] = { chOpenAngle, chBang, chLatin_D, chLatin_O, chLatin_C, chLatin_T, chLatin_Y, chLatin_P, chLatin_E, chSpace, chNull }; static const XMLCh gPublic[] = { chLatin_P, chLatin_U, chLatin_B, chLatin_L, chLatin_I, chLatin_C, chSpace, chDoubleQuote, chNull }; static const XMLCh gSystem[] = { chLatin_S, chLatin_Y, chLatin_S, chLatin_T, chLatin_E, chLatin_M, chSpace, chDoubleQuote, chNull }; static const XMLCh gStartEntity[] = { chOpenAngle, chBang, chLatin_E, chLatin_N, chLatin_T, chLatin_I, chLatin_T, chLatin_Y, chSpace, chNull }; static const XMLCh gNotation[] = { chLatin_N, chLatin_D, chLatin_A, chLatin_T, chLatin_A, chSpace, chDoubleQuote, chNull }; // --------------------------------------------------------------------------- // Local classes // --------------------------------------------------------------------------- class DOMPrintFormatTarget : public XMLFormatTarget { public: DOMPrintFormatTarget() {}; ~DOMPrintFormatTarget() {}; // ----------------------------------------------------------------------- // Implementations of the format target interface // ----------------------------------------------------------------------- void writeChars(const XMLByte* const toWrite, const unsigned int count, XMLFormatter * const formatter) { // Surprisingly, Solaris was the only platform on which // required the char* cast to print out the string correctly. // Without the cast, it was printing the pointer value in hex. // Quite annoying, considering every other platform printed // the string with the explicit cast to char* below. cout.write((char *) toWrite, (int) count); }; private: // ----------------------------------------------------------------------- // Unimplemented methods. // ----------------------------------------------------------------------- DOMPrintFormatTarget(const DOMPrintFormatTarget& other); void operator=(const DOMPrintFormatTarget& rhs); }; // --------------------------------------------------------------------------- // Local data // // gXmlFile // The path to the file to parser. Set via command line. // // gDoNamespaces // Indicates whether namespace processing should be done. // // gDoSchema // Indicates whether schema processing should be done. // // gSchemaFullChecking // Indicates whether full schema constraint checking should be done. // // gDoCreate // Indicates whether entity reference nodes needs to be created or not // Defaults to false // // gEncodingName // The encoding we are to output in. If not set on the command line, // then it is defaults to the encoding of the input XML file. // // gValScheme // Indicates what validation scheme to use. It defaults to 'auto', but // can be set via the -v= command. // // --------------------------------------------------------------------------- static char* gXmlFile = 0; static bool gDoNamespaces = false; static bool gDoSchema = false; static bool gSchemaFullChecking = false; static bool gDoCreate = false; static XMLCh* gEncodingName = 0; static XMLFormatter::UnRepFlags gUnRepFlags = XMLFormatter::UnRep_CharRef; static DOMParser::ValSchemes gValScheme = DOMParser::Val_Auto; static XMLFormatter* gFormatter = 0; // --------------------------------------------------------------------------- // Forward references // --------------------------------------------------------------------------- void usage(); ostream& operator<<(ostream& target, const DOMString& toWrite); ostream& operator<<(ostream& target, DOM_Node& toWrite); XMLFormatter& operator<< (XMLFormatter& strm, const DOMString& s); // --------------------------------------------------------------------------- // // Usage() // // --------------------------------------------------------------------------- void usage() { cout << "\nUsage:\n" " DOMPrint [options] \n\n" "This program invokes the DOM parser, and builds the DOM tree.\n" "It then traverses the DOM tree and prints the contents of the\n" "tree for the specified XML file.\n\n" "Options:\n" " -e create entity reference nodes. Default is no expansion.\n" " -u=xxx Handle unrepresentable chars [fail | rep | ref*].\n" " -v=xxx Validation scheme [always | never | auto*].\n" " -n Enable namespace processing. Default is off.\n" " -s Enable schema processing. Default is off.\n" " -f Enable full schema constraint checking. Defaults to off.\n" " -x=XXX Use a particular encoding for output. Default is\n" " the same encoding as the input XML file. UTF-8 if\n" " input XML file has not XML declaration.\n" " -? Show this help.\n\n" " * = Default if not provided explicitly.\n\n" "The parser has intrinsic support for the following encodings:\n" " UTF-8, USASCII, ISO8859-1, UTF-16[BL]E, UCS-4[BL]E,\n" " WINDOWS-1252, IBM1140, IBM037.\n" << endl; } // --------------------------------------------------------------------------- // // main // // --------------------------------------------------------------------------- int main(int argC, char* argV[]) { int retval = 0; // Initialize the XML4C2 system try { XMLPlatformUtils::Initialize(); } catch(const XMLException& toCatch) { cerr << "Error during Xerces-c Initialization.\n" << " Exception message:" << DOMString(toCatch.getMessage()) << endl; return 1; } // Check command line and extract arguments. if (argC < 2) { usage(); XMLPlatformUtils::Terminate(); return 1; } // See if non validating dom parser configuration is requested. int parmInd; for (parmInd = 1; parmInd < argC; parmInd++) { // Break out on first parm not starting with a dash if (argV[parmInd][0] != '-') break; // Watch for special case help request if (!strcmp(argV[parmInd], "-?")) { usage(); XMLPlatformUtils::Terminate(); return 2; } else if (!strncmp(argV[parmInd], "-v=", 3) || !strncmp(argV[parmInd], "-V=", 3)) { const char* const parm = &argV[parmInd][3]; if (!strcmp(parm, "never")) gValScheme = DOMParser::Val_Never; else if (!strcmp(parm, "auto")) gValScheme = DOMParser::Val_Auto; else if (!strcmp(parm, "always")) gValScheme = DOMParser::Val_Always; else { cerr << "Unknown -v= value: " << parm << endl; XMLPlatformUtils::Terminate(); return 2; } } else if (!strcmp(argV[parmInd], "-n") || !strcmp(argV[parmInd], "-N")) { gDoNamespaces = true; } else if (!strcmp(argV[parmInd], "-s") || !strcmp(argV[parmInd], "-S")) { gDoSchema = true; } else if (!strcmp(argV[parmInd], "-f") || !strcmp(argV[parmInd], "-F")) { gSchemaFullChecking = true; } else if (!strcmp(argV[parmInd], "-e") || !strcmp(argV[parmInd], "-E")) { gDoCreate = true; } else if (!strncmp(argV[parmInd], "-x=", 3) || !strncmp(argV[parmInd], "-X=", 3)) { // Get out the encoding name gEncodingName = XMLString::transcode( &(argV[parmInd][3]) ); } else if (!strncmp(argV[parmInd], "-u=", 3) || !strncmp(argV[parmInd], "-U=", 3)) { const char* const parm = &argV[parmInd][3]; if (!strcmp(parm, "fail")) gUnRepFlags = XMLFormatter::UnRep_Fail; else if (!strcmp(parm, "rep")) gUnRepFlags = XMLFormatter::UnRep_Replace; else if (!strcmp(parm, "ref")) gUnRepFlags = XMLFormatter::UnRep_CharRef; else { cerr << "Unknown -u= value: " << parm << endl; XMLPlatformUtils::Terminate(); return 2; } } // else if (!strcmp(argV[parmInd], "-NoEscape")) // { // gDoEscapes = false; // } else { cerr << "Unknown option '" << argV[parmInd] << "', ignoring it.\n" << endl; } } // // And now we have to have only one parameter left and it must be // the file name. // if (parmInd + 1 != argC) { usage(); XMLPlatformUtils::Terminate(); return 1; } gXmlFile = argV[parmInd]; // // Create our parser, then attach an error handler to the parser. // The parser will call back to methods of the ErrorHandler if it // discovers errors during the course of parsing the XML document. // DOMParser *parser = new DOMParser; parser->setValidationScheme(gValScheme); parser->setDoNamespaces(gDoNamespaces); parser->setDoSchema(gDoSchema); parser->setValidationSchemaFullChecking(gSchemaFullChecking); DOMTreeErrorReporter *errReporter = new DOMTreeErrorReporter(); parser->setErrorHandler(errReporter); parser->setCreateEntityReferenceNodes(gDoCreate); parser->setToCreateXMLDeclTypeNode(true); // // Parse the XML file, catching any XML exceptions that might propogate // out of it. // bool errorsOccured = false; try { parser->parse(gXmlFile); int errorCount = parser->getErrorCount(); if (errorCount > 0) errorsOccured = true; } catch (const XMLException& e) { cerr << "An error occured during parsing\n Message: " << DOMString(e.getMessage()) << endl; errorsOccured = true; } catch (const DOM_DOMException& e) { cerr << "A DOM error occured during parsing\n DOMException code: " << e.code << endl; errorsOccured = true; } catch (...) { cerr << "An error occured during parsing\n " << endl; errorsOccured = true; } // If the parse was successful, output the document data from the DOM tree if (!errorsOccured && !errReporter->getSawErrors()) { DOM_Node doc = parser->getDocument(); DOMPrintFormatTarget* formatTarget = new DOMPrintFormatTarget(); if (gEncodingName == 0) { DOMString encNameStr("UTF-8"); DOM_Node aNode = doc.getFirstChild(); if (aNode.getNodeType() == DOM_Node::XML_DECL_NODE) { DOMString aStr = ((DOM_XMLDecl &)aNode).getEncoding(); if (aStr != "") { encNameStr = aStr; } } unsigned int lent = encNameStr.length(); gEncodingName = new XMLCh[lent + 1]; XMLString::copyNString(gEncodingName, encNameStr.rawBuffer(), lent); gEncodingName[lent] = 0; } try { gFormatter = new XMLFormatter(gEncodingName, formatTarget, XMLFormatter::NoEscapes, gUnRepFlags); cout << doc; *gFormatter << chLF; // add linefeed in requested output encoding cout << flush; } catch (XMLException& e) { cerr << "An error occurred during creation of output transcoder. Msg is:" << endl << DOMString(e.getMessage()) << endl; retval = 4; } delete formatTarget; delete gFormatter; } else retval = 4; delete [] gEncodingName; // // Clean up the error handler. The parser does not adopt handlers // since they could be many objects or one object installed for multiple // handlers. // delete errReporter; // // Delete the parser itself. Must be done prior to calling Terminate, below. // delete parser; // And call the termination method XMLPlatformUtils::Terminate(); // DomMemDebug().print(); // // The DOM document and its contents are reference counted, and need // no explicit deletion. // return retval; } // --------------------------------------------------------------------------- // ostream << DOM_Node // // Stream out a DOM node, and, recursively, all of its children. This // function is the heart of writing a DOM tree out as XML source. Give it // a document node and it will do the whole thing. // --------------------------------------------------------------------------- ostream& operator<<(ostream& target, DOM_Node& toWrite) { // Get the name and value out for convenience DOMString nodeName = toWrite.getNodeName(); DOMString nodeValue = toWrite.getNodeValue(); unsigned long lent = nodeValue.length(); switch (toWrite.getNodeType()) { case DOM_Node::TEXT_NODE: { gFormatter->formatBuf(nodeValue.rawBuffer(), lent, XMLFormatter::CharEscapes); break; } case DOM_Node::PROCESSING_INSTRUCTION_NODE : { *gFormatter << XMLFormatter::NoEscapes << gStartPI << nodeName; if (lent > 0) { *gFormatter << chSpace << nodeValue; } *gFormatter << XMLFormatter::NoEscapes << gEndPI; break; } case DOM_Node::DOCUMENT_NODE : { DOM_Node child = toWrite.getFirstChild(); while( child != 0) { target << child; // add linefeed in requested output encoding *gFormatter << chLF; target << flush; child = child.getNextSibling(); } break; } case DOM_Node::ELEMENT_NODE : { // The name has to be representable without any escapes *gFormatter << XMLFormatter::NoEscapes << chOpenAngle << nodeName; // Output the element start tag. // Output any attributes on this element DOM_NamedNodeMap attributes = toWrite.getAttributes(); int attrCount = attributes.getLength(); for (int i = 0; i < attrCount; i++) { DOM_Node attribute = attributes.item(i); // // Again the name has to be completely representable. But the // attribute can have refs and requires the attribute style // escaping. // *gFormatter << XMLFormatter::NoEscapes << chSpace << attribute.getNodeName() << chEqual << chDoubleQuote << XMLFormatter::AttrEscapes << attribute.getNodeValue() << XMLFormatter::NoEscapes << chDoubleQuote; } // // Test for the presence of children, which includes both // text content and nested elements. // DOM_Node child = toWrite.getFirstChild(); if (child != 0) { // There are children. Close start-tag, and output children. // No escapes are legal here *gFormatter << XMLFormatter::NoEscapes << chCloseAngle; while( child != 0) { target << child; child = child.getNextSibling(); } // // Done with children. Output the end tag. // *gFormatter << XMLFormatter::NoEscapes << gEndElement << nodeName << chCloseAngle; } else { // // There were no children. Output the short form close of // the element start tag, making it an empty-element tag. // *gFormatter << XMLFormatter::NoEscapes << chForwardSlash << chCloseAngle; } break; } case DOM_Node::ENTITY_REFERENCE_NODE: { DOM_Node child; #if 0 for (child = toWrite.getFirstChild(); child != 0; child = child.getNextSibling()) { target << child; } #else // // Instead of printing the refernece tree // we'd output the actual text as it appeared in the xml file. // This would be the case when -e option was chosen // *gFormatter << XMLFormatter::NoEscapes << chAmpersand << nodeName << chSemiColon; #endif break; } case DOM_Node::CDATA_SECTION_NODE: { *gFormatter << XMLFormatter::NoEscapes << gStartCDATA << nodeValue << gEndCDATA; break; } case DOM_Node::COMMENT_NODE: { *gFormatter << XMLFormatter::NoEscapes << gStartComment << nodeValue << gEndComment; break; } case DOM_Node::DOCUMENT_TYPE_NODE: { DOM_DocumentType doctype = (DOM_DocumentType &)toWrite;; *gFormatter << XMLFormatter::NoEscapes << gStartDoctype << nodeName; DOMString id = doctype.getPublicId(); if (id != 0) { *gFormatter << XMLFormatter::NoEscapes << chSpace << gPublic << id << chDoubleQuote; id = doctype.getSystemId(); if (id != 0) { *gFormatter << XMLFormatter::NoEscapes << chSpace << chDoubleQuote << id << chDoubleQuote; } } else { id = doctype.getSystemId(); if (id != 0) { *gFormatter << XMLFormatter::NoEscapes << chSpace << gSystem << id << chDoubleQuote; } } id = doctype.getInternalSubset(); if (id !=0) *gFormatter << XMLFormatter::NoEscapes << chOpenSquare << id << chCloseSquare; *gFormatter << XMLFormatter::NoEscapes << chCloseAngle; break; } case DOM_Node::ENTITY_NODE: { *gFormatter << XMLFormatter::NoEscapes << gStartEntity << nodeName; DOMString id = ((DOM_Entity &)toWrite).getPublicId(); if (id != 0) *gFormatter << XMLFormatter::NoEscapes << gPublic << id << chDoubleQuote; id = ((DOM_Entity &)toWrite).getSystemId(); if (id != 0) *gFormatter << XMLFormatter::NoEscapes << gSystem << id << chDoubleQuote; id = ((DOM_Entity &)toWrite).getNotationName(); if (id != 0) *gFormatter << XMLFormatter::NoEscapes << gNotation << id << chDoubleQuote; *gFormatter << XMLFormatter::NoEscapes << chCloseAngle << chLF; break; } case DOM_Node::XML_DECL_NODE: { DOMString str; *gFormatter << gXMLDecl1 << ((DOM_XMLDecl &)toWrite).getVersion(); *gFormatter << gXMLDecl2 << gEncodingName; str = ((DOM_XMLDecl &)toWrite).getStandalone(); if (str != 0) *gFormatter << gXMLDecl3 << str; *gFormatter << gXMLDecl4; break; } default: cerr << "Unrecognized node type = " << (long)toWrite.getNodeType() << endl; } return target; } // --------------------------------------------------------------------------- // ostream << DOMString // // Stream out a DOM string. Doing this requires that we first transcode // to char * form in the default code page for the system // --------------------------------------------------------------------------- ostream& operator<< (ostream& target, const DOMString& s) { char *p = s.transcode(); target << p; delete [] p; return target; } XMLFormatter& operator<< (XMLFormatter& strm, const DOMString& s) { unsigned int lent = s.length(); if (lent <= 0) return strm; XMLCh* buf = new XMLCh[lent + 1]; XMLString::copyNString(buf, s.rawBuffer(), lent); buf[lent] = 0; strm << buf; delete [] buf; return strm; }