/* * The Apache Software License, Version 1.1 * * Copyright (c) 2001 The Apache Software Foundation. All rights * reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in * the documentation and/or other materials provided with the * distribution. * * 3. The end-user documentation included with the redistribution, * if any, must include the following acknowledgment: * "This product includes software developed by the * Apache Software Foundation (http://www.apache.org/)." * Alternately, this acknowledgment may appear in the software itself, * if and wherever such third-party acknowledgments normally appear. * * 4. The names "Xerces" and "Apache Software Foundation" must * not be used to endorse or promote products derived from this * software without prior written permission. For written * permission, please contact apache\@apache.org. * * 5. Products derived from this software may not be called "Apache", * nor may "Apache" appear in their name, without prior written * permission of the Apache Software Foundation. * * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. 
IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
 * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 * ====================================================================
 *
 * This software consists of voluntary contributions made by many
 * individuals on behalf of the Apache Software Foundation, and was
 * originally based on software copyright (c) 2001, International
 * Business Machines, Inc., http://www.ibm.com .  For more information
 * on the Apache Software Foundation, please see
 * <http://www.apache.org/>.
 */

/*
 * $Id: IDOMPrint.cpp,v 1.7 2002/02/01 22:36:42 peiyongz Exp $
 */

// ---------------------------------------------------------------------------
//  This sample program invokes the DOMParser to build a DOM tree for
//  the specified input file. It then walks the tree, and prints out the data
//  as an XML file.
//
//   Limitations:
//      1.  The encoding="xxx" clause in the XML header should reflect
//          the system local code page, but does not.
//      2.  Cases where the XML data contains characters that can not
//          be represented in the system local code page are not handled.
//      3.  Enabled namespace processing won't affect the output, since
//          DOM doesn't do namespace yet. But it will confirm that all
//          prefixes are correctly mapped, else you'll get errors.
// --------------------------------------------------------------------------- // --------------------------------------------------------------------------- // Includes // --------------------------------------------------------------------------- #include #include #include #include #include #include #include #include #include "IDOMTreeErrorReporter.hpp" #include #include // --------------------------------------------------------------------------- // Local const data // // Note: This is the 'safe' way to do these strings. If you compiler supports // L"" style strings, and portability is not a concern, you can use // those types constants directly. // --------------------------------------------------------------------------- static const XMLCh gEndElement[] = { chOpenAngle, chForwardSlash, chNull }; static const XMLCh gEndPI[] = { chQuestion, chCloseAngle, chNull}; static const XMLCh gStartPI[] = { chOpenAngle, chQuestion, chNull }; static const XMLCh gXMLDecl1[] = { chOpenAngle, chQuestion, chLatin_x, chLatin_m, chLatin_l , chSpace, chLatin_v, chLatin_e, chLatin_r, chLatin_s, chLatin_i , chLatin_o, chLatin_n, chEqual, chDoubleQuote, chDigit_1 , chPeriod, chDigit_0, chNull }; static const XMLCh gXMLDecl2[] = { chDoubleQuote, chSpace, chLatin_e, chLatin_n, chLatin_c , chLatin_o, chLatin_d, chLatin_i, chLatin_n, chLatin_g, chEqual , chDoubleQuote, chNull }; static const XMLCh gXMLDecl3[] = { chDoubleQuote, chQuestion, chCloseAngle , chLF, chNull }; static const XMLCh gStartCDATA[] = { chOpenAngle, chBang, chOpenSquare, chLatin_C, chLatin_D, chLatin_A, chLatin_T, chLatin_A, chOpenSquare, chNull }; static const XMLCh gEndCDATA[] = { chCloseSquare, chCloseSquare, chCloseAngle, chNull }; static const XMLCh gStartComment[] = { chOpenAngle, chBang, chDash, chDash, chNull }; static const XMLCh gEndComment[] = { chDash, chDash, chCloseAngle, chNull }; static const XMLCh gStartDoctype[] = { chOpenAngle, chBang, chLatin_D, chLatin_O, chLatin_C, chLatin_T, chLatin_Y, 
chLatin_P, chLatin_E, chSpace, chNull }; static const XMLCh gPublic[] = { chLatin_P, chLatin_U, chLatin_B, chLatin_L, chLatin_I, chLatin_C, chSpace, chDoubleQuote, chNull }; static const XMLCh gSystem[] = { chLatin_S, chLatin_Y, chLatin_S, chLatin_T, chLatin_E, chLatin_M, chSpace, chDoubleQuote, chNull }; static const XMLCh gStartEntity[] = { chOpenAngle, chBang, chLatin_E, chLatin_N, chLatin_T, chLatin_I, chLatin_T, chLatin_Y, chSpace, chNull }; static const XMLCh gNotation[] = { chLatin_N, chLatin_D, chLatin_A, chLatin_T, chLatin_A, chSpace, chDoubleQuote, chNull }; // --------------------------------------------------------------------------- // Local classes // --------------------------------------------------------------------------- class IDOMPrintFormatTarget : public XMLFormatTarget { public: IDOMPrintFormatTarget() {}; ~IDOMPrintFormatTarget() {}; // ----------------------------------------------------------------------- // Implementations of the format target interface // ----------------------------------------------------------------------- void writeChars(const XMLByte* const toWrite, const unsigned int count, XMLFormatter * const formatter) { // Surprisingly, Solaris was the only platform on which // required the char* cast to print out the string correctly. // Without the cast, it was printing the pointer value in hex. // Quite annoying, considering every other platform printed // the string with the explicit cast to char* below. cout.write((char *) toWrite, (int) count); }; private: // ----------------------------------------------------------------------- // Unimplemented methods. // ----------------------------------------------------------------------- IDOMPrintFormatTarget(const IDOMPrintFormatTarget& other); void operator=(const IDOMPrintFormatTarget& rhs); }; // --------------------------------------------------------------------------- // Local data // // gXmlFile // The path to the file to parser. Set via command line. 
// // gDoNamespaces // Indicates whether namespace processing should be done. // // gDoSchema // Indicates whether schema processing should be done. // // gSchemaFullChecking // Indicates whether full schema constraint checking should be done. // // gDoCreate // Indicates whether entity reference nodes needs to be created or not // Defaults to false // // gEncodingName // The encoding we are to output in. If not set on the command line, // then it is defaults to the encoding of the input XML file. // // gValScheme // Indicates what validation scheme to use. It defaults to 'auto', but // can be set via the -v= command. // // --------------------------------------------------------------------------- static char* gXmlFile = 0; static bool gDoNamespaces = false; static bool gDoSchema = false; static bool gSchemaFullChecking = false; static bool gDoCreate = false; static const XMLCh* gEncodingName = 0; static bool gEncodingNameisOnHeap = false; static XMLFormatter::UnRepFlags gUnRepFlags = XMLFormatter::UnRep_CharRef; static IDOMParser::ValSchemes gValScheme = IDOMParser::Val_Auto; static XMLFormatter* gFormatter = 0; // --------------------------------------------------------------------------- // Forward references // --------------------------------------------------------------------------- void usage(); ostream& operator<<(ostream& target, const XMLCh * toWrite); ostream& operator<<(ostream& target, IDOM_Node *toWrite); // XMLFormatter& operator<< (XMLFormatter& strm, const XMLCh *s); // --------------------------------------------------------------------------- // // Usage() // // --------------------------------------------------------------------------- void usage() { cout << "\nUsage:\n" " IDOMPrint [options] \n\n" "This program invokes the IDOM parser, and builds the DOM tree.\n" "It then traverses the DOM tree and prints the contents of the\n" "tree for the specified XML file.\n\n" "Options:\n" " -e create entity reference nodes. 
Default is no expansion.\n" " -u=xxx Handle unrepresentable chars [fail | rep | ref*].\n" " -v=xxx Validation scheme [always | never | auto*].\n" " -n Enable namespace processing. Default is off.\n" " -s Enable schema processing. Default is off.\n" " -f Enable full schema constraint checking. Defaults is off.\n" " -x=XXX Use a particular encoding for output. Default is\n" " the same encoding as the input XML file. UTF-8 if\n" " input XML file has not XML declaration.\n" " -? Show this help.\n\n" " * = Default if not provided explicitly.\n\n" "The parser has intrinsic support for the following encodings:\n" " UTF-8, USASCII, ISO8859-1, UTF-16[BL]E, UCS-4[BL]E,\n" " WINDOWS-1252, IBM1140, IBM037.\n" << endl; } // --------------------------------------------------------------------------- // // main // // --------------------------------------------------------------------------- int main(int argC, char* argV[]) { int retval = 0; // Initialize the XML4C2 system try { XMLPlatformUtils::Initialize(); } catch(const XMLException &toCatch) { cerr << "Error during Xerces-c Initialization.\n" << " Exception message:" << StrX(toCatch.getMessage()) << endl; return 1; } // Check command line and extract arguments. if (argC < 2) { usage(); XMLPlatformUtils::Terminate(); return 1; } // See if non validating dom parser configuration is requested. 
int parmInd; for (parmInd = 1; parmInd < argC; parmInd++) { // Break out on first parm not starting with a dash if (argV[parmInd][0] != '-') break; // Watch for special case help request if (!strcmp(argV[parmInd], "-?")) { usage(); XMLPlatformUtils::Terminate(); return 2; } else if (!strncmp(argV[parmInd], "-v=", 3) || !strncmp(argV[parmInd], "-V=", 3)) { const char* const parm = &argV[parmInd][3]; if (!strcmp(parm, "never")) gValScheme = IDOMParser::Val_Never; else if (!strcmp(parm, "auto")) gValScheme = IDOMParser::Val_Auto; else if (!strcmp(parm, "always")) gValScheme = IDOMParser::Val_Always; else { cerr << "Unknown -v= value: " << parm << endl; XMLPlatformUtils::Terminate(); return 2; } } else if (!strcmp(argV[parmInd], "-n") || !strcmp(argV[parmInd], "-N")) { gDoNamespaces = true; } else if (!strcmp(argV[parmInd], "-s") || !strcmp(argV[parmInd], "-S")) { gDoSchema = true; } else if (!strcmp(argV[parmInd], "-f") || !strcmp(argV[parmInd], "-F")) { gSchemaFullChecking = true; } else if (!strcmp(argV[parmInd], "-e") || !strcmp(argV[parmInd], "-E")) { gDoCreate = true; } else if (!strncmp(argV[parmInd], "-x=", 3) || !strncmp(argV[parmInd], "-X=", 3)) { // Get out the encoding name gEncodingName = XMLString::transcode( &(argV[parmInd][3]) ); gEncodingNameisOnHeap = true; } else if (!strncmp(argV[parmInd], "-u=", 3) || !strncmp(argV[parmInd], "-U=", 3)) { const char* const parm = &argV[parmInd][3]; if (!strcmp(parm, "fail")) gUnRepFlags = XMLFormatter::UnRep_Fail; else if (!strcmp(parm, "rep")) gUnRepFlags = XMLFormatter::UnRep_Replace; else if (!strcmp(parm, "ref")) gUnRepFlags = XMLFormatter::UnRep_CharRef; else { cerr << "Unknown -u= value: " << parm << endl; XMLPlatformUtils::Terminate(); return 2; } } // else if (!strcmp(argV[parmInd], "-NoEscape")) // { // gDoEscapes = false; // } else { cerr << "Unknown option '" << argV[parmInd] << "', ignoring it.\n" << endl; } } // // And now we have to have only one parameter left and it must be // the file name. 
// if (parmInd + 1 != argC) { usage(); XMLPlatformUtils::Terminate(); return 1; } gXmlFile = argV[parmInd]; // // Create our parser, then attach an error handler to the parser. // The parser will call back to methods of the ErrorHandler if it // discovers errors during the course of parsing the XML document. // IDOMParser *parser = new IDOMParser; parser->setValidationScheme(gValScheme); parser->setDoNamespaces(gDoNamespaces); parser->setDoSchema(gDoSchema); parser->setValidationSchemaFullChecking(gSchemaFullChecking); IDOMTreeErrorReporter *errReporter = new IDOMTreeErrorReporter(); parser->setErrorHandler(errReporter); parser->setCreateEntityReferenceNodes(gDoCreate); // // Parse the XML file, catching any XML exceptions that might propogate // out of it. // bool errorsOccured = false; try { parser->parse(gXmlFile); } catch (const XMLException& e) { cerr << "An error occured during parsing\n Message: " << StrX(e.getMessage()) << endl; errorsOccured = true; } catch (const IDOM_DOMException& e) { cerr << "A DOM error occured during parsing\n DOMException code: " << e.code << endl; errorsOccured = true; } catch (...) { cerr << "An error occured during parsing\n " << endl; errorsOccured = true; } // If the parse was successful, output the document data from the DOM tree if (!errorsOccured && !errReporter->getSawErrors()) { IDOM_Node *doc = parser->getDocument(); IDOMPrintFormatTarget* formatTarget = new IDOMPrintFormatTarget(); // Figure out the encoding that we want to use to write the document. // If command line specified, use that, // otherwise use utf-8. // if (gEncodingName == 0) // if no encoding specified on command line { static const XMLCh utf_8[] = {chLatin_U, chLatin_T, chLatin_F, chDash, chDigit_8, 0}; gEncodingName = utf_8; } // // Create the output formatter and actually write the document HERE! 
// try { gFormatter = new XMLFormatter(gEncodingName, formatTarget, XMLFormatter::NoEscapes, gUnRepFlags); //print out the XML Decl node first *gFormatter << gXMLDecl1 ; *gFormatter << gXMLDecl2 << gEncodingName; *gFormatter << gXMLDecl3; cout << doc; *gFormatter << chLF; // add linefeed in requested output encoding cout << flush; } catch (XMLException& e) { cerr << "An error occurred during creation of output transcoder. Msg is:" << endl << StrX(e.getMessage()) << endl; retval = 4; } delete formatTarget; delete gFormatter; } else retval = 4; // // Clean up the error handler. The parser does not adopt handlers // since they could be many objects or one object installed for multiple // handlers. // delete errReporter; // // Delete the parser itself. Must be done prior to calling Terminate, below. // delete parser; // And call the termination method XMLPlatformUtils::Terminate(); if (gEncodingNameisOnHeap) delete (void *)gEncodingName; // const problems. return retval; } // --------------------------------------------------------------------------- // ostream << DOM_Node // // Stream out a DOM node, and, recursively, all of its children. This // function is the heart of writing a DOM tree out as XML source. Give it // a document node and it will do the whole thing. 
// --------------------------------------------------------------------------- ostream& operator<<(ostream& target, IDOM_Node *toWrite) { // Get the name and value out for convenience const XMLCh * nodeName = toWrite->getNodeName(); const XMLCh * nodeValue = toWrite->getNodeValue(); unsigned long lent = XMLString::stringLen(nodeValue); switch (toWrite->getNodeType()) { case IDOM_Node::TEXT_NODE: { gFormatter->formatBuf(nodeValue, lent, XMLFormatter::CharEscapes); break; } case IDOM_Node::PROCESSING_INSTRUCTION_NODE : { *gFormatter << XMLFormatter::NoEscapes << gStartPI << nodeName; if (lent > 0) { *gFormatter << chSpace << nodeValue; } *gFormatter << XMLFormatter::NoEscapes << gEndPI; break; } case IDOM_Node::DOCUMENT_NODE : { IDOM_Node *child = toWrite->getFirstChild(); while( child != 0) { target << child; // add linefeed in requested output encoding *gFormatter << chLF; target << flush; child = child->getNextSibling(); } break; } case IDOM_Node::ELEMENT_NODE : { // The name has to be representable without any escapes *gFormatter << XMLFormatter::NoEscapes << chOpenAngle << nodeName; // Output the element start tag. // Output any attributes on this element IDOM_NamedNodeMap *attributes = toWrite->getAttributes(); int attrCount = attributes->getLength(); for (int i = 0; i < attrCount; i++) { IDOM_Node *attribute = attributes->item(i); // // Again the name has to be completely representable. But the // attribute can have refs and requires the attribute style // escaping. // *gFormatter << XMLFormatter::NoEscapes << chSpace << attribute->getNodeName() << chEqual << chDoubleQuote << XMLFormatter::AttrEscapes << attribute->getNodeValue() << XMLFormatter::NoEscapes << chDoubleQuote; } // // Test for the presence of children, which includes both // text content and nested elements. // IDOM_Node *child = toWrite->getFirstChild(); if (child != 0) { // There are children. Close start-tag, and output children. 
// No escapes are legal here *gFormatter << XMLFormatter::NoEscapes << chCloseAngle; while( child != 0) { target << child; child = child->getNextSibling(); } // // Done with children. Output the end tag. // *gFormatter << XMLFormatter::NoEscapes << gEndElement << nodeName << chCloseAngle; } else { // // There were no children. Output the short form close of // the element start tag, making it an empty-element tag. // *gFormatter << XMLFormatter::NoEscapes << chForwardSlash << chCloseAngle; } break; } case IDOM_Node::ENTITY_REFERENCE_NODE: { #if 0 IDOM_Node *child; for (child = toWrite->getFirstChild(); child != 0; child = child->getNextSibling()) { target << child; } #else // // Instead of printing the refernece tree // we'd output the actual text as it appeared in the xml file. // This would be the case when -e option was chosen // *gFormatter << XMLFormatter::NoEscapes << chAmpersand << nodeName << chSemiColon; #endif break; } case IDOM_Node::CDATA_SECTION_NODE: { *gFormatter << XMLFormatter::NoEscapes << gStartCDATA << nodeValue << gEndCDATA; break; } case IDOM_Node::COMMENT_NODE: { *gFormatter << XMLFormatter::NoEscapes << gStartComment << nodeValue << gEndComment; break; } case IDOM_Node::DOCUMENT_TYPE_NODE: { IDOM_DocumentType *doctype = (IDOM_DocumentType *)toWrite;; *gFormatter << XMLFormatter::NoEscapes << gStartDoctype << nodeName; const XMLCh *id = doctype->getPublicId(); if (id != 0 && *id != 0) { *gFormatter << XMLFormatter::NoEscapes << chSpace << gPublic << id << chDoubleQuote; } id = doctype->getSystemId(); if (id != 0 && *id != 0) { *gFormatter << XMLFormatter::NoEscapes << chSpace << chDoubleQuote << id << chDoubleQuote; } id = doctype->getSystemId(); if (id != 0 && *id != 0) { *gFormatter << XMLFormatter::NoEscapes << chSpace << gSystem << id << chDoubleQuote; } id = doctype->getInternalSubset(); if (id != 0 && *id != 0) *gFormatter << XMLFormatter::NoEscapes << chOpenSquare << id << chCloseSquare; *gFormatter << XMLFormatter::NoEscapes << 
chCloseAngle; break; } case IDOM_Node::ENTITY_NODE: { *gFormatter << XMLFormatter::NoEscapes << gStartEntity << nodeName; const XMLCh * id = ((IDOM_Entity *)toWrite)->getPublicId(); if (id != 0) *gFormatter << XMLFormatter::NoEscapes << gPublic << id << chDoubleQuote; id = ((IDOM_Entity *)toWrite)->getSystemId(); if (id != 0) *gFormatter << XMLFormatter::NoEscapes << gSystem << id << chDoubleQuote; id = ((IDOM_Entity *)toWrite)->getNotationName(); if (id != 0) *gFormatter << XMLFormatter::NoEscapes << gNotation << id << chDoubleQuote; *gFormatter << XMLFormatter::NoEscapes << chCloseAngle << chLF; break; } default: cerr << "Unrecognized node type = " << (long)toWrite->getNodeType() << endl; } return target; } // --------------------------------------------------------------------------- // ostream << XMLCh * // // Stream out a DOM string. Doing this requires that we first transcode // to char * form in the default code page for the system // --------------------------------------------------------------------------- ostream& operator<< (ostream& target, const XMLCh *s) { char *p = XMLString::transcode(s); target << p; delete [] p; return target; }