########################################################################
# $Header: /var/local/cvsroot/4Suite/Ft/Xml/Lib/Print.py,v 1.27.4.1 2006/10/16 04:14:27 uogbuji Exp $
"""
This module supports document serialization in XML or HTML syntax.

Copyright 2005 Fourthought, Inc. (USA).
Detailed license and copyright information: http://4suite.org/COPYRIGHT
Project home, documentation, distributions: http://4suite.org/
"""

import sys
from xml.dom import Node
from Ft.Xml import XML_NAMESPACE, XMLNS_NAMESPACE

import XmlPrinter, XmlPrettyPrinter, HtmlPrinter, HtmlPrettyPrinter


class PrintVisitor:
    """
    Provides functions to recursively walk a DOM or Domlette object and
    generate SAX-like event calls for each node encountered. See the
    printer classes (XMLPrinter, HTMLPrinter, etc.) for the event
    handlers.

    Handlers are looked up by nodeType in the class-level _dispatch
    table; each visitXxx method registers itself right after its
    definition.
    """

    def __init__(self, stream, encoding, nsHints=None, isHtml=False,
                 indent=False, canonical=False, addedAttributes=None,
                 removedNsDecls=None):
        """
        Initializes an instance of the class, selecting the appropriate
        printer to use, depending on the isHtml and indent flags.

        The flags are checked in this order: indent+isHtml, indent,
        isHtml, canonical; so canonical only takes effect when neither
        indent nor isHtml is set.

        nsHints, if given, is a dictionary of namespace mappings that
        help determine if namespace declarations need to be emitted when
        visiting the first Element node.

        addedAttributes, if given, is a dictionary of extra attributes
        (name -> value) passed to the printer with the first Element
        node visited.

        removedNsDecls, if given, is a list of namespace prefixes whose
        declarations are dropped from the first Element node visited.
        """
        if indent and isHtml:
            self.writer = HtmlPrettyPrinter.HtmlPrettyPrinter(stream, encoding)
        elif indent:
            self.writer = XmlPrettyPrinter.XmlPrettyPrinter(stream, encoding)
        elif isHtml:
            self.writer = HtmlPrinter.HtmlPrinter(stream, encoding)
        elif canonical:
            self.writer = XmlPrinter.CanonicalXmlPrinter(stream, encoding)
        else:
            self.writer = XmlPrinter.XmlPrinter(stream, encoding)

        # Stack of in-scope namespace mappings (prefix -> URI); the top
        # entry is copied and extended for each element visited.
        self._namespaces = [{'xml': XML_NAMESPACE}]
        self._nsHints = nsHints
        self._addedAttributes = addedAttributes or {}
        self._removedNsDecls = removedNsDecls or []

    # Maps Node.*_NODE constants to the (unbound) handler functions below.
    _dispatch = {}

    def visit(self, node):
        """
        Starts walking the tree at the given node.

        Raises ValueError if the object has no nodeType attribute, or if
        its nodeType has no handler registered in _dispatch.
        """
        try:
            node_type = node.nodeType
        except AttributeError:
            raise ValueError('Not a valid DOM node %r' % node)

        try:
            visit = self._dispatch[node_type]
        except KeyError:
            # unknown node type, try and get a "pretty" name for the error
            node_types = {}
            for name in dir(Node):
                if name.endswith('_NODE'):
                    node_types[getattr(Node, name)] = name
            # Use the nodeType value already fetched above; DOM nodes have
            # no 'node_type' attribute, so reading it here would mask the
            # intended ValueError with an AttributeError.
            node_type = node_types.get(node_type, node_type)
            raise ValueError('Unknown node type %r' % node_type)
        else:
            # _dispatch holds plain functions, so pass self explicitly
            visit(self, node)

    def visitNotImplemented(self, node):
        """
        Called when an known but unsupported type of node is
        encountered, always raising a NotImplementedError exception. The
        unsupported node types are those that require DTD subset
        support: entity nodes, entity reference nodes, and notation
        nodes.
        """
        raise NotImplementedError('Printing of %r' % node)
    _dispatch[Node.ENTITY_REFERENCE_NODE] = visitNotImplemented
    _dispatch[Node.ENTITY_NODE] = visitNotImplemented
    _dispatch[Node.NOTATION_NODE] = visitNotImplemented

    def visitDocumentFragment(self, node):
        """
        Called when a DocumentFragment node is encountered. Just
        proceeds to the node's children.
        """
        for child in node.childNodes:
            self.visit(child)
    _dispatch[Node.DOCUMENT_FRAGMENT_NODE] = visitDocumentFragment

    def visitDocument(self, node):
        """
        Called when a Document node is encountered. Just proceeds to the
        associated DocumentType node, if any, and then to the node's
        children.
        """
        self.writer.startDocument()
        hasDocTypeNode = False
        # Default so that a document exposing neither 'doctype' nor
        # 'systemId' still has its children visited instead of failing
        # on an unbound name.
        children = node.childNodes
        if hasattr(node, 'doctype'):
            # DOM Level 1/2/3
            if node.doctype:
                hasDocTypeNode = True
                self.visitDocumentType(node.doctype)
            # The doctype node was handled above; don't visit it twice
            children = [x for x in node.childNodes if x != node.doctype]
        if not hasDocTypeNode and hasattr(node, 'systemId'):
            # Domlette
            if node.documentElement:
                self.writer.doctype(node.documentElement.tagName,
                                    node.publicId, node.systemId)
            children = node.childNodes

        for child in children:
            self.visit(child)
    _dispatch[Node.DOCUMENT_NODE] = visitDocument

    def visitDocumentType(self, node):
        """
        Called when a DocumentType node is encountered. Generates a
        doctype event for the printer.
        """
        self.writer.doctype(node.name, node.publicId, node.systemId)
    _dispatch[Node.DOCUMENT_TYPE_NODE] = visitDocumentType

    def visitElement(self, node):
        """
        Called when an Element node is encountered. Generates for the
        printer a startElement event, events for the node's children
        (including attributes), and an endElement event.

        The namespace hints, added attributes and removed namespace
        declarations given to the constructor are consumed by the first
        element visited and are not applied to later elements.
        """
        current_nss = self._namespaces[-1].copy()

        # Gather the namespaces and attributes for writing
        namespaces = {}
        if self._nsHints:
            for prefix, namespaceUri in self._nsHints.items():
                # See if this namespace needs to be emitted
                # (0 is a sentinel that never equals a namespace URI)
                if current_nss.get(prefix, 0) != namespaceUri:
                    namespaces[prefix] = namespaceUri
            self._nsHints = None
        if self._addedAttributes:
            attributes = self._addedAttributes
            self._addedAttributes = None
        else:
            attributes = {}
        for attr in node.attributes.values():
            # xmlns="uri" or xmlns:foo="uri"
            if attr.namespaceURI == XMLNS_NAMESPACE:
                if not attr.prefix:
                    # xmlns="uri"
                    prefix = None
                else:
                    # xmlns:foo="uri"
                    prefix = attr.localName
                if current_nss.get(prefix, 0) != attr.value:
                    namespaces[prefix] = attr.value
            else:
                attributes[attr.name] = attr.value

        # The element's namespaceURI/prefix mapping takes precedence
        if node.namespaceURI or current_nss.get(None, 0):
            if current_nss.get(node.prefix, 0) != node.namespaceURI:
                namespaces[node.prefix] = node.namespaceURI or u""

        # Drop the namespace declarations the caller asked to suppress.
        # A requested prefix may not be among the collected declarations
        # (e.g. it is already in scope), so a missing key is ignored
        # rather than raising KeyError.
        for prefix in self._removedNsDecls:
            namespaces.pop(prefix, None)

        tagName = getattr(node, 'tagName', getattr(node, 'nodeName'))
        self.writer.startElement(node.namespaceURI, tagName, namespaces,
                                 attributes)
        if self._removedNsDecls:
            self._removedNsDecls = []

        # Update in scope namespaces with those we emitted
        current_nss.update(namespaces)
        self._namespaces.append(current_nss)

        # Write out this node's children
        for child in node.childNodes:
            self.visit(child)

        self.writer.endElement(node.namespaceURI, tagName)

        # Leave this element's namespace scope
        del self._namespaces[-1]
    _dispatch[Node.ELEMENT_NODE] = visitElement

    def visitAttribute(self, node):
        """
        Called when an Attribute node is encountered. Generates an
        attribute event for the printer.
        """
        self.writer.attribute(None, node.name, node.value)
    _dispatch[Node.ATTRIBUTE_NODE] = visitAttribute

    def visitText(self, node):
        """
        Called when a Text node is encountered. Generates a text event
        for the printer.
        """
        self.writer.text(node.data)
    _dispatch[Node.TEXT_NODE] = visitText

    def visitCDATASection(self, node):
        """
        Called when a CDATASection node is encountered. Generates a
        cdataSection event for the printer.
        """
        self.writer.cdataSection(node.data)
    _dispatch[Node.CDATA_SECTION_NODE] = visitCDATASection

    def visitComment(self, node):
        """
        Called when a Comment node is encountered. Generates a comment
        event for the printer.
        """
        self.writer.comment(node.data)
    _dispatch[Node.COMMENT_NODE] = visitComment

    def visitProcessingInstruction(self, node):
        """
        Called when a ProcessingInstruction node is encountered.
        Generates a processingInstruction event for the printer.
        """
        self.writer.processingInstruction(node.target, node.data)
    _dispatch[Node.PROCESSING_INSTRUCTION_NODE] = visitProcessingInstruction


def Print(root, stream=sys.stdout, encoding='UTF-8', asHtml=None):
    """
    Given a Node instance assumed to be the root of a DOM or Domlette
    tree, this function serializes the document to the given stream or
    stdout, using the given encoding (UTF-8 is the default).

    The asHtml flag can be used to force HTML-style serialization of an
    XML DOM. Otherwise, the DOM type (HTML or XML) is automatically
    determined. This function does nothing if root is not a Node
    (that is, if it has no nodeType attribute).

    It is preferable that users import this from Ft.Xml.Domlette
    rather than directly from Ft.Xml.Lib.
    """
    from Ft.Xml.Domlette import SeekNss
    if not hasattr(root, "nodeType"):
        return
    ns_hints = SeekNss(root)
    # When asHtml is not specified, choose output method from interface
    # of document node (getElementsByName is an HTML DOM only method)
    if asHtml is None:
        asHtml = hasattr(root.ownerDocument or root, 'getElementsByName')
    visitor = PrintVisitor(stream, encoding, ns_hints, asHtml, False)
    visitor.visit(root)


def PrettyPrint(root, stream=sys.stdout, encoding='UTF-8', asHtml=None):
    """
    Given a Node instance assumed to be the root of a DOM or Domlette
    tree, this function serializes the document to the given stream or
    stdout, using the given encoding (UTF-8 is the default). Extra
    whitespace is added to the output for visual formatting, and a
    trailing newline is written to the stream.

    The asHtml flag can be used to force HTML-style serialization of an
    XML DOM. Otherwise, the DOM type (HTML or XML) is automatically
    determined. This function does nothing if root is not a Node
    (that is, if it has no nodeType attribute).

    Please import this from Ft.Xml.Domlette
    rather than directly from Ft.Xml.Lib.
    """
    from Ft.Xml.Domlette import SeekNss
    if not hasattr(root, "nodeType"):
        return
    ns_hints = SeekNss(root)
    # When asHtml is not specified, choose output method from interface
    # of document node (getElementsByName is an HTML DOM only method)
    if asHtml is None:
        asHtml = hasattr(root.ownerDocument or root, 'getElementsByName')
    visitor = PrintVisitor(stream, encoding, ns_hints, asHtml, True)
    visitor.visit(root)
    stream.write('\n')


def CanonicalPrint(root, stream=sys.stdout, exclusive=False,
                   inclusivePrefixes=None):
    """
    Given a Node instance assumed to be the root of an XML DOM or Domlette
    tree, this function serializes the document to the given stream or
    stdout, using c14n serialization, according to
    http://www.w3.org/TR/xml-c14n (the default) or
    http://www.w3.org/TR/xml-exc-c14n/
    This function does nothing if root is not a Node
    (that is, if it has no nodeType attribute).

    The output is always written as UTF-8.

    exclusive - if true, apply exclusive c14n according to
        http://www.w3.org/TR/xml-exc-c14n/
    inclusivePrefixes - if exclusive is True, use this as a list of namespaces
        representing the "InclusiveNamespacesPrefixList" list in exclusive c14n.
        The list passed in is not modified.

    Please import this from Ft.Xml.Domlette
    rather than directly from Ft.Xml.Lib.
    """
    if not hasattr(root, "nodeType"):
        return

    added_attributes = {}  # All the contents should be XML NS attrs
    nshints = {}
    if not exclusive:
        # Roll in ancestral xml:* attributes
        parent_xml_attrs = root.xpath(u'ancestor::*/@xml:*')
        for attr in parent_xml_attrs:
            # NOTE(review): added_attributes is keyed by nodeName, so the
            # tuple test against it below never matches; later attributes
            # simply overwrite earlier ones with the same name. Left
            # as-is to preserve the existing output.
            aname = (attr.namespaceURI, attr.nodeName)
            if (aname not in added_attributes
                and aname not in root.attributes):
                added_attributes[attr.nodeName] = attr.value

    nsnodes = root.xpath('namespace::*')

    # Work on a copy so the caller's list is never mutated
    inclusivePrefixes = list(inclusivePrefixes or [])
    if u'#default' in inclusivePrefixes:
        # Replace the '#default' token with the empty prefix
        inclusivePrefixes.remove(u'#default')
        inclusivePrefixes.append(u'')

    decls_to_remove = []
    used_prefixes = []
    declared_prefixes = []
    if exclusive:
        # Prefixes used by the root element itself and its attributes
        used_prefixes = [node.prefix for node in root.xpath('self::*|@*')]
        for ans, anodename in root.attributes:
            if ans == XMLNS_NAMESPACE:
                attr = root.attributes[ans, anodename]
                prefix = attr.localName
                declared_prefixes.append(prefix)
                # Declarations on root that root itself does not use are
                # suppressed from the output
                if prefix not in used_prefixes:
                    decls_to_remove.append(prefix)

    # Roll in ancestral NS nodes
    for ns in nsnodes:
        prefix = ns.nodeName
        if (ns.value != XML_NAMESPACE
            and (XMLNS_NAMESPACE, ns.nodeName) not in root.attributes
            and (not exclusive or ns.localName in inclusivePrefixes)):
            nshints[prefix] = ns.value
        elif (exclusive
              and prefix in used_prefixes
              and prefix not in declared_prefixes):
            nshints[prefix] = ns.value

    visitor = PrintVisitor(stream, 'UTF-8', nshints, False,
                           False, True, added_attributes, decls_to_remove)
    visitor.visit(root)