# Copyright (c) 2004-2005 DoCoMo Euro-Labs GmbH (Munich, Germany). # Copyright (c) 2001-2004 LOGILAB S.A. (Paris, FRANCE). # # http://www.docomolab-euro.com/ -- mailto:tarlano@docomolab-euro.com # http://www.logilab.fr/ -- mailto:contact@logilab.fr # # This library is free software; you can redistribute it and/or # modify it under the terms of the GNU Lesser General Public # License as published by the Free Software Foundation; either # version 2.1 of the License, or (at your option) any later version. # # This library is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # Lesser General Public License for more details. # # You should have received a copy of the GNU Lesser General Public # License along with this library; if not, write to the Free Software # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA """The narval's registry register interfaces, classes and adapters for narval's elements (i.e. in its memory). Elements instances can be created from XML (using external handlers). :version: $Revision:$ :author: Logilab :copyright: 2001-2004 LOGILAB S.A. (Paris, FRANCE) 2004 DoCoMo Euro-Labs GmbH (Munich, Germany) :contact: http://www.logilab.fr/ -- mailto:contact@logilab.fr http://www.docomolab-euro.com/ -- mailto:tarlano@docomolab-euro.com :var REGISTRY: the narval interpreter's elements registry :var match_expression: function providing access to REGISTRY.match_expression :var multi_match_expression: function using `match_expression` to match multiple elements """ __revision__ = '$Id: reader.py 20 2004-04-15 14:43:51Z syt $' __docformat__ = "restructuredtext en" import sys from threading import Lock from cStringIO import StringIO from xml.sax import ContentHandler, make_parser from xml.sax.handler import feature_namespaces from mx.DateTime import today, now from twisted.python import components from narval import NO_NS, AL_NS, TYPE_NS, element from narval.utils import Singleton class HandlerNotDefined(Exception): """trying to get an element which has no class has been associated """ class NoImplementationDefined(Exception): """trying to get an element for an interface but no implementation of the interface has been registered """ class BadAlElement(Exception): """error while parsing XML stream""" def __init__(self, msg, skip = False): self.skip_element = skip self.msg = msg Exception.__init__(self, msg) class Registry(Singleton): """the registry references available interfaces, adapters and elements classes. It's able to create elements for narval's memory from XML string or stream using the external handlers registered, or from the desired object interface. :type elmts: dict :ivar elmts: dictionary of registered XML elmts, indexed by the fully qualifed name matched by the handler (ie a (uri, name) tuple) :type global_context: dict :ivar global_context: context that will be used as global's when matching elements """ def __init__(self, base_context): super(Registry, self).__init__() self.elmts = {} self.global_context = { 'implements': components.implements, 'today': today, 'now': now } self.global_context.update(base_context) parser = make_parser()#["xml.sax.drivers2.drv_pyexpat"]) parser.setFeature(feature_namespaces, 1) self._parser = parser self._s_lock = Lock() # reader interface ####################################################### def from_stream(self, stream, level=0): """get elements from a stream (file like object) containing a XML document :type stream: file like object :param stream: the stream containing the XML data to parse :type level: int :param level: the level of the elements in the XML tree, default to 0. 0 means that the XML document is itself an element. 1 means that each child of the xml's root is an element :rtype: list :return: the list of generated elements """ handler = ALSaxHandler(self.get_class, level) self._s_lock.acquire() try: self._parser.reset() self._parser.setContentHandler(handler) self._parser.parse(stream) finally: self._s_lock.release() return handler.elements def from_string(self, string, level=0): """get elements from a string containing a XML document :type string: str :param string: the XML document to parse :type level: int :param level: the level of the elements in the XML tree, default to 0. 0 means that the XML document is itself an element. 1 means that each child of the xml's root is an element :rtype: list :return: the list of generated elements """ if isinstance(string, unicode): string = string.encode('UTF-8') return self.from_stream(StringIO(string), level) # elements management #################################################### def autoregister(self, module_name): """automatically register subclasses of in the module """ module = sys.modules[module_name] for obj in vars(module).itervalues(): try: if obj.__module__ != module.__name__: continue except AttributeError: continue try: if issubclass(obj, element.ALElement): self.register_class(obj) elif issubclass(obj, components.Interface): self.register_interface(obj) elif issubclass(obj, components.Adapter): if not getattr(obj, '__implements__', None): # abstract adapter continue for source in obj.__sources__: self.register_adapter(obj, source, *obj.__implements__) except TypeError: pass def register_interfaces(self, *interfaces): """register the given interfaces""" for iface in interfaces: self.register_interface(iface) def register_interface(self, interface): """register an interface to allow it's use in match expressions :type interface: classobj :param interface: the interface class to register """ assert not self.global_context.has_key(interface.__name__) #log(LOG_DEBUG, 'interface %s.%s', # (interface.__module__, interface.__name__)) self.global_context[interface.__name__] = interface def register_adapter(self, adapter, orig_interface, *interface_classes): """register a twisted adapter""" #log(LOG_DEBUG, 'adapter %s from %s to %s', # (adapter, orig_interface, interface_classes)) components.registerAdapter(adapter, orig_interface, *interface_classes) def register_classes(self, *element_classes): """register the given element classes""" for elmt_class in element_classes: self.register_class(elmt_class) def register_class(self, klass, default=False): """register an element's class to allow it's use in match expressions :type klass: classobj :param klass: the element's class to register :type default: bool :param default: flag indicating whether the class is a prefered default class for interfaces it implements """ uri, name = klass.__xml_element__ assert not self.elmts.has_key((uri, name)) self.elmts[(uri, name)] = klass assert not self.global_context.has_key(klass.__name__) self.global_context[klass.__name__] = klass #log(LOG_DEBUG, 'element %s.%s (%s)', # (klass.__module__, klass.__name__, name)) implements = klass.__implements__ try: iter(implements) except: implements = (implements,) def get_class(self, uri, name): """return the element class registered for uri / name :type uri: str or None (empty namespace) :param uri: the XML namespace uri of the XML node :type name: str :param name: the tag name of the XML node :type context: dict :param context: the current namespaces / prefix mapping in the document when the node is introduced :type locator: xml.sax.saxlib.Locator :param locator: SAX locator object, allowing to locate parsing error in the original XML document :rtype: `ALElement` or subclass instance :return: an element instance to handle the XML snippet under the given node :raise HandlerNotDefined: when no handler is defined for this element """ try: return self.elmts[(uri, name)]() # FIXME - if keyerror in handler -> wrong error message except KeyError: #import traceback #traceback.print_exc() raise HandlerNotDefined('No handler defined for element %s:%s' % ( uri, name)) # matches management ###################################################### def match_expression(self, expr, context): """return the result of the evaluation of in usually context has at least a "elmt" name defined, referencing a memory element :type expr: str :param expr: the expression to evaluate :type context: dict :param context: the context to use as locals during evaluation :rtype: bool :return: the result of the evaluation """ try: return eval(expr, self.global_context, context) except (SyntaxError, NameError), ex: ex.args += (expr,) raise except Exception: #import traceback #traceback.print_exc() return False class ALSaxHandler(ContentHandler): """Narval elements main SAX handler (may produce more than one object from a single xml dooument) :type elements: list :ivar elements: list of generated elements, filled during parsing :type element_level: int :ivar element_level: level of elements nodes in the document tree """ def __init__(self, elmts_factory, element_level): ContentHandler.__init__(self) self.elmts_fact = elmts_factory self.element_level = element_level self.current_level = 0 # results list (contains extracted elements) self.elements = [] # element handler self._handler = None self._elmt = None self._elmt_error = None # name spaces mapping self._ns_contexts = [{}] # contains uri -> prefix dicts self._current_context = self._ns_contexts[-1] def startElement(self, name, attrs): """ensure SAX level 1 callbacks are not called :type name: unicode :param name: the tag name :type attrs: dict :param attrs: the node's attribute values, indexed by attribute's name :raise RuntimeError: """ raise RuntimeError('Should use sax2 interface with NS support enabled') def endElement(self, name): """ensure SAX level 1 callbacks are not called :type name: unicode :param name: the tag name :raise RuntimeError: """ raise RuntimeError('Should use sax2 interface with NS support enabled') def startPrefixMapping(self, prefix, uri): """SAX callback: start namespace prefix declaration :type prefix: unicode :param prefix: the newly defined prefix :type uri: unicode :param uri: the uri for the new prefix """ if self._handler: self._handler.start_prefix_mapping(prefix, uri) self._ns_contexts.append(self._current_context.copy()) self._current_context[uri] = prefix def endPrefixMapping(self, prefix): """SAX callback: end namespace prefix declaration scope :type prefix: unicode :param prefix: the no more defined prefix """ if self._handler: self._handler.end_prefix_mapping(prefix) self._current_context = self._ns_contexts.pop() def startElementNS(self, name, qname, attrs): """SAX callback: start a new xml node if we are at the element level, get and initialize the handler associated with the element then if a handler is defined, delegate the call :type name: tuple :param name: the tag name as a tuple (uri, name) :type qname: tuple :param qname: the qualified tag name as a tuple (prefix, name) :type attrs: dict :param attrs: the node's attribute values, indexed by attribute's name as a tuple (uri, name) """ if self.element_level == self.current_level: self._elmt = elmt = self.elmts_fact(name[0], name[1]) if elmt.__child_handler__: self._handler = elmt.__child_handler__(elmt, self._current_context, self._locator) # self._handler.start(attrs) self._call_and_check(self._handler.start, attrs) elmt.init_attrs(attrs) elif self._handler: self._call_and_check(self._handler.start_element, name, attrs) # self._handler.start_element(name, attrs) elif self.current_level >= self.element_level: log(LOG_ERR, 'Child element %s is not handled on %s', (qname, self._elmt)) self.current_level += 1 def endElementNS(self, name, qname): """SAX callback: close a xml node if a handler is defined, delegate the call if we are at the element level, finalyze the handler and add the builded element to the elements list :type name: tuple :param name: the tag name as a tuple (uri, name) :type qname: tuple :param qname: the qualified tag name as a tuple (prefix, name) """ self.current_level -= 1 if self.element_level == self.current_level: if self._handler: self._handler.end() self._handler = None if self._elmt_error is not None: log(LOG_ERR, 'Skipping element (%s):\n%s' % (self._elmt, self._elmt_error)) self._elmt_error = None else: self.elements.append(self._elmt) elif self.current_level >= self.element_level and self._handler: self._call_and_check(self._handler.end_element, name) def characters(self, content): """SAX callback: get some characters if a handler is defined and we got a non empty string, delegate the call :type content: unicode :param content: the non empty string to hold """ if self._handler: self._call_and_check(self._handler.characters, content) # self._handler.characters(content) def _call_and_check(self, handler_meth, *args, **kwargs): """wraps a sub_handler's method call with a try / except BadAlElement If a method call (on handler) raises a BadAlElement, we must catch it, and check if we can skip it or not. :param handler_meth: the method to call :param args: arguments to be passed to the method """ try: handler_meth(*args, **kwargs) except BadAlElement, exc: if exc.skip_element: self._elmt_error = exc.msg else: raise REGISTRY = Registry({'NO_NS': NO_NS, 'AL_NS': AL_NS, 'TYPE_NS': TYPE_NS}) match_expression = REGISTRY.match_expression def multi_match_expression(expr, elements): """return an iterator on elements matching :type expr: str :param expr: the expression to evaluate :type elements: list :param elements: the list of elements against which the expression will be evaluated """ for element in elements: if match_expression(expr, {'elmt': element}): yield element