"""Representations of XML Schema types used to (de)serialise SOAP data.

The module maps XML Schema simple, complex and array types onto Python
values.  ``XMLSchema`` reads type and element declarations from DOM nodes;
the type classes convert DOM nodes to Python objects (``getObject``) and
Python objects to XML text (``writeXml``).

The code sticks to the common subset of Python 2.6+ and Python 3.
"""

import base64
import re
import string
import time
import xml.dom
# Kept from the original module: other code may rely on the re-exported names.
from types import *

# Standard base type namespace
NS_XSD = "http://www.w3.org/1999/XMLSchema"
NS_XSD102K = "http://www.w3.org/2000/10/XMLSchema"

# Standard namespace namespace
NS_XMLNS = "http://www.w3.org/2000/xmlns/"

# Standard encoding namespace
NS_ENC = "http://schemas.xmlsoap.org/soap/encoding/"

# Pythonware "lab" namespace
NS_LAB = "http://www.pythonware.com/soap/"

# The unicode type where the interpreter has a separate one, else None.
try:
    unicode_type = UnicodeType
except NameError:
    unicode_type = None

# A value of one of these types is a type *name* still to be resolved,
# not a type object.
try:
    _STRING_TYPES = (str, unicode)
except NameError:  # Python 3: str already is the unicode type
    _STRING_TYPES = (str,)

# Python 3 has a single integer type.
try:
    _long = long
except NameError:
    _long = int

# Type names accepted by SimpleType.checkType for each python_types entry.
# 'string' was the name of the str type on very old interpreters, and a
# plain int is an acceptable value wherever a long is expected.
_PYTHON_TYPE_ALIASES = {
    'string': ('string', 'str', 'unicode'),
    'long': ('long', 'int'),
}


def is_element_node(node):
    """Return true if the DOM node is an element node."""
    return node.nodeType == xml.dom.Node.ELEMENT_NODE


def _get_attribute(node, name):
    """Return the un-namespaced attribute `name` of `node`, or ''.

    DOM implementations disagree on whether "no namespace" is spelled ''
    or None (xml.dom.minidom uses None), so both are tried.
    """
    value = node.getAttributeNS('', name)
    if value is None or value == '':
        value = node.getAttributeNS(None, name)
    if value is None:
        return ''
    return value


def _describe(value):
    """Return `value` as text suitable for embedding in an error message."""
    if isinstance(value, _STRING_TYPES):
        return value
    return str(value)


class ComplexType:
    """Represents an XML Schema 'complex' type, which is a struct
    made up of named members, each with its own type."""

    def __init__(self, name, namespace, items=None):
        """`items`, if given, maps member names to type objects."""
        self.name = name
        self.namespace = namespace
        self.members = {}
        if items is not None:
            for key in items.keys():
                self.members[key] = items[key]

    def addMember(self, namespace, name, type):
        """Adds (or replaces) the member `name`; `namespace` is unused."""
        self.members[name] = type

    def checkType(self, input):
        """Checks every declared member of the mapping `input` against
        its member type.  Raises KeyError if a member is missing."""
        for key in self.members.keys():
            self.members[key].checkType(input[key])

    def writeXml(self, out, paramname, input, nscount):
        """Writes the XML for this type to the output.

        `input` maps member names to values and `nscount` numbers the
        namespace prefix (q0, q1, ...).  Returns the next prefix number
        for the caller to use, which is larger than any number used here.
        """
        out.write('<' + paramname + ' xsi:type="q' + str(nscount) + ':' +
                  self.name + '" xmlns:q' + str(nscount) + '="' +
                  self.namespace + '">')
        nscount = nscount + 1
        for k in input.keys():
            paramtype = self.members[k]
            nscount = paramtype.writeXml(out, k, input[k], nscount)
        # Close the element opened above; without this the output is not
        # well-formed XML.
        out.write('</' + paramname + '>\n')
        return nscount + 1

    def getObject(self, nodes):
        """Given a list of DOM nodes whose first entry is an element of
        this type, parse out the data and return it as a Python dict
        keyed by member name.  Members with no matching child element
        are left out of the dict."""
        obj = {}
        node = nodes[0]  # not an array type, so we assume only one
        for member_key in self.members.keys():
            ctype = self.members[member_key]
            cnodes = [child for child in node.childNodes
                      if is_element_node(child)
                      and child.tagName == member_key]
            if len(cnodes) > 0:
                obj[member_key] = ctype.getObject(cnodes)
        return obj


class ArrayType:
    """Represents a type which is actually another type which may
    occur multiple times."""

    def __init__(self, actualtype, min=0, max=None):
        """`actualtype` is a type object, or a type name (string) that
        has not been resolved yet; `max` of None means unbounded."""
        self.actualtype = actualtype
        if (actualtype is not None
                and not isinstance(actualtype, _STRING_TYPES)):
            self.setType(actualtype)
        self.min = min
        self.max = max

    def setType(self, actualtype):
        """Sets the element type and mirrors its descriptive attributes."""
        self.actualtype = actualtype
        self.name = actualtype.name
        self.namespace = actualtype.namespace
        # Simple types have no members.
        self.members = getattr(actualtype, 'members', {})

    def checkType(self, input):
        """Checks every item of the sequence `input` against the
        element type."""
        for item in input:
            self.actualtype.checkType(item)

    def writeXml(self, out, paramname, input, nscount):
        """Writes one `paramname` element per item of `input`, using a
        fresh prefix number for each.  Returns the next prefix number."""
        for item in input:
            self.actualtype.writeXml(out, paramname, item, nscount)
            nscount = nscount + 1
        return nscount

    def getObject(self, nodes):
        """Given a list of DOM nodes, each an element of the element
        type, parse each one and return the results as a Python list."""
        return [self.actualtype.getObject([node]) for node in nodes]


class SimpleType:
    """Represents an XML Schema 'simple' type, which is derived from
    a base type.  It may be an enumerated type."""

    # Both tables are keyed by base namespace + base type name.
    trans_dispatch = {}   # -> function(self, list_of_strings)
    python_types = {}     # -> expected Python type name

    def __init__(self, name, namespace, base_type=None,
                 base_namespace=None, encoding=None):
        """If either `base_type` or `base_namespace` is None the type
        is taken to be its own base."""
        self.name = name
        self.namespace = namespace
        self.base_type = base_type
        self.base_namespace = base_namespace
        self.encoding = encoding
        if base_namespace is None or base_type is None:
            self.base_namespace = namespace
            self.base_type = name
        self.enumeration = None

    def addEnumElement(self, element):
        """Adds a value to the list of permitted values."""
        if self.enumeration is None:
            self.enumeration = []
        self.enumeration.append(element)

    # this isn't really meaningful for most types
    def set_encoding(self, encoding):
        self.encoding = encoding

    # FIXME: the below doesn't work with non-simple types like binary
    def checkType(self, input):
        """Raises TypeError if `input` is not of the Python type
        registered for this type's base, or is not one of the enumerated
        values.  Raises KeyError if the base type is not registered."""
        expected = self.python_types[self.base_namespace + self.base_type]
        accepted = _PYTHON_TYPE_ALIASES.get(expected, (expected,))
        if type(input).__name__ not in accepted:
            raise TypeError(_describe(input) + " does not match type " +
                            self.name)
        if self.enumeration is not None:
            if input not in self.enumeration:
                raise TypeError(_describe(input) +
                                " is not in the enumeration list for " +
                                self.name)

    def getObject(self, nodes):
        """Given a list of DOM nodes whose first entry is an element of
        this type, collect its text and CDATA children and return them
        translated to the matching Python value."""
        data = []
        node = nodes[0]  # not an array type, so we assume only one
        for cnode in node.childNodes:
            if (cnode.nodeType == xml.dom.Node.TEXT_NODE or
                    cnode.nodeType == xml.dom.Node.CDATA_SECTION_NODE):
                data.append(cnode.data)
        return self.translate(data)

    def writeXml(self, out, paramname, input, nscount):
        """Writes the XML for this type to the output.  Returns the
        input nscount plus 1 since it uses this namespace.

        The value is written with str() and is not XML-escaped.
        """
        out.write('<' + paramname + ' xsi:type="q' + str(nscount) + ':' +
                  self.name + '" xmlns:q' + str(nscount) + '="' +
                  self.namespace + '">')
        out.write(str(input))
        # Close the element opened above.
        out.write('</' + paramname + '>\n')
        return nscount + 1

    def translate(self, input):
        """Translates the input to the appropriate Python type.
        Input is a list of strings.  Raises KeyError if the base type
        has no registered translation."""
        f = self.trans_dispatch[self.base_namespace + self.base_type]
        return f(self, input)

    def translate_int(self, input):
        return int("".join(input))

    def translate_str(self, input):
        text = "".join(input)
        try:
            return unicode(text).encode('latin-1')
        except (NameError, UnicodeError):
            # No separate unicode type, or text that cannot be converted:
            # hand the text back unchanged.
            return text

    def translate_long(self, input):
        return _long("".join(input))

    def translate_float(self, input):
        return float("".join(input))

    def translate_binary(self, input):
        """Decodes base64 data; returns None for any other encoding."""
        if self.encoding != 'base64':
            return None
        data = "".join(input)
        if not isinstance(data, bytes):
            data = data.encode('ascii')
        # decodestring was renamed decodebytes and later removed.
        decoder = getattr(base64, 'decodebytes', None)
        if decoder is None:
            decoder = base64.decodestring
        return decoder(data)

    # Register the built-in types under both schema namespaces, matching
    # the keys of SIMPLE_TYPES below.
    _BUILTINS = (
        (translate_int, 'int',
         ('int', 'byte', 'unsignedByte', 'short', 'unsignedShort', 'long')),
        (translate_str, 'string', ('string',)),
        (translate_long, 'long',
         ('integer', 'unsignedInt', 'unsignedLong')),
        (translate_float, 'float', ('double', 'float')),
        (translate_binary, 'string', ('binary',)),
    )
    for _func, _pytype, _names in _BUILTINS:
        for _name in _names:
            for _ns in (NS_XSD, NS_XSD102K):
                trans_dispatch[_ns + _name] = _func
                python_types[_ns + _name] = _pytype
    del _BUILTINS, _func, _pytype, _names, _name, _ns


class BooleanType(SimpleType):
    """Represents a SOAP boolean type"""

    def __init__(self, namespace='http://www.w3.org/1999/XMLSchema',
                 name='boolean'):
        SimpleType.__init__(self, name, namespace)

    def checkType(self, input):
        """For input, we only accept integers, either 1 or 0"""
        if input != 1 and input != 0:
            raise TypeError(str(input) + " must be either 1 or 0.")

    def translate(self, input):
        """Translates the input to a boolean.

        Input is normally the list of strings collected by getObject;
        the text 'false' and '0' (and empty text) are false.  Anything
        that is not text is reduced to its truth value.
        """
        if isinstance(input, (list, tuple)):
            if len(input) == 0:
                return False
            if isinstance(input[0], _STRING_TYPES):
                input = "".join(input)
            else:
                return not not input[0]
        if isinstance(input, _STRING_TYPES):
            text = input.strip().lower()
            if text in ('false', '0'):
                return False
            return not not text
        return not not input  # force to a boolean


class DateTimeType(SimpleType):
    """Represents a SOAP date/time type"""

    def __init__(self, name, namespace='http://www.w3.org/1999/XMLSchema'):
        SimpleType.__init__(self, name, namespace)

    # FIXME: Implement this later
    def checkType(self, input):
        pass

    # FIXME: how should this work?
    def translate(self, input):
        """Returns the text unchanged; no date parsing is done."""
        return "".join(input)


SIMPLE_TYPES_BASE = {
    'int': SimpleType('int', NS_XSD),
    'byte': SimpleType('byte', NS_XSD),
    'unsignedByte': SimpleType('unsignedByte', NS_XSD),
    'short': SimpleType('short', NS_XSD),
    'unsignedShort': SimpleType('unsignedShort', NS_XSD),
    'long': SimpleType('long', NS_XSD),
    'string': SimpleType('string', NS_XSD),
    'integer': SimpleType('integer', NS_XSD),
    'unsignedInt': SimpleType('unsignedInt', NS_XSD),
    'unsignedLong': SimpleType('unsignedLong', NS_XSD),
    'double': SimpleType('double', NS_XSD),
    'float': SimpleType('float', NS_XSD),
    'binary': SimpleType('binary', NS_XSD),
    'bin.base64': SimpleType('binary', NS_XSD, encoding='base64'),
    'boolean': BooleanType(),
    'timeInstant': DateTimeType('timeInstant'),
    'timePeriod': DateTimeType('timePeriod'),
}

# The same type objects are reachable through either schema namespace.
SIMPLE_TYPES = {}
for key in SIMPLE_TYPES_BASE.keys():
    SIMPLE_TYPES[NS_XSD + key] = SIMPLE_TYPES_BASE[key]
    SIMPLE_TYPES[NS_XSD102K + key] = SIMPLE_TYPES_BASE[key]


class XMLSchema:
    """Initializes from DOM objects representing an XML schema.

    Has two primary dictionaries: `elements`, the top-level element
    declarations, and `types`, the named types.  Both are keyed by
    namespace + name.  Schema includes are not loaded yet.

    Only un-prefixed schema tags ('element', 'complexType', ...) are
    recognised, and only complex types built from 'all' are supported.
    """

    def __init__(self, schemaobjs, targetns='', namespaces=None):
        """Takes a list of DOM nodes, each the root of a schema, the
        target namespace, and an optional mapping of namespace prefixes
        to URIs used when a prefix is not declared on the node itself."""
        if namespaces is None:
            namespaces = {}
        self.targetns = targetns
        self.schemaobjs = schemaobjs
        self.namespaces = namespaces
        self.types = SIMPLE_TYPES.copy()
        self.elements = {}
        for schemaobj in schemaobjs:
            self.__load_named_types(schemaobj)
            self.__load_elements(schemaobj)
            self.__load_includes(schemaobj)

    def getType(self, typeName):
        """Given a type name which may or may not include a
        fully-qualified namespace, return a type.  If a type can't be
        found, we'll try with the local target namespace and finally
        the default namespace.  Returns None if nothing matches."""
        for candidate in (typeName,
                          self.targetns + typeName,
                          NS_XSD + typeName):
            if candidate in self.types:
                return self.types[candidate]
        return None

    def getObject(self, domobj, element_name, type=None):
        """Given a DOM object representing a parsed XML document and
        the name of an element, return the Python object for the
        matching element(s) of the document.

        Complex types are returned as dicts, arrays as lists, and
        simple types as their Python equivalents in most cases.

        If type is provided it is used, otherwise the element must be
        declared at the top level of the schema; KeyError is raised if
        it is not.
        """
        if type is None:
            if element_name in self.elements:
                type = self.elements[element_name]
            else:
                # Declarations are stored under targetns + name.
                type = self.elements[self.targetns + element_name]
        nodes = domobj.getElementsByTagName(element_name)
        return type.getObject(nodes)

    def __named_nodes(self, schemaobj, tagname):
        """Returns the `tagname` descendants that carry a name."""
        return [node for node in schemaobj.getElementsByTagName(tagname)
                if _get_attribute(node, 'name') != '']

    def __resolve_prefix(self, node, prefix):
        """Returns the namespace URI bound to `prefix`: from a
        declaration on `node` itself, else from self.namespaces,
        else ''."""
        uri = node.getAttributeNS(NS_XMLNS, prefix)
        if uri is None or uri == '':
            uri = self.namespaces.get(prefix, '')
        return uri

    def __load_named_types(self, schemaobj):
        # Load the simple types
        typenodes = self.__named_nodes(schemaobj, 'simpleType')
        self.types.update(self.__get_simple_types(typenodes))

        # Parse the complex types twice so that a type whose definition
        # comes after its first use is picked up on the second pass.
        # Since the dictionary is keyed by namespace + name, we avoid
        # duplication.
        typenodes = self.__named_nodes(schemaobj, 'complexType')
        for ignore in (1, 2):
            self.types.update(self.__get_complex_types(typenodes))

    def __load_elements(self, schemaobj):
        for node in schemaobj.childNodes:
            if is_element_node(node) and node.tagName == 'element':
                name, type = self.__parse_element(node)
                self.elements[self.targetns + name] = type

    def __load_includes(self, schemaobj):
        pass

    def __get_simple_types(self, typenodes):
        """From a set of nodes representing named 'simpleType' nodes in
        an XML schema doc, return a dictionary mapping names to
        SimpleType objects."""
        simple_types = {}
        for node in typenodes:
            name, type = self.__parse_simple_type(node)
            simple_types[name] = type
        return simple_types

    def __parse_simple_type(self, node):
        """Returns (namespace + name, SimpleType) for a simpleType node."""
        name = _get_attribute(node, 'name')
        base = _get_attribute(node, 'base')
        basens = None
        if ':' in base:
            prefix, base = base.split(':', 1)
            basens = self.__resolve_prefix(node, prefix)
        type = SimpleType(name, self.targetns, base, basens)
        for enumnode in node.getElementsByTagName('enumeration'):
            type.addEnumElement(_get_attribute(enumnode, 'value'))
        for encodingnode in node.getElementsByTagName('encoding'):
            type.set_encoding(_get_attribute(encodingnode, 'value'))
        return type.namespace + type.name, type

    def __get_complex_types(self, typenodes):
        """From a set of nodes representing named 'complexType' nodes
        in an XML schema doc, return a dictionary mapping names to
        ComplexType objects."""
        complex_types = {}
        for node in typenodes:
            name, type = self.__parse_complex_type(node)
            complex_types[name] = type
        return complex_types

    def __parse_complex_type(self, node):
        """Returns (namespace + name, ComplexType) for a complexType
        node."""
        name = _get_attribute(node, 'name')
        type = ComplexType(name, self.targetns)
        # we only support complex types with type 'all' for the moment
        for anode in node.childNodes:
            if not (is_element_node(anode) and anode.tagName == 'all'):
                continue
            for enode in anode.childNodes:
                if is_element_node(enode) and enode.tagName == 'element':
                    ename, etype = self.__parse_element(enode)
                    type.addMember(self.targetns, ename, etype)
        return type.namespace + type.name, type

    def __parse_element(self, node):
        """Returns (name, type) for an element declaration.

        The type is a type object, or the qualified type name as a
        string if the type is not known (yet), or None if the element
        has neither a type attribute nor an embedded type definition.
        """
        name = _get_attribute(node, 'name')
        typename = _get_attribute(node, 'type')
        type = None

        if typename == '' and len(node.childNodes) > 0:
            children = [child for child in node.childNodes
                        if is_element_node(child)]
            # We probably have an embedded complexType, try that first
            embedded = [child for child in children
                        if child.tagName == 'complexType']
            if len(embedded) > 0:
                ignore, type = self.__parse_complex_type(embedded[0])
            else:
                # Maybe an embedded simpleType
                embedded = [child for child in children
                            if child.tagName == 'simpleType']
                if len(embedded) > 0:
                    ignore, type = self.__parse_simple_type(embedded[0])
        elif ':' in typename:
            # Qualified name: resolve the prefix to a namespace URI.
            prefix, localname = typename.split(':', 1)
            qualified = self.__resolve_prefix(node, prefix) + localname
            if qualified in self.types:
                type = self.types[qualified]
            else:
                type = qualified
        else:
            # Unqualified name: fall back on the target and default
            # namespaces.
            type = self.getType(typename)
            if type is None:
                type = typename

        # Handle arrays
        min_text = _get_attribute(node, 'minOccurs')
        if min_text != '':
            min = int(min_text)
            max_text = _get_attribute(node, 'maxOccurs')
            if max_text == 'unbounded':
                max = None
            elif max_text == '':
                max = 1  # the XML Schema default for maxOccurs
            else:
                max = int(max_text)
            type = ArrayType(type, min, max)

        return name, type