# -*- test-case-name: yadis.test.test_xri -*-
"""Utility functions for handling XRIs.
@see: XRI Syntax v2.0 at the U{OASIS XRI Technical Committee<http://www.oasis-open.org/committees/tc_home.php?wg_abbrev=xri>}
"""
import re
XRI_AUTHORITIES = ['!', '=', '@', '+', '$', '(']
try:
unichr(0x10000)
except ValueError:
# narrow python build
UCSCHAR = [
(0xA0, 0xD7FF),
(0xF900, 0xFDCF),
(0xFDF0, 0xFFEF),
]
IPRIVATE = [
(0xE000, 0xF8FF),
]
else:
UCSCHAR = [
(0xA0, 0xD7FF),
(0xF900, 0xFDCF),
(0xFDF0, 0xFFEF),
(0x10000, 0x1FFFD),
(0x20000, 0x2FFFD),
(0x30000, 0x3FFFD),
(0x40000, 0x4FFFD),
(0x50000, 0x5FFFD),
(0x60000, 0x6FFFD),
(0x70000, 0x7FFFD),
(0x80000, 0x8FFFD),
(0x90000, 0x9FFFD),
(0xA0000, 0xAFFFD),
(0xB0000, 0xBFFFD),
(0xC0000, 0xCFFFD),
(0xD0000, 0xDFFFD),
(0xE1000, 0xEFFFD),
]
IPRIVATE = [
(0xE000, 0xF8FF),
(0xF0000, 0xFFFFD),
(0x100000, 0x10FFFD),
]
_escapeme_re = re.compile('[%s]' % (''.join(
map(lambda (m, n): u'%s-%s' % (unichr(m), unichr(n)),
UCSCHAR + IPRIVATE)),))
def identifierScheme(identifier):
"""Determine if this identifier is an XRI or URI.
@returns: C{"XRI"} or C{"URI"}
"""
if identifier.startswith('xri://') or identifier[0] in XRI_AUTHORITIES:
return "XRI"
else:
return "URI"
def toIRINormal(xri):
"""Transform an XRI to IRI-normal form."""
if not xri.startswith('xri://'):
xri = 'xri://' + xri
return escapeForIRI(xri)
_xref_re = re.compile('\((.*?)\)')
def _escape_xref(xref_match):
"""Escape things that need to be escaped if they're in a cross-reference.
"""
xref = xref_match.group()
xref = xref.replace('/', '%2F')
xref = xref.replace('?', '%3F')
xref = xref.replace('#', '%23')
return xref
def escapeForIRI(xri):
"""Escape things that need to be escaped when transforming to an IRI."""
xri = xri.replace('%', '%25')
xri = _xref_re.sub(_escape_xref, xri)
return xri
def toURINormal(xri):
"""Transform an XRI to URI normal form."""
return iriToURI(toIRINormal(xri))
def _percentEscapeUnicode(char_match):
c = char_match.group()
return ''.join(['%%%X' % (ord(octet),) for octet in c.encode('utf-8')])
def iriToURI(iri):
"""Transform an IRI to a URI by escaping unicode."""
# According to RFC 3987, section 3.1, "Mapping of IRIs to URIs"
return _escapeme_re.sub(_percentEscapeUnicode, iri)
def providerIsAuthoritative(providerID, canonicalID):
"""Is this provider ID authoritative for this XRI?
@returntype: bool
"""
# XXX: can't use rsplit until we require python >= 2.4.
lastbang = canonicalID.rindex('!')
parent = canonicalID[:lastbang]
return parent == providerID
def rootAuthority(xri):
"""Return the root authority for an XRI.
Example::
rootAuthority("xri://@example") == "xri://@"
@type xri: unicode
@returntype: unicode
"""
if xri.startswith('xri://'):
xri = xri[6:]
authority = xri.split('/', 1)[0]
if authority[0] == '(':
# Cross-reference.
# XXX: This is incorrect if someone nests cross-references so there
# is another close-paren in there. Hopefully nobody does that
# before we have a real xriparse function. Hopefully nobody does
# that *ever*.
root = authority[:authority.index(')') + 1]
elif authority[0] in XRI_AUTHORITIES:
# Other XRI reference.
root = authority[0]
else:
# IRI reference. XXX: Can IRI authorities have segments?
segments = authority.split('!')
segments = reduce(list.__add__,
map(lambda s: s.split('*'), segments))
root = segments[0]
return XRI(root)
def XRI(xri):
"""An XRI object allowing comparison of XRI.
Ideally, this would do full normalization and provide comparsion
operators as per XRI Syntax. Right now, it just does a bit of
canonicalization by ensuring the xri scheme is present.
@param xri: an xri string
@type xri: unicode
"""
if not xri.startswith('xri://'):
xri = 'xri://' + xri
return xri
syntax highlighted by Code2HTML, v. 0.9.1