# benchmark
# MSXML:  This can be downloaded from many places.  You need 3.0
# which is NOT in most newly installed Windows boxes. (650kb)
# http://download.microsoft.com/download/xml/Install/3.0/WIN98Me/EN-US/msxml3.exe
#    for a quick tutorial on MSXML 3.0, see
# http://www.perfectxml.com/articles/xml/msxml30.asp

# You should then run the COM MakePY utility from the Pythonwin menu
# to get it going as fast as possible.


import sys
import glob
import time
import string
from types import TupleType
import cStringIO
    
def tupleTreeStats(node):
    """Count attributes and tags in a tuple tree.

    A node is indexed positionally: node[1] is the attribute collection
    (or None) and node[2] is the sequence of children (or None).  Children
    that are tuples are counted as nested nodes; non-tuple children are
    skipped.

    Returns a pair (attrCount, nodeCount) covering node and all of its
    tuple descendants.
    """
    attrCount = 0
    nodeCount = 0
    # Walk the tree with an explicit stack rather than recursion so that a
    # deeply nested document cannot hit the interpreter's recursion limit.
    pending = [node]
    while pending:
        current = pending.pop()
        nodeCount = nodeCount + 1
        attrs = current[1]
        if attrs is not None:
            attrCount = attrCount + len(attrs)
        children = current[2]
        if children is not None:
            for child in children:
                # isinstance also accepts tuple subclasses and needs no
                # help from the types module.
                if isinstance(child, tuple):
                    pending.append(child)
    return attrCount, nodeCount

###  pyRXP - our wrapper around the University of Edinburgh's RXP parser

def getPyRXPParser():
    """Import pyRXP and return a Parser built with default settings."""
    import pyRXP
    return pyRXP.Parser()

def getNonValidatingPyRXPParser():
    """Import pyRXP and return a Parser created with Validate=0."""
    import pyRXP
    return pyRXP.Parser(Validate=0)

def parseWithPyRXP(parser, rawdata):
    """Parse rawdata with the given parser and return whatever it produces."""
    tree = parser.parse(rawdata)
    return tree

###  rparsexml - Aaron's very fast pure python parser

def loadRparseXML():
    """Import rlextra.radxml.rparsexml and return the module itself.

    The module plays the role of the "parser" object that is later handed
    to parseWithRParseXML.
    """
    from rlextra.radxml import rparsexml as parserModule
    return parserModule

def parseWithRParseXML(rparsexml, rawdata):
    """Parse rawdata via rparsexml.parsexml0 and return the first item
    of its result.

    rparsexml is the object providing parsexml0 (the module returned by
    loadRparseXML).
    """
    result = rparsexml.parsexml0(rawdata)
    return result[0]

###  expattree - tree-building wrapper around pyexpat
def getExpatParser():
    """Import expattree and return a new ExpatTreeParser instance."""
    import expattree
    treeParser = expattree.ExpatTreeParser()
    return treeParser
    
def parseWithExpat(expatParser, rawdata):
    """Parse rawdata with expatParser.parse and return its result."""
    tree = expatParser.parse(rawdata)
    return tree

####### minidom - non-validating DOM parser in the Python distro

def loadMiniDOM():
    """Import and return the xml.dom.minidom module.

    The module is used as the "parser" object passed to parseWithMiniDOM.
    """
    from xml.dom import minidom
    return minidom

def parseWithMiniDOM(dom_module, rawdata):
    """Parse rawdata with dom_module.parseString and return the result.

    dom_module is the object providing parseString (the module returned
    by loadMiniDOM).
    """
    document = dom_module.parseString(rawdata)
    return document
    
def statsWithMiniDOM(node):
    """Placeholder stats function: ignores node and returns the fixed
    pair (1, 0) without traversing anything."""
    placeholder = (1, 0)
    return placeholder

#########  Microsoft XML Parser via COM ######################


def loadMSXML30():
    """Create and return a 'Microsoft.XMLDOM' COM object through
    win32com.client.Dispatch."""
    from win32com.client import Dispatch
    return Dispatch('Microsoft.XMLDOM')

def parseWithMSXML30(msx, rawdata):
    """Load rawdata into msx via msx.loadXML and return the same msx object.

    The return value of loadXML is not inspected.
    """
    loadXML = msx.loadXML
    loadXML(rawdata)
    return msx

def statsWithMSXML30(node):
    """Placeholder (not implemented yet): ignores node and returns the
    fixed pair (1, 0)."""
    placeholder = (1, 0)
    return placeholder

###########4DOM ###############
def load4DOM():
    """Return a new PyExpat.Reader from xml.dom.ext.reader.

    xml.dom.Node is imported as well, although nothing here uses it.
    """
    from xml.dom.ext.reader import PyExpat
    from xml.dom import Node
    return PyExpat.Reader()

def parseWith4DOM(reader, rawdata):
    """Parse rawdata with reader.fromString and return the result."""
    document = reader.fromString(rawdata)
    return document


def statsWith4DOM(node):
    """Placeholder stats function: ignores node and returns the fixed
    pair (1, 0)."""
    placeholder = (1, 0)
    return placeholder

def loadCDomlette():
    """Import cDomlettec from Ft.Lib and return it.

    The returned object is what parseWithCDomlette receives as modul.
    """
    from Ft.Lib import cDomlettec as domlette
    return domlette

def parseWithCDomlette(modul, rawdata):
    """Wrap rawdata in a cStringIO stream and parse it with modul.parse.

    modul.parse is called with the stream and an empty string as its
    second argument; its result is returned unchanged.
    """
    return modul.parse(cStringIO.StringIO(rawdata), '')

def statsWithCDomlette(node):
    """Placeholder stats function: ignores node and returns the fixed
    pair (1, 0)."""
    placeholder = (1, 0)
    return placeholder

##########put them all together################

# One row per benchmarked parser, in the order shown in the interactive menu.
# Each row is a 4-tuple:
#   (parser name,
#    zero-argument function that loads/creates the parser object,
#    function(parserObject, rawdata) that parses the text,
#    function(parsedOutput) that gathers statistics)
TESTMAP = [
    (
        'pyRXP',
        getPyRXPParser, parseWithPyRXP, tupleTreeStats,
    ),
    (
        'pyRXP_nonvalidating',
        getNonValidatingPyRXPParser, parseWithPyRXP, tupleTreeStats,
    ),
    (
        'rparsexml',
        loadRparseXML, parseWithRParseXML, tupleTreeStats,
    ),
    (
        'expat',
        getExpatParser, parseWithExpat, tupleTreeStats,
    ),
    (
        'minidom',
        loadMiniDOM, parseWithMiniDOM, statsWithMiniDOM,
    ),
    (
        'msxml30',
        loadMSXML30, parseWithMSXML30, statsWithMSXML30,
    ),
    (
        '4dom',
        load4DOM, parseWith4DOM, statsWith4DOM,
    ),
    (
        'cdomlette',
        loadCDomlette, parseWithCDomlette, statsWithCDomlette,
    ),
]

def _readSampleText(dtd):
    # Return the text of rml_a.xml; when dtd is false, drop its first two lines.
    if not dtd:
        print('DTD declaration removed, non-validating')
    sampleFile = open('rml_a.xml')
    try:
        if dtd:
            return sampleFile.read()
        return ''.join(sampleFile.readlines()[2:])
    finally:
        # always release the file handle, even if reading fails
        sampleFile.close()


def _askForParser(sampleText):
    # Show the parser menu and return the chosen TESTMAP row, or None for 'x'.
    print("Interactive benchmark suite for Python XML tree-parsers.")
    print('Using sample XML file %d bytes long' % len(sampleText))
    print("Parsers available:")
    for index, row in enumerate(TESTMAP):
        print('\t%d.  %s' % (index + 1, row[0]))
    print('')
    while 1:
        inp = raw_input('Parser number (or x to exit) > ').strip()
        if inp == 'x':
            return None
        try:
            num = int(inp)
        except ValueError:
            num = 0
        # Reject 0 and negatives explicitly: TESTMAP[num-1] would otherwise
        # silently wrap around to the end of the list.
        if 1 <= num <= len(TESTMAP):
            return TESTMAP[num - 1]
        print('please enter a number from 1 to %d' % len(TESTMAP))


def _askYesNo(prompt):
    # Prompt until the answer is exactly 'y' or 'n'; return true for 'y'.
    while 1:
        inp = raw_input(prompt).strip()
        if inp in ('y', 'n'):
            return inp == 'y'
        print('enter "y" or "n".')


def _askMemoryKb(prompt):
    # Prompt until the answer can be converted to a float; return that float.
    while 1:
        inp = raw_input(prompt)
        try:
            return float(inp)
        except ValueError:
            print('please enter a number')


def interact(testName=None, dtd=1, pause='unknown'):
    """Benchmark one parser from TESTMAP against the file rml_a.xml.

    testName -- name of a TESTMAP row to run.  If it is None/empty, or no
                row has that name, a menu is shown and the user picks a
                parser by number (or 'x' to quit).
    dtd      -- if true the whole file is parsed; if false the first two
                lines are trimmed off first; the lack of a DTD reference
                will put validating parsers into non-validating mode.
    pause    -- true: ask the user to type in the process memory before
                and after parsing and report the difference; false: skip
                the memory questions; 'unknown': ask the user which.

    Prints the load, parse and traverse times (plus memory figures when
    pause is true).  Returns None.
    """
    sampleText = _readSampleText(dtd)

    row = None
    if testName:
        for candidate in TESTMAP:
            if candidate[0] == testName:
                row = candidate
                break
        if row is None:
            print('parser %s not found, please select' % testName)

    if row is None:
        # No usable name given: fall back to the interactive menu.
        row = _askForParser(sampleText)
        if row is None:
            print('bye')
            return
    (name, loadFunc, parseFunc, statFunc) = row

    # force pause to 1 or 0 by asking
    if pause == 'unknown':
        pause = _askYesNo("Shall we do memory tests?  i.e. you look at Task Manager? y/n > ")

    # Report the name of the row actually selected; testName is None after
    # a menu selection.
    print('testing %s' % name)

    # load the parser
    t0 = time.clock()
    parser = loadFunc()
    loadTime = time.clock() - t0

    # The memory prompts sit outside the timed sections so that waiting for
    # the user does not distort the measurements.
    if pause:
        baseMem = _askMemoryKb("Pre-parsing: please input python process memory in kb > ")
    t1 = time.clock()
    parsedOutput = parseFunc(parser, sampleText)
    parseTime = time.clock() - t1

    if pause:
        totalMem = _askMemoryKb('Post-parsing: please input python process memory in kb > ')
        usedMem = totalMem - baseMem
        # bytes of memory used per byte of input text
        memFactor = usedMem * 1024.0 / len(sampleText)

    t3 = time.clock()
    # Stats functions return (attributes, tags), the order tupleTreeStats
    # uses; the placeholder stats functions just return a fixed pair.
    attrCount, tagCount = statFunc(parsedOutput)
    traverseTime = time.clock() - t3

    print('counted %d tags, %d attributes' % (tagCount, attrCount))
    if pause:
        print('%s: init %0.4f, parse %0.4f, traverse %0.4f, mem used %dkb, mem factor %0.2f' % (
            name, loadTime, parseTime, traverseTime, usedMem, memFactor))
    else:
        print('%s: init %0.4f, parse %0.4f, traverse %0.4f' % (
            name, loadTime, parseTime, traverseTime))
    print('')

    
if __name__=='__main__':
    # Command line:  [-nodtd] [-pause | -nopause] [parserName]
    # Recognised flags are removed from the argument list; whatever is then
    # left at position 1 is taken as the parser name.
    import sys
    args = list(sys.argv)

    dtd = 1
    if '-nodtd' in args:
        args.remove('-nodtd')
        dtd = 0

    pause = 'unknown'  # it will ask
    if '-pause' in args:
        args.remove('-pause')
        pause = 1
    elif '-nopause' in args:
        args.remove('-nopause')
        pause = 0

    testName = None
    if len(args) > 1:
        testName = args[1]

    interact(testName, dtd, pause=pause)