"""Find ordered "sets" of marker lines with a SimpleParse grammar.

A ``set`` skips forward with negated matches (``-token*``) to the first,
second and fifth marker lines, in that order.  Run as a script, the module
pretty-prints the parse of two small samples and then times two large
(~30 MB) stress parses.
"""
import pprint
from timeit import default_timer

from simpleparse.parser import Parser

# NOTE(review): the whitespace production had no name in the reviewed source
# (``:= [ \t]*``), which is not a valid EBNF declaration.  ``<ts>`` (an
# unreported production) is assumed here -- confirm against the upstream
# example.  No other production references it.
#
# The grammar's own comment talks about inlining the literal strings for the
# negated versions; the ``set`` rule as written references the named
# productions instead.
declaration = r"""
firstLine := "This is first line"
secondLine := "This is second line"
fifthLine := "This is fifth line"

<ts> := [ \t]*

# the actual text strings are included directly
# for the negative versions which is basically to
# avoid the overhead of the name-ref indirection
# (which should be optimised away automatically, but isn't)
set := -firstLine*, firstLine, -secondLine*, secondLine, -fifthLine*, fifthLine
sets := set*
"""

# Parser whose default root production is a single ``set``.
p = Parser(declaration, 'set')

# Sample with the marker lines in first/second/fifth order.
file1 = """This is first line
This is second line
This is NOT first line
This is NOT second line
This is fifth line
This is NOT fifth line
"""

# Sample where the fifth-line marker appears before the second-line marker.
file2 = """This is first line
This is fifth line
This is second line
This is NOT first line
This is NOT second line
This is NOT fifth line
"""

# Approximate size, in characters, of each stress-test input.
_BENCHMARK_SIZE = 30000000


def _timed_parse(description, data):
    """Parse *data* with the ``sets`` production and report the timing.

    Prints a start banner, the elapsed time and the number of top-level
    results, then returns the result list (second item of the parse tuple).
    """
    # Single-argument print calls emit the same text on Python 2 and 3.
    print('starting parse of %s' % description)
    started = default_timer()
    # Only the result list is used; the other two items are ignored.
    _success, results, _next_index = p.parse(data, "sets")
    print('finished parse %s' % (default_timer() - started))
    print('number of results %s' % len(results))
    return results


def main():
    """Show the small sample parses, then run the two timed stress parses."""
    pprint.pprint(p.parse(file1))
    pprint.pprint(p.parse(file2))

    # One match preceded by a long run of newlines.
    results = _timed_parse(
        'file 1 with 1 match at end',
        "\n" * _BENCHMARK_SIZE + file1,
    )
    pprint.pprint(results)
    print('')

    # Many back-to-back matches; floor division keeps the repeat count an
    # int on Python 3 as well as Python 2.
    _timed_parse(
        'file 1 with ~230,000 matches (slow)',
        file1 * (_BENCHMARK_SIZE // len(file1)),
    )


if __name__ == "__main__":
    main()