#!/usr/bin/env python
import formatter
import htmllib
import sgmllib
import string
import sys
import os.path
import os
# Anchor texts that are dropped by the parser's handle_data (compared
# against the whitespace-stripped text).
SKIP_DATA = [
    "Next Page",
    "Previous Page",
    "Home",
    "Next",
    "Up",
    "<",
    ">",
]
class HTMLParser(htmllib.HTMLParser):
    """HTML parser that collects (text, href) pairs for fragment links.

    After feeding a document, ``self.a`` holds one ``(text, href)`` tuple
    for every piece of character data seen inside an anchor, provided that:

    * the whitespace-stripped text is not listed in ``SKIP_DATA``,
    * the anchor's href contains a ``'#'``, and
    * the href does not start with ``".."``.
    """

    def __init__(self, formatter, verbose=0):
        """Initialise the base parser and this instance's own state.

        The result list is created per instance; as a class attribute it
        was shared, so every parser appended to (and returned) the same
        ever-growing list.
        """
        # sgmllib/htmllib parsers are old-style classes, so the base
        # initializer is called explicitly rather than through super().
        htmllib.HTMLParser.__init__(self, formatter, verbose)
        self.is_a = 0    # 1 while between anchor_bgn and anchor_end
        self.a = []      # collected (text, href) tuples
        self.link = ""   # href of the most recently opened anchor

    def anchor_bgn(self, href, name, type):
        """Remember the href of the anchor that is being opened."""
        self.is_a = 1
        self.link = href

    def anchor_end(self):
        """Mark the current anchor as closed (``link`` is left as-is)."""
        self.is_a = 0

    def handle_data(self, data):
        """Record ``data`` if it is acceptable text inside a fragment link."""
        data = data.strip()
        if data in SKIP_DATA:
            return
        if '#' not in self.link:
            return
        if self.link[:2] == "..":
            return
        # self.link is known to be non-empty here (it contains '#'), so
        # only the "inside an anchor" flag still needs checking.
        if self.is_a:
            self.a.append((data, self.link))
def parse_file(filename, bookname):
    """Parse one HTML file and return the links found in it.

    Args:
        filename: path of the HTML file to read.
        bookname: accepted for the caller's convenience; not used here.

    Returns:
        The parser's ``a`` list of ``(text, href)`` tuples.

    Raises:
        SystemExit: if a KeyboardInterrupt arrives while reading/parsing.
    """
    # The with-block guarantees the file is closed, even when parsing
    # fails; previously the handle was never closed explicitly.
    with open(filename) as fd:
        try:
            p = HTMLParser(formatter.NullFormatter())
            p.feed(fd.read())
            p.close()
        except KeyboardInterrupt:
            raise SystemExit
    return p.a
# Script body: parse every "*.html" file in the directory named by the first
# command-line argument, gather the unique (text, href) pairs returned by
# parse_file, and write them to "<dirname>/<bookname>.index".
if len(sys.argv) < 2:
    # Previously a missing argument surfaced as a bare IndexError.
    sys.stderr.write("usage: %s <html-directory>\n" % sys.argv[0])
    raise SystemExit(1)

dirname = os.path.abspath(sys.argv[1])
# The book name is the last component of the absolute directory path.
bookname = os.path.basename(dirname)

files = os.listdir(dirname)
files.sort()

# A set removes duplicates without rescanning a list for every link.
unique_links = set()
for html_name in files:
    if not html_name.endswith(".html"):
        continue
    # print("..." % x) emits the same text as the old `print "...", x`
    # statement and is valid with or without print-as-function.
    print("parsing %s" % html_name)
    unique_links.update(parse_file(os.path.join(dirname, html_name), bookname))

print("Sorting function list")
funcs = sorted(unique_links)

filename = os.path.join(dirname, bookname + ".index")
print("Writing index to %s" % filename)
with open(filename, "w") as fd:
    fd.write("\n")
    for name, link in funcs:
        # Entries whose text contains a space or newline are left out
        # (presumably they are not single identifiers).
        if ' ' in name or '\n' in name:
            continue
        # The original format string had no conversion specifiers, so
        # applying it to (name, link) raised TypeError.
        # NOTE(review): the intended per-entry markup is not visible in
        # this file; confirm the expected index line format.
        fd.write("%s %s\n" % (name, link))
    fd.write("\n\n")