#!/usr/bin/python -u # # Run doclifter against an entire manual tree. # Sees all files in section 1 through 9 by default. import sys, os, getopt, signal, time, re, commands, cStringIO, stat, sets mandir = "/usr/share/man" patchdir = os.path.abspath("prepatch") outdir = None patched = 0 makehtml = False xslfragment = None processed = sets.Set([]) def manfile(section, basename=""): "Return a manual file or directory based on section name." if not basename: # Return the directory return "%s/man%s/" % (mandir, section) elif basename[0] == '/': return basename elif basename.endswith(".gz") or basename.endswith(".bz2") or basename.endswith(".Z"): # We've been handed an actual filename return "%s/man%s/%s" % (mandir, section, basename) else: # We've been handed a filename section return "%s/man%s/%s.%s.gz" % (mandir, section[:1], basename, section) def analyze_manpage(manpage): "Provide log annotations based on content." exclusions = ( ("auto-generated by docbook2man-spec", "SGML DocBook"), ("Generated by db2man.xsl", "XML DocBook"), ("Automatically generated by Pod::Man", "Pod::Man"), ) output = "" # Check to see if it has DocBook masters fp = open(manpage) firstline = fp.readline() for (pattern, generator) in exclusions: if firstline.find(pattern) > -1: output += "Generated from %s\n" % generator if firstline.startswith(""): output += "This page is HTML.\n" fp.close() return output def fetch_page(file, localcopy, patch): "Grab a local copy of a man page, patching if needed." output = "" if file[-3:] == ".gz": cstat = os.system("gunzip <%s >%s" % (file, localcopy)) elif file[-4:] == ".bz2": cstat = os.system("bunzip2 <%s >%s" % (file, localcopy)) elif file[-2:] == ".Z": cstat = os.system("uncompress <%s >%s" % (file, localcopy)) else: cstat = os.system("cp %s %s" % (file, localcopy)) if os.WIFSIGNALED(cstat) or os.WEXITSTATUS(cstat): return (1, output + "testjig: copy failed, status %d", cstat) if os.path.exists(patch): here = os.getcwd() os.chdir(outdir) patch = commands.getoutput("patch --version-control=never <%s" % (patch,)) stem = os.path.basename(localcopy) os.system("rm -f %s.orig %s.rej" % (stem, stem)) os.chdir(here) if patch: output += patch + "\n" return (0, output) def getstem(file): "Reduce the name of a man page or generated HTML file to its stem" if file.endswith(".xml"): file = file[:-4] file = ".".join(file.split(".")[:-1]) # Remove section return file def make_xml(source, options, batchmode): "Make XML from specified man page." doclifter.stdout = doclifter.stderr = keep_io = cStringIO.StringIO() args = ["-I", mandir,] + options.split() + [source,] doclifter_status = doclifter.main(args, keep_io, keep_io) output = keep_io.getvalue() keep_io.close() lxmlloc = None if doclifter_status == 2: fp = open(source) contents = fp.read() inclusions = re.compile(r"\.so\s+(.*)").search(contents) fp.close() if inclusions: lxmlloc = os.path.join(outdir, getstem(inclusions.group(1)) + ".xml") return(2, lxmlloc, output) return (doclifter_status, None, output) def validate(translation): "Validate an XML file produced by translation." output = "" # If it has entity inclusions it won't validate, so don't try. # This is only a good idea because man pages that have these are # usually trivial wrappers like builtins.1 try: fp = open(translation) inclusions = re.compile("").search(fp.read()) fp.close() if inclusions: output += "Won't validate due to entity inclusion of %s\n" % inclusions.group(1) return (0, output) except IOError: output += "%s is missing.\n" % translation # Run the validation checker (bstat, validate_out) = commands.getstatusoutput("xmllint --xinclude --postvalid %s >/dev/null" % translation) if validate_out: output += validate_out + "\n" if os.WIFSIGNALED(bstat): output += "Bailing out of xmllint...\n" return (-1, output) xmllint_error_status = os.WEXITSTATUS(bstat) if xmllint_error_status: output += "xmllint error status:%s\n" % os.WEXITSTATUS(bstat) if xmllint_error_status: return (6, output) return (0, output) def format(translation, fmt, xslfragment): "Format an XML file to a specified format." output = "" here = os.getcwd() os.chdir(os.path.dirname(translation)) if xslfragment: command = "xmlto %s %s" % (fmt, os.path.basename(translation)) else: command = "xmlto -m %s %s %s" % (xslfragment, fmt, os.path.basename(translation)) (bstat, format_out) = commands.getstatusoutput(command) os.chdir(here) if format_out: output += format_out + "\n" if os.WIFSIGNALED(bstat): output += "Bailing out of %s formatting...\n" % fmt return (-1, output) format_error_status = os.WEXITSTATUS(bstat) if format_error_status: output += "format error status:%s\n" % os.WEXITSTATUS(bstat) if format_error_status: return (6, output) return (0, output) def deploy(source, target): try: os.rename(source, target) except OSError, e: return(3, "Rename of %s to %s failed, errno = %d" % (source, target, e.errno,)) return (0, "") def makelink(source, target): try: os.symlink(os.path.abspath(source), os.path.abspath(target)) except OSError: pass def singlerun(file, options, tmpstem="foo"+`os.getpid()`, batchmode=False): "Test-format a single file." global patched if not os.path.exists(file): return (0, "") output = "" if file[-3:] == ".gz": withsect = os.path.basename(file)[:-3] elif file[-4:] == ".bz2": withsect = os.path.basename(file)[:-4] elif file[-2:] == ".Z": withsect = os.path.basename(file)[:-2] else: withsect = os.path.basename(file) dot = withsect.rindex(".") section = withsect[dot+1:dot+2] subdir = os.path.join(outdir, "man" + section) stem = getstem(withsect) xmlloc = os.path.join(subdir, stem + ".xml") # Count patches here so our stats won't be off patch = os.path.join(patchdir, withsect + ".patch") if os.path.exists(patch): patched += 1 try: global processed tmpstem = os.path.join(outdir, tmpstem) source = tmpstem + ".man" # Save work by doing conversions only as needed rebuild_xml = True if batchmode and os.path.exists(xmlloc): if os.stat(file).st_mtime < os.lstat(xmlloc).st_mtime: output += "XML conversion is up to date\n" processed.discard(withsect) rebuild_xml = False htmlloc = os.path.join(subdir, stem + ".html") if rebuild_xml: # Grab the actual manual page localcopy = os.path.join(outdir, withsect) (status, output) = fetch_page(file, localcopy, patch) if (status): return (status, output) # Note the the patch was used processed.discard(withsect) # Add any annotations output += analyze_manpage(localcopy) # Move the source file into the output directory os.rename(localcopy, source) # Run the translator (doclifter_status, lxmlloc, note) = make_xml(source, options, batchmode) output += note if doclifter_status not in (0, 2): if not batchmode: output += "doclifter error status: %s\n" % doclifter_status return (doclifter_status, output) translation = tmpstem + ".man.xml" # Warn about FIX-ME problems output += commands.getoutput("grep FIX-ME " + translation + " 2>/dev/null") # If the translation went through, cleaning up consists # of putting this in its permanent location. try: # This will foo up if we ever have to symlink between dirs if batchmode and not os.path.exists(subdir): os.mkdir(subdir) except OSError, e: return(3, output + "Creation of %s failed, errno = %d\n"%(subdir,e.errno)) if doclifter_status == 2: makelink(lxmlloc, xmlloc) if doclifter_status == 0: if not makehtml: (status, more) = validate(translation) output += more if batchmode and status: os.remove(translation) try: os.remove(htmlloc) except OSError: pass return (status, output) if batchmode: (status, more) = deploy(translation, xmlloc) translation = xmlloc output += more if status: return (status, output) # Save work by doing HTML conversions only as needed rebuild_html = makehtml if batchmode and os.path.exists(htmlloc): if os.stat(xmlloc).st_mtime < os.lstat(htmlloc).st_mtime: output += "HTML conversion is up to date\n" rebuild_html = False if rebuild_html: if batchmode: htmlloc = os.path.join(subdir, stem + ".html") else: htmlloc = stem + ".html" if batchmode and stat.S_ISLNK(os.lstat(xmlloc).st_mode): makelink(os.readlink(xmlloc)[:-4]+".html", htmlloc) else: (status, more) = format(translation, "xhtml-nochunks", xslfragment) output += more if status: if batchmode: os.remove(xmlloc) try: os.remove(htmlloc) except OSError: pass return (status, output) finally: # Clean up if batchmode: if os.path.exists(source): os.remove(source) return (0, output) def sectionfiles(sections): "Generate files corresponding to a list of sections." files = [] for section in sections: files = files + map(lambda f: manfile(section, f), os.listdir(manfile(section))) files.sort() return files total = eligible = starttime = 0 def massrun(files, options): "Test against all files in specified sections." def bailout(signum, frame): print "\nBailing out with signal %d..." % signum os.system("rm -f doclifter_test%s.py doclifter_test%s.py[co]" % (os.getpid(), os.getpid())) sys.exit(0) global total, eligible, starttime total = 0 starttime = int(time.time()) eligible = len(files) doclifter_error_count = xmllint_error_count = total = 0 def report(sig, frame, out=sys.stderr): ftotal = float(total) elapsed = int(time.time()) - starttime out.write("\n%%%d of %d files in %02d:%02d:%02d, %d OK, %d patched, %d doclifter errors, %d validation failures, %2.2f%% good.\n" % \ (total, eligible, elapsed/3600, (elapsed % 3600)/60, elapsed % 60, (total - doclifter_error_count - xmllint_error_count), patched, doclifter_error_count, xmllint_error_count, (ftotal-doclifter_error_count-xmllint_error_count-patched)*100.0/ftotal)) def test(file, options): before = time.time() (status, output) = singlerun(file=file, options=options, batchmode=True) after = time.time() sys.stdout.write("! %s=%d (%2.2f)\n%s\n" % (file, status, after-before, output)) return (status, output) signal.signal(signal.SIGUSR2, report) signal.signal(signal.SIGHUP, bailout) signal.signal(signal.SIGINT, bailout) signal.signal(signal.SIGPWR, bailout) signal.signal(signal.SIGTERM, bailout) print "%Test started", time.ctime(), "\n" try: for file in files: (status, output) = test(file=file, options=options) if status == -1: break elif status in (1, 4): # Doclifter parse or internal error if output.find("is empty") == -1 and output.find("has no text") == -1 and output.find("has no content") == -1: doclifter_error_count += 1 elif status == 2: # .so inclusion pass elif status in (3, 5): # File I/O error or keyboard interrupt pass elif status == 6: xmllint_error_count += 1 total = total + 1 except KeyboardInterrupt: pass report(0, sys.stdout) htmlheader = ''' Manlifter contents page ''' htmltrailer = "\n\n" def genindex(ofp): # Collect all section/name/description triples filelist = [] section_re = re.compile("/man([^/]*)") extract_re = re.compile("([^<]*)") section_dict = {} for (root, dirs, files) in os.walk('xmlman'): for file in files: try: # Extract the manual section m = section_re.search(root) if m: section = m.group(1) else: continue section_dict[section] = [] # Extract the manual page name name = ".".join(file.split(".")[:-2]) # Extract the description file = os.path.join(root, file) fp = open(file) contents = fp.read() fp.close() m = extract_re.search(contents) if m: description = m.group(1) else: description = "(no description)" # Build an index entry filelist.append((section, file, name, description)) except IOError: pass filelist.sort() # In case the directory was pieced together by several runs for (section, file, name, description) in filelist: section_dict[section].append((file, name, description)) keys = section_dict.keys() keys.sort() for section in keys: ofp.write(htmlheader) ofp.write("

%s:

\n
\n" % section) for (file, name, description) in section_dict[section]: ofp.write("
%s
%s
\n" \ % (file, name, description)) ofp.write("
\n") ofp.write(htmltrailer) def statistics(): counts = [0] * 7 legends = ( "OK ", "???", ".so", "I/O", "!!!", "^C ", "XML", ) patchcount = re.compile("([0-9]+) patched") totalcount = 0 while True: line = sys.stdin.readline() if not line: break m = patchcount.search(line) if m: patched = int(m.group(1)) if line[0] != '!': if line.find("is empty") > 1 or line.find("has no text") > -1: print file counts[1] -= 1 counts[0] += 1 continue line = line[2:] rcolon = line.rindex("=") file = line[:rcolon] retval = line[rcolon+1:].split()[0] if file.endswith(".gz"): file = file[:-3] elif file.endswith(".bz2"): file = file[:-4] elif file.endswith(".Z"): file = file[:-2] file = os.path.basename(file) counts[int(retval)] += 1 totalcount += 1 total = sum(counts) for (i, count) in enumerate(counts): print "%d = %s: %5d %2.2f%%" % (i, legends[i], count, (count * 1.0)*100/total) print "Total:",totalcount print "Patched: %d (%2.2f%%)" % (patched, patched*100/float(totalcount)) print "With patches: %2.2f%%" % ((counts[0]+counts[2])*100/float(totalcount)) print "Without patches: %2.2f%%" % ((counts[0]+counts[2]-patched)*100/float(totalcount)) def errorclean(error_only, pattern): if pattern: pattern = re.compile(pattern) pagename = re.compile(r"! (.*)=([0-9]+)") while 1: header = sys.stdin.readline() if not header: break # Look for a log leader m = pagename.search(header) if not m: continue subject = m.group(1) status = int(m.group(2)) # Collect following error messages up to a blank line trailer = '' while 1: line = sys.stdin.readline() trailer += line if not line or line == '\n': break if pattern: # Emit by pattern if pattern.search(trailer): sys.stdout.write(subject+"\n") else: # Emit some of them by status if status == 0 or status == 2: continue if status == 1 and (trailer.find("page is empty") > -1 or trailer.find("page has no text") > -1): continue # Otherwise, emit if error_only: print subject else: sys.stdout.write(header + trailer) citereftemplate = ''' %s /man / .html ''' def doclifter_driver(options, arguments): "Lift old markup to new." global makehtml, outdir, xslfragment, patchdir filelist = [] sections = [] callopts = "" makehtml = False errorfilter = False quiet = False fval = None for (switch, val) in options: if (switch == '-d'): callopts += " -d " + val elif (switch == '-e'): errorfilter = True elif (switch == '-f'): # Translate files in the specified list fval = val elif (switch == '-h'): makehtml = True elif (switch == '-I'): # Specify the root of the manual hierarchy mandir = val elif (switch == '-p'): # Specify patch directory patchdir = os.path.abspath(val) elif (switch in ("-q", '-v')): # Set verbosity level quiet = True callopts += " " + switch elif (switch == '-s'): # Specify search list of sections sections.append(val) elif (switch == '-S'): # Generate statistics from log on stdin statistics() sys.exit(0) if not sections: sections = ["1", "2", "3", "4", "5", "6", "7", "8", "9"] if not outdir: if not arguments: outdir = 'xmlman' else: outdir = '.' # Clean/create the output directory if not arguments: if not os.path.exists(outdir): os.mkdir(outdir) # Create XSL fragment for making refentries into links xslfragment = os.path.abspath(os.path.join(outdir, "citerefentry.xsl")) fp = open(xslfragment, "w") fp.write(citereftemplate % outdir) fp.close() try: # Process args, if present if arguments: for file in arguments: for section in sections: manpage = manfile(section, file) print "Trying", manpage if os.path.exists(manpage): (status, output) = singlerun(manpage, callopts, "foobar", batchmode=False) print output break elif errorfilter: errorclean(quiet, fval) elif fval: fp = open(fval) filelist = map(lambda x: x.rstrip(), fp.readlines()) fp.close() massrun(filelist, callopts) else: global processed processed = sets.Set(map(lambda x: x.replace(".patch", "").replace(".correction", ""), os.listdir(patchdir))) massrun(sectionfiles(sections), callopts) if processed: print "# Patches not used:" for file in processed: print file finally: pass #os.remove(xslfragment) # Now, rebuild the index page if makehtml: fp = open(os.path.join(outdir, "index.html"), "w") genindex(fp) fp.close() if __name__ == "__main__": # Find a copy of doclifter for pathdir in ["."] + os.environ["PATH"].split(":"): where = os.path.join(pathdir, "doclifter") if os.path.exists(where): break else: sys.stderr.write("manlifter: can't find doclifter!\n") sys.exit(1) # Import it, so we can modify it while the test is running without # screwing up the results try: os.system("cp %s doclifter_test%s.py" % (where, os.getpid())) exec 'import doclifter_test%s' % os.getpid() exec "doclifter=doclifter_test%s" % os.getpid() finally: os.system("rm -f doclifter_test%s.py*" % os.getpid()) # Gather options (options, arguments) = getopt.getopt(sys.argv[1:], "d:ef:hI:p:qs:Sv") # Do the real work doclifter_driver(options, arguments) # End