# ports//devel/devhelp/work/devhelp-0.16/contrib/bulkman.pl

#!/usr/bin/perl

#
# * Copyright (C) 2004 Marc Britten <mbritten@monochromatic.net>
# *
# * This program is free software; you can redistribute it and/or
# * modify it under the terms of the GNU General Public License as
# * published by the Free Software Foundation; either version 2 of the
# * License, or (at your option) any later version.
# *
# * This program is distributed in the hope that it will be useful,
# * but WITHOUT ANY WARRANTY; without even the implied warranty of
# * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# * General Public License for more details.
# *
# * You should have received a copy of the GNU General Public
# * License along with this program; if not, write to the
# * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
# * Boston, MA 02111-1307, USA.
#

# The generated book should be fully functional, search included.
# When using -S, it's useful to also pass -t to make a special book for a
# specific API; you probably want to use -f at that point too.

use Getopt::Std;

# Prefix that man2html puts in front of its links; the final clean-up pass
# looks for it.  Depending on the distro you may have to change this.
$cgi = "lynxcgi:/usr/lib/cgi-bin/man/man2html";

# Number of command-line words; with none at all we only explain ourselves.
$var = @ARGV;

if (!$var) {
  print <<'USAGE';
WARNING: this script deletes all html files found in current directory!!!

Please specify man section to convert and option flags.
Valid flags are:
-f name of .devhelp file.
-t title of book.
-S search string for manpages to include. Uses grep - very CPU intensive.
-s man section (manditory).

 To make a specific API book use all 4 options.
bulkman.pl -f SDL.devbook -t "SDL API Reference" -S "SDL API Reference" -s 3

USAGE
  die " bulkman.pl <opt> -s section \n";
}

# Parse the command-line switches.  -f, -t, -s and -S each take a value,
# which getopts() stores in $opt_f, $opt_t, $opt_s and $opt_S.
#
# The previous call, getopt("f:t:s=iS:"), mixed getopts() and Getopt::Long
# notation.  getopt() treats every character of its argument as a switch
# that takes a value, so ':', '=' and 'i' were accepted as switches and
# silently swallowed the following word.  getopts() understands the "x:"
# notation and reports unknown switches on STDERR.
getopts("f:t:s:S:");

# The man section to convert; everything else is derived from it.
$section = $opt_s;

if(!$section) {
  die "Please specify a section with -s number \n";
}

# Shell command that renders one man page as HTML.  rman fills the two %s
# of the -r template itself when it writes cross references.
$HTML = sprintf("rman -f HTML -S -r '../man%s/%%s.%%s.html'", $section);

# Default output names are derived from the section ...
$xmlfile = "man$section.devhelp";
$index = "man$section.html";

# ... unless -f names the .devhelp file; the index page then takes its name
# from everything in front of the first dot of that file name.
if (defined($opt_f) && length($opt_f)) {
  $xmlfile = $opt_f;
  ($name) = split(/\./, $opt_f, 2);
  $index = "$name.html";
}

# Announce the index page name, then remove the output of earlier runs: the
# book file itself and every .html file in the current directory.
printf "HTML index page is being called: %s\n", $index;

printf "Cleaning up %s\n", $xmlfile;
unlink $xmlfile;

print "Cleaning html files\n";
unlink glob("*.html");

# Collect the man page root directories into @dirs.  They are read from the
# MANPATH / MANDATORY_MANPATH lines of /etc/man.config or, when that file
# cannot be opened, /etc/manpath.config.
print "Finding MANPATH's...\n";
if(open(MANHANDLE, '<', "/etc/man.config") || open(MANHANDLE, '<', "/etc/manpath.config")) {
  while(my $line = <MANHANDLE>) {
    # Only the two keywords themselves count; the \b keeps MANPATH_MAP out.
    next unless $line =~ /^(?:MANDATORY_)?MANPATH\b/;

    # split ' ' separates on runs of whitespace and ignores the line ending,
    # so no empty fields appear and no character of the last path is lost
    # when the final line of the file has no newline.
    my ($keyword, @paths) = split(' ', $line);
    push(@dirs, @paths);
  }
  close(MANHANDLE);
}
else {
  # Keep going as before (the book will simply be empty), but say why.
  warn "Cannot read /etc/man.config or /etc/manpath.config: $!\n";
}

# Book title: the -t value when one was given, otherwise "man<section>".
$title = $opt_t ? $opt_t : "man$section";

# Open the HTML index page and the devhelp book file and write their
# headers.  Both handles stay open for the conversion loop that follows.
open(INDEX, '>', $index) or die "Cannot write $index: $!\n";
open(XMLHANDLE, '>', $xmlfile) or die "Cannot write $xmlfile: $!\n";

# The title comes from the command line; escape the characters that would
# otherwise break the XML attribute or the HTML markup.
my $safe_title = $title;
$safe_title =~ s/&/&amp;/g;
$safe_title =~ s/</&lt;/g;
$safe_title =~ s/>/&gt;/g;
$safe_title =~ s/"/&quot;/g;

print XMLHANDLE "<?xml version=\"1.0\" encoding=\"iso-8859-1\"?>\n";
# The book name is "man" plus the section.  This line used to interpolate
# $N, which is never assigned, so every book was simply called "man".
print XMLHANDLE "<book name=\"man$section\" version=\"0.1\" title=\"$safe_title\" link=\"$index\">\n";
print XMLHANDLE "<chapters>\n";

print INDEX "<HTML>\n<HEAD>\n<TITLE>$safe_title</TITLE>\n</HEAD>\n<BODY>\n<H2>$safe_title</H2><P><UL>\n";

print "Creating XML file and HTML conversions.\n";

if($opt_S) {
  print "Using search string: $opt_S\n";
}
# Open each MANPATH directory in turn and convert every page of the
# requested section to HTML, adding an entry to the book's chapter list and
# to the HTML index for each converted page.  Plain and gzip-compressed
# pages are both supported.  With -S only pages that match the search
# string are converted; a stub page that merely redirects with ".so" is
# judged by the page it points to.

# Quote $text so that the shell passes it on as one literal word.
sub shell_quote {
  my ($text) = @_;
  $text =~ s/'/'\\''/g;
  return "'$text'";
}

# Return true when the grep pattern $pattern occurs in the man page $path.
# Pages whose name ends in .gz are decompressed on the fly.
sub page_matches {
  my ($pattern, $path) = @_;
  my $quoted_pattern = shell_quote($pattern);
  my $quoted_path = shell_quote($path);
  my $command = ($path =~ /\.gz$/)
    ? "gunzip -c $quoted_path | grep -q -e $quoted_pattern"
    : "grep -q -e $quoted_pattern $quoted_path";
  # system() returns the exit status, so 0 means grep found a match.
  return system($command) == 0;
}

# Return the target of the first ".so man..." request in the man page $path
# (for example "man3/printf.3"), or undef when there is none or the page
# cannot be read.
sub so_target {
  my ($path) = @_;
  my $fh;
  if ($path =~ /\.gz$/) {
    open($fh, '-|', 'gunzip', '-c', $path) or return undef;
  }
  else {
    open($fh, '<', $path) or return undef;
  }
  my $target;
  while (my $line = <$fh>) {
    if ($line =~ /^\.so\s+(man\S+)/) {
      $target = $1;
      last;
    }
  }
  close($fh);
  return $target;
}

foreach my $dir (@dirs) {
  my $mandir = "$dir/man$section";
  opendir(my $dh, $mandir) or next;
  my @entries = sort readdir($dh);
  closedir($dh);

  foreach my $file (@entries) {
    my $fullfile = "$mandir/$file";
    next unless -f $fullfile;

    if ($opt_S && !page_matches($opt_S, $fullfile)) {
      # The page itself does not match, but it may be a stub that pulls in
      # another page with ".so"; in that case test the page it points to.
      my $target = so_target($fullfile);
      next unless defined $target;
      print "good .so: $file\n";

      my $real;
      if (-f "$dir/$target") {
        $real = "$dir/$target";
      }
      elsif (-f "$dir/$target.gz") {
        $real = "$dir/$target.gz";
      }
      else {
        print "Cannot locate file $target\n";
        next;
      }
      # Failed again: skip the page.
      next unless page_matches($opt_S, $real);
    }

    # Conversion failures are deliberately ignored here; broken pages are
    # dealt with by the later fix-up passes.
    my $source = shell_quote($fullfile);
    if ($fullfile =~ /\.gz$/) {
      # Drop only the trailing ".gz"; the output is named <page>.<sec>.html.
      $file =~ s/\.gz$//;
      my $target_html = shell_quote("$file.html");
      system("gunzip -c $source | $HTML > $target_html 2>/dev/null");
    }
    else {
      my $target_html = shell_quote("$file.html");
      my $page_name = shell_quote($file);
      system("$HTML -n $page_name $source > $target_html 2>/dev/null");
    }

    my ($page) = split(/\./, $file);
    print XMLHANDLE "<sub name=\"$page\" link=\"$file.html\">\n";
    print XMLHANDLE "</sub>\n";
    print INDEX "<LI><A HREF=\"$file.html\">$page</A><br>";
  }
}

# Close the chapter list and the HTML index, then write the <functions>
# section that devhelp searches, and finish the book file.
print XMLHANDLE "</chapters>\n";
print XMLHANDLE "<functions>\n";

print INDEX "</UL></BODY></HTML>";
close(INDEX);

print "Fixing up XML search info.\n";

# Walk the man directories a second time to set up the function entries.
foreach my $dir (@dirs) {
  my $mandir = "$dir/man$section";
  opendir(my $dh, $mandir) or next;
  foreach my $file (sort readdir($dh)) {
    next unless -f "$mandir/$file";

    # The conversion pass names its output after the page with a trailing
    # ".gz" removed, so the link has to be built the same way.
    (my $html = $file) =~ s/\.gz$//;
    $html .= ".html";

    # Pages filtered out by -S were never converted; listing them would
    # only produce search hits that lead nowhere.
    next unless -e $html;

    my ($page) = split(/\./, $file);
    print XMLHANDLE "<function name=\"$page\" link=\"$html\"/>\n";
  }
  closedir($dh);
}

print XMLHANDLE "</functions>\n";
print XMLHANDLE "</book>\n";

close(XMLHANDLE);

# Now it's time to clean up some of the mess.  Some man files are only
# stubs that point at another, already existing man file with a ".so"
# request.  man2html hates that, so we figure out which conversions
# reported an error and replace each of them with a symlink to the HTML
# page of the man file the stub points to.

print "Fixing invalid conversions with links.\n";

# grep -l prints one file name per line; the shell expands *.html.
open(GREP, "grep -l \"Invalid Manpage\" *.html|");
@bad = <GREP>;
close(GREP);
chomp(@bad);

foreach my $file (@bad) {
  my ($page) = split(/\./, $file);

  foreach my $dir (@dirs) {
    my $manfile = "$dir/man$section/$page.$section";

    # Test for the file first: opening a pipe succeeds even when the file
    # gunzip is asked to read does not exist, so open() alone cannot tell
    # which directory really holds the page.
    my $stub;
    if (-f "$manfile.gz") {
      open($stub, '-|', 'gunzip', '-c', "$manfile.gz") or next;
    }
    elsif (-f $manfile) {
      open($stub, '<', $manfile) or next;
    }
    else {
      next;
    }
    my $first = <$stub>;
    close($stub);

    # A stub's first line looks like ".so man3/realpage.3"; only the file
    # name part is needed because all HTML pages sit in one directory.
    if (defined $first && $first =~ /^\.so\s+(\S+)/) {
      (my $target = $1) =~ s{.*/}{};

      # Converted pages are named <page>.<section>.html (the conversion
      # pass strips ".gz"), so that is what the link must point to.
      unlink($file);
      symlink("$target.html", $file)
        or warn "Cannot link $file to $target.html: $!\n";
    }
    last;
  }
}

#die "testing no fixing\n";
# Yet another loop through.  This time every HTML page in the current
# directory is edited in place to clean up what man2html leaves behind: the
# "Content-type" header line and the links that go through the man2html
# CGI ($cgi), which are turned into links to the generated pages.
print "Fixing man2html formatting.\n";

open(PWD, "pwd|");
($mydir) = <PWD>;
chomp($mydir);
close(PWD);

opendir(MYDIR, $mydir) || die "can't open $mydir.";
foreach $file (sort readdir(MYDIR)) {
  # Only touch the generated pages; other files that happen to live in this
  # directory (the .devhelp book among them) are left alone.
  next unless $file =~ /\.html$/ && -f $file;

  # Inplace editing trick from Perl Cookbook: read everything, modify the
  # lines in memory, then rewind, rewrite and cut off what is left over.
  my $fh;
  unless (open($fh, '+<', $file)) {
    warn "Cannot update $file: $!\n";
    next;
  }
  my @data = <$fh>;
  #	print $file . "\n";
  foreach my $line (@data) {
    $_ = $line; # for regex only, need to mod $line to get file to change
    $line =~ s/Content-type: text\/html//g;

    # \Q...\E keeps characters of $cgi and $link from acting as regex
    # operators; a page called "[" would otherwise abort the whole run.
    while($line =~ /\Q$cgi\E/g) {
      $line =~ s/\"\Q$cgi\E\"/\"$index\"/g;
      $line =~ s/Return to Main Contents/Documentation Index/g;
      $line =~ s/<A HREF=\"#index\">Index<\/A>//g;

      m{<B><A.*>(.*)</[aA]}sx;
      # $1 should now be our manpage
      $link = $1;

      m{\((\d)\).}sx;
      # $1 should now be our man section.  When the match above fails, $1
      # still holds the value left by the previous successful match, which
      # is what the non-digit test below catches.
      $sec = $1;
      if($sec =~ /\D/) {
        $sec = $opt_s;
        # NOTE(review): the parentheses form a capture group, so this
        # matches the text "3G" anywhere in the line; presumably sections
        # written as "(3G)" are meant -- confirm.
        if(/(3G)/) {
          $sec = $sec . "x";
        }
      }
      if($link ne "") {
        #print $file . ": " . $link . ": " . $sec . "\n";
        # NOTE(review): removes the CGI prefix plus the four characters
        # after it; the exact man2html link format is not visible here.
        $line =~ s/\Q$cgi\E....//g;
        #print $line;
        # Generated pages are named <page>.<section>.html (the conversion
        # pass strips ".gz"), so link to that name.
        $line =~ s/\"\Q$link\E\"/\"$link.$sec.html\"/g;
        #print $line;
      }
      else {
        last;
      }
    }
  }
  seek($fh, 0, 0);
  print $fh @data;
  truncate($fh, tell($fh));
  close($fh);
}
# Close the handle that was actually opened above (this used to close DIR).
closedir(MYDIR);
# syntax highlighted by Code2HTML, v. 0.9.1