#!/usr/local/bin/perl # # manpager.pl # # A script used to process sfs.texi and to output a series of manpages # formatted for *roff. Takes as arguments the input file (in all # seen cases, sfs.texi) and an output directory. Will dump all # of the manpages in the output directory. # # For the sfs.texi file to be successfully parsed by this program, # added structure is needed in the form of texi comments. For # instance, here is a typical man page input: # # @c @mp # @c @mp @command{foo}{foo processor}{1} # @c @mp @synopsis # @c @mpp foo [-i] @var{bar} # @c @mp @end synopsis # # @c @mp @description # Foo is a program used for the processing of foos, # the validation of the processing of foos, and the # generation of auto-validators thereof. # @c @mp @end description # # @c @mp @options # @table @option # @item -I process, validate and auto-generate foo-validation # routines # @end @table # @c @mp @end @options # @c @mp @end @command # @c @mp # # In this example, we've taken a .texi files, and added commands for # manpager.pl in the form of texi commands -- all lines of the form # "@c @mp ..." or "@c @mpp" # # This program works by convering texi files into POD files, and then # by running pod2man translation. Note that some functionality of # the full *roff language is lost, notably, formatting in indented # environments. However, we've done our best to work around those # problems. # # Note that this program needs certain environment variables # to be given; for sfs.texi, VERSION will be needed. # # #----------------------------------------------------------------------- # $Id: manpager.pl,v 1.15 2002/12/03 20:55:48 dm Exp $ #----------------------------------------------------------------------- # use strict; use IO::File; use Data::Dumper; use Pod::Man; use vars qw [ $infile %TEXI_FORMATS %STYPES %TEXI_CHARS @TEXI_IGNORES_ARR %TEXI_IGNORES @STYPES_ARR @NATIVE_ENVS $NES @SEEALSO %TEXI_OPEN_ENV @TEXI_IGNORES_WL_ARR %TEXI_IGNORES_WL %GLOBALS @GLOBAL_VARS $GVSWITCH @MANPAGE_SECTIONS $subsec_g %OUTPUT_ENV %SEEALSO ]; %TEXI_FORMATS = qw ( command B file F option B var I samp B env B uref I ref I strong B code B dfn B ); $TEXI_FORMATS{xref} = "See I"; %TEXI_CHARS = qw ( dots ... dslash / ); %TEXI_OPEN_ENV = qw ( item item itemx item ); @TEXI_IGNORES_ARR = qw ( cindex anchor table noindent pxref ); %TEXI_IGNORES = map { $_ => 1 } @TEXI_IGNORES_ARR; # Ignore Whole-Line (WL) @TEXI_IGNORES_WL_ARR = qw ( cindex ignore ); %TEXI_IGNORES_WL = map { $_ => 1 } @TEXI_IGNORES_WL_ARR; # Section Types @STYPES_ARR = qw ( options synopsis command ignore description example examples caveats indent table conffile display itemize bugs files ); %STYPES = map { $_ => 1 } @STYPES_ARR; @NATIVE_ENVS = qw ( example table display itemize ); $NES = join("|", @NATIVE_ENVS); # native env switch @GLOBAL_VARS = qw ( VERSION SYSCONFDIR PKGDATADIR ); $GVSWITCH = join("|", @GLOBAL_VARS); @MANPAGE_SECTIONS = qw ( synopsis description options files environment diagostics examples caveats history see_also bugs copying authors); sub read_infile { my ($fh) = @_; my $state = 0; my $lineno = 0; my $lines = []; while (<$fh>) { $lineno++; # Begin reading on "@c @mp" if ($state == 0 && /^\@c\s+\@mp\s*$/) { $state = 2; # end reading on "@c @mp @end" } elsif ($state == 1 && /^\@c\s+\@mp\s+\@end\s*$/) { $state = 0; } if ($state == 1) { my $pushit = 0; # Only Take Commented lines that are commands if ( /^\@(c|comment)\s+/ ) { if ( $_ =~ s/^\@c\s+\@mpp// or $_ =~ s/^\@c\s+\@mp/\@mp/ ) { $pushit = 1; } } else { $pushit = 1; } push @$lines, [ $_, $lineno ] if $pushit; } $state = 1 if $state == 2 ; } return $lines; } sub parse_clump { my ($lines) = @_; while (parse_globals($lines)) {} my $sec = parse_section($lines); return $sec if $sec; return parse_text($lines); } sub parse_globals { my ($lines) = @_; return 0 unless $lines and $lines->[0] and $lines->[0]->[0]; my $line = $lines->[0]->[0]; if ($line =~ /^\@mp\s+global\s+($GVSWITCH)\s+(\S*)/ ) { $GLOBALS{$1} = $2; shift @$lines; return 1; } return 0; } sub parse_text { my ($lines) = @_; my $olines; my $line; while ($line = shift @$lines) { # stop reading text on beginning or end of @mp environment if ($line->[0] =~ /^\@mp\s+\@/ or $line->[0] =~ /^\@(end\s+)?($NES)/ ) { unshift @$lines, $line; last; } push @$olines, $line; } return { "type" => "TEXT" , "data" => $olines }; } sub parse { my ($lines) = @_; my $out = []; while ($lines and $#$lines >= 0) { my $r = parse_clump($lines); last unless $r; push @$out, $r; } return $out; } sub parse_section { my ($lines) = @_; my $pair = shift @$lines; return undef unless $pair and $pair->[0]; my $line = $pair->[0]; my $start = $pair->[1]; my $stype; my $native_env = 0; if ($line =~ /^\@mp\s+\@\s/ ) { die "$infile:$start: Illegal empty command tag found\n"; } elsif ($line =~ s/^\@mp\s+\@(\w+)// ) { $stype = $1; } elsif ($line =~ s/^\@($NES)\b// ) { $stype = $1; $native_env = 1; } else { unshift @$lines, $pair; return undef; } my $lineno = $pair->[1]; if ($stype eq "end") { die "$infile:$lineno: Unbalanced \@end tag found\n"; } unless ($STYPES{$stype}) { unshift @$lines, $pair; die "$infile:$lineno: Unrecognized section type: $stype\n"; } $line =~ s/^\s*\{// ; $line =~ s/\}\s*$//; my @args = split /\}\s*\{/, $line; my @clumps = (); while (1) { my $clump = parse_clump($lines); my $line = shift @$lines; if (!$line or !$line->[0]) { die qq#$infile:EOF: Hit EOF before end of <$stype> #, qq# environment started on line $start#; } push @clumps, $clump; my $prefix = $native_env ? "" : '@mp\s+'; if ($line->[0] =~ /^$prefix\@end\s+$stype/ ) { last; } unshift @$lines, $line; } my $section = { "type" => $stype, "args" => [ @args ], "data" => [ @clumps ] , "lineno" => $lineno }; return $section; } sub output { my ($doc, $outdir) = @_; my $manpages = 0; foreach my $manpage (@$doc) { if (ref($manpage) and ref($manpage) eq "HASH" and $manpage->{type} eq "command") { output_command($manpage, $outdir); $manpages ++; } } } sub output_command { my ($mp, $outdir) = @_; die "$infile:$mp->{lineno}: 3 Arguments expected to \@mp \@command" unless $#{$mp->{args}} == 2; my $name = $mp->{args}->[0]; my $sdesc = $mp->{args}->[1]; my $msec = $mp->{args}->[2]; my $outfnp = "$outdir/$name.$msec.pod"; my $outfn = "$outdir/$name.$msec"; my ($dsec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) = localtime(time); my @months = qw [ Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec ]; my $date = $months[$mon] . " " . ($year + 1900); foreach ($outfnp, $outfn) { unlink $_ if -f $_ and !(-w $_); } my $outfh = new IO::File(">$outfnp"); die "Cannot open $outfn for writing" unless $outfh; print $outfh "=head1 NAME\n\n$name - $sdesc\n\n"; my %dispatch = ( "see_also" => \&output_see_also, "authors" => \&output_authors, "files" => \&output_files ); foreach my $ss (@MANPAGE_SECTIONS) { my $subsec = $subsec_g = $mp->{subsec}->{$ss}; my $func = $dispatch{$ss}; if ($func and !$subsec) { &$func($outfh, $name, $msec); } else { output_subsection($subsec, $outfh) if $subsec; } } $outfh->close(); # if we have another global variable this should become a function; # the next thing to think about is scoping. my $v = get_value("VERSION", 0, "0.0"); my $parser = Pod::Man->new( release => "SFS $v", section => $msec, center => "SFS $v", name => $name); $parser->parse_from_file($outfnp, $outfn); unlink($outfnp); } sub output_subsection { my ($ssec_arr, $outfh) = @_; return unless $ssec_arr and $#$ssec_arr >= 0; print $outfh "=head1 ", uc($ssec_arr->[0]->{type}), "\n\n"; foreach my $ssec (@$ssec_arr) { output_normal($ssec, $outfh); # print a newline ? } print $outfh "\n"; } sub output_normal { my ($sec, $outfh) = @_; my $over = 0; my %oenv_copy; if ($sec->{type} eq "TEXT") { output_text($sec->{data}, $outfh); } elsif ($sec->{type} eq "ignore") { return; } elsif ($sec->{data}) { %oenv_copy = %OUTPUT_ENV; if ($sec->{type} eq "indent" or $sec->{type} eq "table" or $sec->{type} eq "itemize" ) { print $outfh "\n\n=over 4\n\n"; $over = 1; } if ($sec->{type} eq "itemize" ) { $OUTPUT_ENV{itemize} = 1; } # need to do special handling of example. also @display mode? if ($sec->{type} eq "example" and !($subsec_g eq "synopsis")) { $OUTPUT_ENV{example} = 1; } if ($sec->{type} eq "display" ) { $OUTPUT_ENV{display} = 1; } foreach my $ssec (@{$sec->{data}}) { output_normal($ssec, $outfh); } if ($over) { print $outfh "\n\n=back\n\n"; } %OUTPUT_ENV = %oenv_copy; } } # # order: from systerm environment variables # then, from global variables defined in file # then, default value # sub get_value { my ($var, $line, $def) = @_; my $val = $ENV{$var}; $val = $GLOBALS{$var} unless defined($val); $val = $def if (defined($def) and !defined($val)); warn "$infile:$line: Variable $var undefined\n" unless $val; $val = "" unless defined($val); return $val; } sub output_text { my ($data, $outfh) = @_; my $fswitch = join("|", keys %TEXI_FORMATS); my $cswitch = join ("|", keys %TEXI_CHARS); my $iswitch = join ("|", keys %TEXI_IGNORES); my $iwlswitch = join("|", keys %TEXI_IGNORES_WL); my $oeswitch = join ("|", keys %TEXI_OPEN_ENV); # first do commands that need to keep track of newlines my $txt = ""; my $startpar = -1; my $notflushlines = 0; my $example = $OUTPUT_ENV{example}; my $itemize = $OUTPUT_ENV{itemize}; my $display = $OUTPUT_ENV{display}; my $fmt = $display ? 0 : 1; foreach my $pair (@$data) { my $line = $pair->[0]; my $lineno = $pair->[1]; $startpar = $lineno if $startpar < 0; $line =~ s/\@($iwlswitch)\s+.*$//g; $line =~ s/\@value\{(\w*)\}/get_value($1,$lineno, "")/eg; $line =~ s{\@($oeswitch)\s+(.*)}{\n\n=$TEXI_OPEN_ENV{$1} $2\n\n}g unless $itemize; $line =~ s/^\@end.*//g; # example environments that need indentation cannot have # formatting. this is a limitation of POD if ($example) { $notflushlines++ if $line =~ /^\s/ ; } else { $line =~ s/^[ \t]+//; } if ($display) { $txt .= " "; } $txt .= $line; } $fmt = 0 if $notflushlines > 0; # # itemized lists should be bulleted if ($itemize) { $txt =~ s/\@item/\n\n=item *\n\n/gs; } # # get rid of constructs like " SFS, @ref{SFS}." # $txt =~ s/,\s+\@ref{\w*}\././sgo; # now treat all of the text as one line $txt =~ s/(\S)\-{2}/$1-/gso; $txt =~ s/(\S)\-{3}/$1--/gso; $txt =~ s/\@($iswitch)(\s)/$2/gso; $txt =~ s/\@($iswitch)\{(([^\}@]|@.)*)\}//gso; $txt =~ s/\@($cswitch)(\{\})?/$TEXI_CHARS{$1}/gso; $txt = unnest($txt); die "$infile:$startpar: Unbalanced nested expression in paragraph" unless defined($txt); my $c = 0; $txt =~ s/\@uref\{([^,\}]+),([^\}]+)\}/"$2 (I<$1>)"/egso; $txt =~ s/\@($fswitch)\{(([^\}@]|@.)*)\}/tfmt($1,$2,$fmt,\$c)/egso; if ($c == 0 and $example) { $txt =~ s/^(.)/ $1/ ; $txt =~ s/\n/\n /sg; } # strip away all white space if possible unless ($example) { $txt =~ s/^[ \t]*// ; $txt =~ s/\n[ \t]*/\n/sg; } # unescape "@@" sequences $txt =~ s/@([@\{\}])/$1/gso; print $outfh $txt if $txt; } sub tfmt { my ($cmd, $arg, $fmt, $c) = @_; $$c++; return $arg unless $fmt; return $TEXI_FORMATS{$cmd} . "<$arg>"; } sub unnest_parse { my ($line, $cmd, $err) = @_; my $ret; $ret->{cmd} = $cmd if defined($cmd); my $cflag = 1; while ($$line and length($$line) >= 0 and $cflag ) { $cflag = 0; if ($$line =~ s/^(([^}@]|@.)*?)@(\w+)\{//s ) { push @{$ret->{data}}, $1 if $1 and length($1) > 0; my $sret = unnest_parse($line, $3, $err); push @{$ret->{data}}, $sret if $sret; $$err = 1 unless $$line =~ s/^\}// ; $cflag = 1; # Must fix this! - MK 06/17/02 - Gobble all non-} or @} combinations } elsif ($$line =~ s/^([^@}]|(@.))+@?//s ) { push @{$ret->{data}}, $&; } } return ($#{$ret->{data}} >= 0 ? $ret : undef ); } sub unnest_output { my ($expr) = @_; my $ret = ""; return $ret unless defined($expr); foreach my $d (@{$expr->{data}}) { if (ref($d)) { $ret .= unnest_output($d); } elsif (!defined($expr->{cmd})) { $ret .= $d ; } else { $ret .= '@' . $expr->{cmd} . '{' . $d . '}' ; } } return $ret; } # # unnests nested expressions # # input: a b @c{@d{e}} f @h{ i j @k{l @m{n @o{p}} q} r} # output: a b @d{e} f @h{ i j }@k{l }@m{n }@o{p}@k{ q}@h{ r} # # also watch out "@}" type escape routines: # input: a b @c{@}@@@d{e}} # output: a b @c{@}@@}@d{e} # sub unnest { my ($line) = @_; my $err = 0; my $expr = unnest_parse(\$line, undef, \$err); return undef if $err; return unnest_output($expr); } sub output_authors { my ($outfh) = @_; print $outfh "\n\n=head1 AUTHOR\n\nsfsdev\@redlab.lcs.mit.edu\n\n", } sub sacmp { if ($a->[1] == $b->[1]) { return $a->[0] cmp $b->[0] ; } else { return $a->[1] <=> $b->[1] ; } } sub output_see_also { my ($outfh, $cmd, $sec) = @_; print $outfh "\n\n=head1 SEE ALSO\n\n"; my $first = 1; foreach my $seealso (@SEEALSO) { next if $seealso->[0] eq $cmd and $seealso->[1] eq $sec; print $outfh ", " unless $first; $first = 0; print $outfh $seealso->[0], "(", $seealso->[1], ")"; } print $outfh < is maintained as a Texinfo manual. If the B and B programs are properly installed at your site, the command B should give you access to the complete manual. For updates, documentation, and software distribution, please see the B website at I. EOF } sub output_files { my ($outfh, $cmd, $sec) = @_; my $conf = ($sec == 5) ? $cmd : $cmd . "_config"; my $sdesc; return unless $sdesc = $SEEALSO{5}->{$conf} ; my $etcdir = get_value("ETCDIR", 0, "/etc/sfs"); my $defdir = get_value("PKGDATADIR", 0, "/usr/local/share/sfs"); print $outfh "\n\n=head1 FILES\n\n=over 4\n\n"; foreach my $d ($etcdir, $defdir) { print $outfh "=item F<$d/$conf>\n\n"; } print $outfh "$sdesc\n\n=back\n\n", "(Files in F<$etcdir> supersede default versions", " in F<$defdir>.)\n\n"; } # # associates a man-page command with its constituent subsections # sub cleanup { my ($doc) = @_; my ($secs, $sstype); foreach my $sec (@$doc) { if ($sec->{type} eq "conffile") { $sec->{type} = "command"; $sec->{args}->[2] = 5; } next unless $sec and ref($sec) eq "HASH" and ($sec->{type} eq "command" or $sec->{type} eq "conf" ); foreach my $subsec (@{$sec->{data}}) { unless ($subsec->{type} eq "TEXT") { $sstype = $subsec->{type}; push @{$sec->{subsec}->{$sstype}}, $subsec; } } my $args = $sec->{args}; push @$secs, $sec; push @SEEALSO, [ $args->[0], $args->[2] ]; $SEEALSO{$args->[2]}->{$args->[0]} = $args->[1]; } @SEEALSO = sort sacmp @SEEALSO; return $secs; } # # do config test first # if ($#ARGV == 0 and $ARGV[0] eq "-t") { exit(0); } sub usage { die "usage: $0 \n"; } usage() unless $#ARGV == 1; $infile = $ARGV[0]; my $infh = new IO::File("<$ARGV[0]"); my $outdir = $ARGV[1]; die "Cannot open infile: $infile" unless $infile; die "Cannot find destination directory: $outdir" unless -d $outdir; my $lines = read_infile($infh); $infh->close(); my $doc = parse($lines); cleanup($doc); output($doc, $outdir, undef); exit(0);