#!/usr/local/bin/perl
#
# Copyright (C) 1999-2001 Ricardo Ueda Karpischek
#
# This is free software; you can redistribute it and/or modify
# it under the terms of the version 2 of the GNU General Public
# License as published by the Free Software Foundation.
#
# This software is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this software; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
# USA.
#

#
# mkdoc.pl: generates the Clara OCR documentation
#

use strict;

my($nl,$nl_empty,$input_exhausted,$appends);
my($cl,$sec_title,$sec_class,$sec_number);
my($verbose);
my($ne);
my($sn,$ssn);
my($preamble,$output,$contents);
my(%sname,%st);
my($html,$nroff,$web,$pre);
my($margin);
my($date);
my($version);
my($fname,$docname,$other,$title);

# oops!
$date = "May 03 2002";
$version = "0.9.9";

#
# To make easier the detection of end of sections on the
# process_line procedure, this function generates the lines
#
#     /* (book)
#     foo section
#     -----------
#     */
#
# and after them, generates empty lines as many times as this
# procedure continues to be called.
#
# The generated line is left on $nl and $appends counts the calls
# made so far. Returns the updated value of $appends.
#
sub gen_line
{
    # the synthetic trailer, one entry per call
    my @trailer = (
        "/* (book)\n",
        "foo section\n",
        "-----------\n",
        "*/\n",
    );

    # an unset counter means that nothing was generated yet
    if (!defined($appends)) {
        $appends = 0;
    }

    # once the trailer is over, only empty lines are produced
    $nl = ($appends < scalar(@trailer)) ? $trailer[$appends] : "\n";

    return(++$appends);
}

#
# Put the next input line into $cl and make available on $nl the
# next line for look-ahead tests. If the input got exhausted,
# append the lines generated by gen_line.
#
sub read_input
{
    # make sure that $nl contains one line
    if ($nl_empty) {
        _read_filtered_line();
        $nl_empty = 0;
    }

    # the next line becomes the current line
    $cl = $nl;

    # append one line to the exhausted input
    if ($input_exhausted) {
        gen_line();
    }

    # read next line
    else {
        _read_filtered_line();
    }

    return;
}

#
# Read one line from F into $nl, rewriting the excerpt markers so
# that only the excerpts of the selected document remain tagged as
# "/* (book)": the tags listed on $other are erased, while the tag
# $docname and the tag "all" are converted to "(book)". When F has
# no more lines, take the next line from gen_line and flag the
# input as exhausted.
#
sub _read_filtered_line
{
    # a lexical is used in order to not clobber the caller's $_
    my $line = <F>;

    # test with defined, because a last line "0" is false but valid
    if (defined($line)) {
        $line =~ s/\/\* \(($other)\)/\/* /g;
        $line =~ s/\/\* \($docname\)/\/* (book)/g;
        $line =~ s/\/\* \(all\)/\/* (book)/g;
        $nl = $line;
    }
    else {
        gen_line();
        $input_exhausted = 1;
    }

    return;
}

#
# Classify the section using its title.
#
# Reads $sec_title and $sec_number and stores the result on
# $sec_class: 's' (command-line switches), 'm' (menu), 'c' (contents),
# 'p' (preamble) or '' (any other section).
#
sub classify
{
    # the section that describes the command-line switches
    if ($sec_title =~ /Reference of command-line switches/i) {
        $sec_class = 's';
    }

    # sections that describe menus
    elsif ($sec_title =~ / menu$/) {
        $sec_class = 'm';
    }

    # the "contents" section
    elsif ($sec_title =~ /^CONTENTS/) {
        $sec_class = 'c';
    }

    # the preamble sections
    elsif ($sec_title =~ /^(NAME|SYNOPSIS|DESCRIPTION|WELCOME)/) {
        $sec_class = 'p';
    }

    # detected section absent from the index
    elsif ($sec_number eq '') {
        $sec_class = '';
        if (($verbose) && ($sec_title !~ /^foo section/)) {

            # print, not printf: the title is text, not a format
            print STDERR "out of index: $fname section $sec_title";
        }
    }

    # other sections
    else {
        $sec_class = '';
    }

    return;
}

#
# Close preformatted mode.
# sub close_pre { # finish preformatted mode if (($html) && ($pre)) { while (substr($output,length($output)-1,1) eq "\n") { chop($output); } $output .= "\n\n"; $pre = 0; } } # # process one excerpt line # sub process_line { my($a,$b); # count lines discarding the initial empty block if (($cl =~ /\w/) || ($ne > 0)) { ++$ne; } # ignore the line 2 when reading lists if (($sec_class eq 's') && ($ne == 2)) { return; } # $cl contains an underlined title ($a = $cl) =~ s/^ *//; ($b = $nl) =~ s/^ *//; if (($nl =~ /^ *-+\n$/) && (length($a) == length($b))) { # close preformatted mode &close_pre(); # margin size $sec_title = $a; if (!($sec_title =~ /^foo section/)) { if ($verbose) { printf(STDERR "detected title: $sname{$sec_title} $sec_title"); } } $margin = length($cl) - length($a); # discard next line $nl_empty = 1; # store the just finished section if ($sec_class eq 'p') { $preamble .= $output; } elsif ($sec_class eq 'c') { $contents .= $output; } elsif ($sec_number ne '') { $st{$sec_number} = $output; if ($verbose) { printf(STDERR "$sec_number size is " . length($output) . "\n"); } } # initialize the just started section $sec_number = $sname{$a}; if ($html) { my($b,$c); ($b = $a) =~ s/\n//g; $c = "

"; if ($sec_number =~ /\d+\.\d+/) { $c .= "
"; } else { $c .= ""; } $output = sprintf("$c$sec_number $b"); $output .= "
"; } elsif ($nroff) { $output = sprintf(".SH $sec_number $a"); } else { if ($sec_number eq '') { $output = sprintf("$a"); } else { $output = sprintf("$sec_number $a"); } } &classify(); return; } # remove the margin from $cl $cl = substr($cl,$margin); if ($cl eq '') { $cl = "\n"; } # put command line switches in bold if (($sec_class eq 's') && ($ne == 1)) { # close preformatted mode &close_pre(); if ($html) { $cl = "

" . $cl; $cl .= "

"; } else { $cl =~ s/ / \\ /; $cl = ".TP\n.BI " . $cl; } } # put menu items in bold elsif (($sec_class eq 'm') && ($ne == 1)) { # close preformatted mode &close_pre(); if ($html) { $cl = "

" . $cl; $cl .= "

"; } else { $cl = ".TP\n.B " . $cl; } } # compute section numbers and memorize section names elsif ($sec_class eq 'c') { my($a); # main section if ($cl =~ /^\w/) { ++$sn; $ssn = 0; $sname{$cl} = "$sn."; if ($verbose) { printf(STDERR "registered $cl"); } $cl = "$sn. $cl"; } # subsection elsif ($cl =~ /^ \w/) { ++$ssn; $cl = substr($cl,4); $sname{$cl} = "$sn.$ssn"; if ($verbose) { printf(STDERR "registered $cl"); } $cl = " $sn.$ssn $cl"; } } # replace empty lines by the paragraph tag elsif (($cl =~ /^[ \t\r\n]*$/) && ($html) && ($pre == 0)) { $cl = "\n

\n"; } # enter preformatted mode elsif (($cl =~ /^ /) && ($html) && ($pre == 0)) { $cl = "
\n\n$cl";
        $pre = 1;
    }

    # finish preformatted mode
    # (only in HTML mode: a line that starts with a non-space
    # character and is not a bare newline ends the block; close_pre
    # drops the trailing newlines of $output and clears $pre)
    elsif (($cl =~ /^[^ ]/) && ($cl ne "\n") && ($html) && ($pre)) {

        &close_pre();
    }

    # send to output
    if ($pre) {

        # inside a preformatted block the line goes out verbatim,
        # except that a line made only of spaces is reduced to a
        # bare newline
        chomp($cl);
        if ($cl !~ /^ *$/) {
            $output .= "$cl\n";
        }
        else {
            $output .= "\n";
        }
    }
    else {

        # NOTE(review): $cl is given to sprintf as the format, so a
        # '%' in the documentation text would be taken as a
        # conversion; a plain append looks like the intent - confirm.
        $output .= sprintf($cl);
    }
}

#
# process one file
#
# The first argument is the path of the source file to scan. Each
# documentation excerpt, that is, each comment block started by a
# line that read_input delivers as "/* (book)" and finished by a
# line containing "*/", has its lines handed to process_line.
#
# The path is also stored on $fname, used by the diagnostics. A file
# that cannot be opened is reported on STDERR and skipped, so the
# remaining files are still processed.
#
sub process_file
{
    my ($path) = @_;
    my ($f);

    # reset current section (cs) variables
    $sn = 0;
    $ssn = 0;
    $sec_class = '';
    $output = "";

    # prepare the input. The three-argument open makes the path be
    # taken literally, so names like ">foo" or "cmd |" given on the
    # command line cannot write files or start commands.
    $fname = $path;
    if (!open(F, '<', $fname)) {
        warn("mkdoc: cannot open $fname: $!\n");
        return;
    }
    $nl_empty = 1;
    $appends = 0;
    $input_exhausted = 0;

    # process the input. After the end of the file read_input goes
    # on delivering the lines generated by gen_line (counted on
    # $appends); they form one last dummy excerpt that makes
    # process_line store the last real section.
    while ($appends < 6) {

        # start of excerpt
        read_input();
        if ($cl =~ /\/\* +\(book\)/) {

            # initialize control variables
            #printf(STDERR "entering excerpt block\n");
            $ne = $f = 0;

            # for each excerpt line
            do {

                read_input();
                # untabify (to do)

                # end of excerpt detected
                if ($cl =~ /\*\//) {
                    $f = 1;
                }

                # add to output
                else {
                    process_line();
                }
            } while ($f == 0);

            #printf(STDERR "out from excerpt block\n");
        }
    }
    close(F);
    return;
}

#
# Expand macros
#
# Returns a copy of the text given as first argument where each
# CLARA_HOME became the URL of the Clara OCR site and each
# CLARA_VERSION became the contents of $version. The work is done
# on a lexical copy, so the $_ of the caller is left untouched.
#
sub expand
{
    my ($text) = @_;

    $text =~ s/CLARA_HOME/http:\/\/www.claraocr.org\//g;
    $text =~ s/CLARA_VERSION/$version/g;
    return($text);
}

#
# Convert URLs to links
#
# Returns a copy of the text given as first argument where each
# http:// URL followed by a space or newline is wrapped in an HTML
# anchor that points to the URL itself. Punctuation found just
# after the URL (any of . , ; : ) ]) is left out of the link. The
# work is done on a lexical copy, so the $_ of the caller is left
# untouched.
#
sub linkfy
{
    my ($text) = @_;

    # $1 is the URL, $2 the trailing punctuation and $3 the space or
    # newline that ends the URL. The anchor must be opened as well
    # as closed, otherwise a stray closing tag is left on the page.
    $text =~ s/(http:\/\/[^ ]*?)([.,;:)\]]*|)([ \n])/<A HREF="$1">$1<\/A>$2$3/gms;
    return($text);
}

#
# Print Book Body
#
# Prints the list of sections stored on $contents and, after it, the
# text stored on %st for each section of that list. A subsection
# listed on $contents that has no text on %st is marked
# "(to be written)" on the list, and any listed section with no text
# is printed as a heading alone.
#
sub print_body
{
    # @c holds the lines of $contents, $i is the index of the line
    # being handled
    my(@c,$i);

    # section number taken from the current contents line (this
    # lexical hides the file-level $sn inside this procedure)
    my($sn);

    #
    # Output list of sections
    #
    @c = split("\n",$contents);
    if ($html) {
        print "$c[0]\n
    \n"; } for ($i=0; $i<=$#c; ++$i) { ($sn) = ($c[$i] =~ /^ *(\d.*?) /); if (($sn ne '') && ($sn =~ /\d+\.\d+/) && ($st{$sn} eq "")) { $c[$i] .= " (to be written)"; } } for ($i=0; $i<=$#c; ++$i) { ($sn) = ($c[$i] =~ /^ *(\d.*?) /); if ($html) { if ($sn eq '') { print "

    \n"; } else { if ($sn =~ /^([2-9]|\d\d+)\.$/) { print "

\n"; } print "
  • $c[$i]\n"; if ($sn =~ /^\d+\.$/) { print "
      \n"; } } } else { print "$c[$i]\n"; } } if ($html) { print "
    \n\n"; } else { print "\n"; } # # Output the sections # for ($i=0; $i<=$#c; ++$i) { ($sn) = ($c[$i] =~ /^ *(\d.*?) /); # the section is currently empty if (($sn ne '') && ($st{$sn} eq "")) { my($a,$c); ($a = $c[$i]) =~ s/^ *//; if ($html) { $c = "

    "; if ($sn =~ /\d+\.\d+/) { $c .= "
    "; } else { $c .= ""; } print "$c$a
    \n"; } else { print "\n.SH $a\n"; } } # the section has contents elsif ($sn ne '') { $a = &expand($st{$sn}); if ($html) { $a = &linkfy($a); print "
    \n$a"; } else { print ($a); } } } } # # Flag for verbose mode # $verbose = 0; # # initialize some output buffers. # $preamble = ""; $contents = ""; # # initialize some flags # $pre = 0; # # Process command-line arguments. # { my($i); # defaults $html = 0; $nroff = 0; $web = 0; for ($i=0; ($i<=$#ARGV) && ($ARGV[$i] =~ /^-/); ++$i) { if ($ARGV[$i] eq '-html') { $html = 1; $nroff = 0; } elsif ($ARGV[$i] eq '-nroff') { $html = 0; $nroff = 1; } elsif ($ARGV[$i] eq '-web') { $web = 1; } elsif ($ARGV[$i] eq '-book') { $docname = 'book'; $other = 'faq|devel|tutorial|glossary'; $title = "Clara OCR Advanced User's Manual"; } elsif ($ARGV[$i] eq '-faq') { $docname = 'faq'; $other = 'all|book|devel|tutorial|glossary'; $title = "The Clara OCR FAQ"; } elsif ($ARGV[$i] eq '-devel') { $docname = 'devel'; $other = 'faq|book|tutorial|glossary'; $title = "Clara OCR Developer's Guide"; } elsif ($ARGV[$i] eq '-tutorial') { $docname = 'tutorial'; $other = 'faq|book|devel|glossary'; $title = "Clara OCR Tutorial"; } elsif ($ARGV[$i] eq '-glossary') { $docname = 'glossary'; $other = 'faq|book|devel|tutorial'; $title = "Clara OCR Glossary"; } } # remaining arguments are assumed to be the source code files for (; $i <= $#ARGV; ++$i) { process_file($ARGV[$i]); } } # # output preamble. # if ($html) { my($a,$t,$u); print "Clara Book\n"; print "\n"; # remove everything until DESCRIPTION heading $preamble =~ s/^.*?DESCRIPTION.*?<\/TABLE>//ms; # links $preamble = linkfy(expand($preamble)); # add HTML heading #$u = 'http://www.claraocr.org'; $u = ''; $a = "

    "; $a .= "
    $title

    \n"; $a .= "

    \n"; $a .= "

    \n"; if ($web) { $a .= "[Main]\n"; } $a .= "[FAQ]\n"; $a .= "[Glossary]\n"; $a .= "[Tutorial]\n"; $a .= "[User's Manual]\n"; $a .= "[Developer's Guide]\n"; $a .= "
    \n"; $preamble = $a . $preamble; } elsif ($nroff) { print ".TH CLARA 1 \"$date\" \"$title v $version\"\n"; $preamble = expand($preamble); } else { print "$title\n"; print "-" x length($title) . "\n\n"; $preamble = expand($preamble); } print $preamble; # # Output the body of the Book # &print_body(); # # footings # if ($html) { print "\n"; }