///////////////////////////////////////////////////////////////////////////// /* Copyright 2001 Ronald S. Burkey. Latex support Copyright 2001 Joe Cherry. This file is part of GutenMark. GutenMark is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. GutenMark is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with GutenMark; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA Filename: OutputHtml.c Purpose: Writes the output HTML file. Mods: 01/13/02 RSB Split off from AutoMark.c. 01/18/02 RSB Added NoPrefatory and PageBreaks. */ #include #include #include #include #include "AutoMark.h" // These are the HTML character-entity names of all alphabetics with accents. static const char *DiacriticalNames[64] = { "Agrave", "Aacute", "Acirc", "Atilde", "Auml", "Aring", "AElig", "Ccedil", "Egrave", "Eacute", "Ecirc", "Euml", "Igrave", "Iacute", "Icirc", "Iuml", "ETH", "Ntilde", "Ograve", "Oacute", "Ocirc", "Otilde", "Ouml", "times", "Oslash", "Ugrave", "Uacute", "Ucirc", "Uuml", "Yacute", "THORN", "szlig", "agrave", "aacute", "acirc", "atilde", "auml", "aring", "aelig", "ccedil", "egrave", "eacute", "ecirc", "euml", "igrave", "iacute", "icirc", "iuml", "eth", "ntilde", "ograve", "oacute", "ocirc", "otilde", "ouml", "divide", "oslash", "ugrave", "uacute", "ucirc", "uuml", "yacute", "thorn", "yuml" }; //-------------------------------------------------------------------------- // Outputs a single character (from the text) to the HTML output file. // This is more useful in some cases than directly outputting the character, // because automatic conversion to HTML character entities is done. static void OutputCharacter (int ForceNumeric, FILE * OutputFile, int c) { if (ForceNumeric) { if (c == '<') fprintf (OutputFile, "<"); else if (c == '>') fprintf (OutputFile, ">"); else if (c == '&') fprintf (OutputFile, "&"); else if (c < 128) putc (c, OutputFile); else fprintf (OutputFile, "&#%d;", c); } else { if (c == '<') fprintf (OutputFile, "<"); else if (c == '>') fprintf (OutputFile, ">"); else if (c == '&') fprintf (OutputFile, "&"); else if (c < 128) putc (c, OutputFile); else if (c >= 192 && c <= 255) fprintf (OutputFile, "&%s;", DiacriticalNames[c - 192]); else fprintf (OutputFile, "&#%d;", c); } } //-------------------------------------------------------------------------- // Handles just the output, after all analysis has been completed. // Returns 0 on success, non-zero on error. int OutputHtml (FILE * OutputFile, AnalysisDataset * Dataset) { int ErrorCode, c, lastc, Remove, AtEnd; MarkupRecord Mark; unsigned long Offset; char s[256]; int InParagraph = 0, CharsInLine = 0; int InPreformatted = 0, LastNewline = 1; int AfterBreak = 0; char *InputFilename = NULL; int Removed = -1; int JumpedPastHeader = 0; int InPrefatoryArea = 0; int HeaderCount = 0; // Print output header. fprintf (OutputFile, "\n"); fprintf (OutputFile, "\n\n"); fprintf (OutputFile, "\n"); fseek (Dataset->InputFile, 0, SEEK_SET); fgets (s, sizeof (s) - 1, Dataset->InputFile); fgets (s + strlen (s), sizeof (s) - strlen (s) - 1, Dataset->InputFile); NormalizeTitle (s, sizeof (s)); fprintf (OutputFile, "%s\n", s); fprintf (OutputFile, "\n\n"); fprintf (OutputFile, "\n"); fprintf (OutputFile, "\n\n", Dataset->ShortLineSize, Dataset->ReallyShortLineSize); // Print marked-up text. This is basically a matter // of merging the markup file with the input file. fseek (Dataset->MarkupFile, 0, SEEK_SET); fseek (Dataset->InputFile, 0, SEEK_SET); if (Dataset->YesHeader) { ErrorCode = fread (&Mark, sizeof (Mark), 1, Dataset->MarkupFile); if (ErrorCode != 1) Mark.Type = MarkNoMoreMarks; Offset = 0; } else { do { ErrorCode = fread (&Mark, sizeof (Mark), 1, Dataset->MarkupFile); if (ErrorCode != 1) { Mark.Type = MarkNoMoreMarks; break; } } while (Mark.Offset < Dataset->TextStart); fseek (Dataset->InputFile, Dataset->TextStart, SEEK_SET); Offset = Dataset->TextStart; } for (Remove = AtEnd = c = 0; (lastc = c, c = fgetc (Dataset->InputFile)) != EOF; Offset++, CharsInLine++) { if (!isspace (c)) AfterBreak = 0; LastShot: while (Offset == Mark.Offset && Mark.Type != MarkNoMoreMarks) { switch (Mark.Type) { case MarkTolower: //if (Removed != Offset) putc (DiacriticalTolower (c), OutputFile); Remove = 1; break; case MarkToupper: //if (Removed != Offset) putc (DiacriticalToupper (c), OutputFile); Remove = 1; break; case MarkBeginSmartQuote: CharsInLine += 6; if (Dataset->ForceNumeric) fprintf (OutputFile, "“"); else fprintf (OutputFile, "“"); Remove = 1; break; case MarkEndSmartQuote: CharsInLine += 6; if (Dataset->ForceNumeric) fprintf (OutputFile, "”"); else fprintf (OutputFile, "”"); Remove = 1; break; case MarkLsquo: CharsInLine += 6; if (Dataset->ForceNumeric) fprintf (OutputFile, "‘"); else fprintf (OutputFile, "‘"); Remove = 1; break; case MarkRsquo: CharsInLine += 6; if (Dataset->ForceNumeric) fprintf (OutputFile, "’"); else fprintf (OutputFile, "’"); Remove = 1; break; case MarkRemoveChar: Remove = 1; Removed = Offset; break; case MarkInsertChar: //putc (Mark.Insert, OutputFile); OutputCharacter (Dataset->ForceNumeric, OutputFile, Mark.Insert); break; case MarkNbsp: CharsInLine += 5; if (Dataset->ForceNumeric) fprintf (OutputFile, " "); else fprintf (OutputFile, " "); break; case MarkBeginJustifiedParagraph: InParagraph = 1; CharsInLine = 0; if (Dataset->NoJustify) { CharsInLine += 2; fprintf (OutputFile, "

"); } else { CharsInLine += 18; fprintf (OutputFile, "

"); } break; case MarkBeginRaggedParagraph: InParagraph = 1; CharsInLine = 2; fprintf (OutputFile, "

"); if (InPrefatoryArea) fprintf (OutputFile, ""); break; case MarkBeginCenteredParagraph: InParagraph = 1; CharsInLine = 17; fprintf (OutputFile, "

"); break; case MarkEndParagraph: InParagraph = 0; if (InPrefatoryArea) fprintf (OutputFile, ""); fprintf (OutputFile, "

"); break; case MarkBeginItalics: CharsInLine += 2; fprintf (OutputFile, ""); break; case MarkEndItalics: CharsInLine += 3; fprintf (OutputFile, ""); break; case MarkBeginBold: CharsInLine += 2; fprintf (OutputFile, ""); break; case MarkEndBold: CharsInLine += 3; fprintf (OutputFile, ""); break; case MarkBeginUnderline: CharsInLine += 2; fprintf (OutputFile, ""); break; case MarkEndUnderline: CharsInLine += 3; fprintf (OutputFile, ""); break; case MarkBreak: CharsInLine = 0; AfterBreak = 1; fprintf (OutputFile, "
\n"); break; case MarkBlockquote: // I think, not used. InParagraph = 1; CharsInLine = 12; fprintf (OutputFile, "

"); break; case MarkEndBlockquote: // I think, not used. InParagraph = 0; fprintf (OutputFile, "

"); break; case MarkHeader1: InPrefatoryArea = 0; if (HeaderCount == 0) { if (Dataset->NoPrefatory) fprintf (OutputFile, "\n"); } else { if (Dataset->PageBreaks) fprintf (OutputFile, "\n"); } HeaderCount++; fprintf (OutputFile, "

"); break; case MarkEndHeader1: fprintf (OutputFile, "

"); break; case MarkBeginSubtitle: fprintf (OutputFile, "

"); break; case MarkEndSubtitle: fprintf (OutputFile, "

"); break; case MarkBeginTable: InPreformatted = 1; LastNewline = 0; fprintf (OutputFile, "

");
	      break;
	    case MarkEndTable:
	      InPreformatted = 0;
	      fprintf (OutputFile, "

"); break; case MarkInsertMdash: CharsInLine += 6; if (Dataset->ForceNumeric) fprintf (OutputFile, "—"); else fprintf (OutputFile, "—"); break; case MarkInsertNdash: CharsInLine += 6; if (Dataset->ForceNumeric) fprintf (OutputFile, "–"); else fprintf (OutputFile, "–"); break; case MarkSoftHyphen: CharsInLine += 4; if (Dataset->ForceNumeric) fprintf (OutputFile, ""); else fprintf (OutputFile, ""); break; case MarkJumpPastGutenbergHeader: JumpedPastHeader = 1; fprintf (OutputFile, "

Project Gutenberg Fine Print

\n

\n");
	      break;
	    case MarkEndOfGutenbergHeader:
	      if (JumpedPastHeader)
		fprintf (OutputFile, "\n

\n\n"); if (Dataset->LowestNonPrefatoryLine > 0) { if (Dataset->NoPrefatory) fprintf (OutputFile, "

\n"); else HeaderCount++; InPrefatoryArea = 1; fprintf (OutputFile, "

Prefatory Materials

\n\n"); fprintf (OutputFile, "

\n" "This is a modified etext created by GutenMark " "software.\n"); if (InputFilename != NULL) fprintf (OutputFile, " The original text was contained in a " "computer file named \"%s.\"\n", InputFilename); fprintf (OutputFile, " Any comments below about etext " "preparation refer to the\n" "original, and not to this " "modified version of the etext. \n" "No individuals named below bear " "responsibility for changes to the text.\n" "

\n\n"); } break; case MarkGutenbergEnder: if (Dataset->YesHeader) { } else AtEnd = 1; break; case MarkNoMoreMarks: break; } ErrorCode = fread (&Mark, sizeof (Mark), 1, Dataset->MarkupFile); if (ErrorCode != 1) Mark.Type = MarkNoMoreMarks; } if (AtEnd) break; if (Remove) Remove = 0; else { if (c != '\r' && !(AfterBreak && isspace (c))) { if (InParagraph) { if (isspace (c) && CharsInLine > HTML_LENGTH) { c = '\n'; CharsInLine = 0; } else if (c == '\n') c = ' '; } if (c == '\n') { if (LastNewline > 1 && !InPreformatted) continue; LastNewline++; } else LastNewline = 0; OutputCharacter (Dataset->ForceNumeric, OutputFile, c); } } } // What this does is to allow us to process any remaining markups that // are supposed to occur at the very end of the input (such as

). if (AtEnd == 0) { AtEnd = 1; goto LastShot; } // Last stage: Close the output HTML stream. fprintf (OutputFile, "\n\n"); fprintf (OutputFile, "\n"); return (0); }