///////////////////////////////////////////////////////////////////////////// /* Copyright 2001 Ronald S. Burkey This file is part of GutenMark. GutenMark is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. GutenMark is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with GutenMark; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA Filename: IsHeaderHeuristic.c Purpose: Determine (with the heuristic method) whether the current line is a heading. Hopefully replaced eventually by a neural net method. However, this code will be kept so that it can optionally be used by means of a command-line switch. Mods: 01/01/02 RSB Split off from IsHeuristic code in MarkBody.c. */ #include #include #include #include #include "AutoMark.h" //------------------------------------------------------------------------ // This function is used to recognize headings. Returns 0 if it thinks the // line is not a header. The array Status.BufferedLines[BUFFERED_LINES] should have // been pre-loaded. Status.BufferedLines[PRE_OR_POST_LINES] is the current line, // while the other array elements are earlier and later lines from the file. 
// Heuristically decide whether the current line is a heading.
//
//   BufferedLines   Window of per-line records.  The record at index
//                   PRE_OR_POST_LINES describes the current line; lower
//                   indices are earlier lines, higher indices later ones.
//                   Records from 3 before to 3 after the current line may
//                   be read.
//   LastWasHeader1  In/out flag.  If it is nonzero on entry and the all-caps
//                   chapter rule does not fire, it is reset to 0 and the
//                   remaining rules are skipped.  It is never set to a
//                   nonzero value here.
//   LineNum         Index of the current line; compared with
//                   Dataset->NumLines and tested for being 0.
//   Dataset         Supplies NumLines and is handed to MatchesPrefatoryLine.
//   Line            Only forwarded to MatchesPrefatoryLine (presumably the
//                   text of the current line -- confirm against the caller).
//
// Returns 1 if the line is judged to be a heading, 0 otherwise.
int
IsHeaderHeuristic (LineRecord * BufferedLines, int *LastWasHeader1,
                   int LineNum, AnalysisDataset * Dataset, char *Line)
{
  // Cur[0] is the current line; Cur[-k] and Cur[k] are k lines before/after.
  LineRecord *Cur = BufferedLines + PRE_OR_POST_LINES;

  // Separated from what precedes it by an Empty line or a form feed.
  int GapAbove = (Cur[-1].Empty || Cur[0].FormFeedPrior
                  || Cur[-1].FormFeedAfter);
  // Gap above plus an Empty line immediately below.
  int Isolated = (GapAbove && Cur[1].Empty);
  int IsolatedCaps = (Isolated && Cur[0].AllCaps);

  // Properties of the current line that are shared by both multi-blank
  // rules further down.
  int StartsDivision = (Cur[0].BeginsBook || Cur[0].BeginsChapter);
  int RomanLead = (Cur[0].CapFirstChar && Cur[0].BeginsRoman
                   && (!Cur[0].SpecialRoman || !Cur[0].WhiteAfterRoman));

  // Rule 0: an isolated all-caps line flagged OnlyRoman.  This rule is
  // independent of the chain below, which may still clear the verdict
  // (LineNum == 0 on a Scanned line) or consume *LastWasHeader1.
  int Verdict = (IsolatedCaps && Cur[0].OnlyRoman);

  if (IsolatedCaps && Cur[0].BeginsChapter)
    {
      Verdict = 1;
    }
  else if (*LastWasHeader1)
    {
      // Consume the flag; no further positive rule is tried for this line.
      *LastWasHeader1 = 0;
    }
  else if (LineNum == 0 && Cur[0].Scanned)
    {
      Verdict = 0;
    }
  else if (Isolated
           && (Cur[0].Contents || (Cur[0].AllCaps && !Cur[0].EndPeriod)))
    {
      Verdict = 1;
    }
  else if (GapAbove && Cur[-2].Empty)
    {
      if (Cur[1].Empty)
        {
          // One-line candidate: two Empty lines above, one below.
          // Any line without EndPeriod qualifies, so extra tests that also
          // require !EndPeriod (e.g. on Cur[-3].Empty or Cur[2].Empty)
          // cannot change the outcome and are not spelled out.
          if (!Cur[0].EndPeriod || Cur[0].AllCaps
              || StartsDivision || RomanLead)
            Verdict = 1;
        }
      else if (Cur[2].Empty)
        {
          // Two-line candidate: Cur[0] and Cur[1] followed by an Empty
          // line, so the EndPeriod that matters is the one on Cur[1].
          // MatchesPrefatoryLine is evaluated first so that it is called
          // exactly once whenever this branch is reached.  As above, tests
          // that additionally require !Cur[1].EndPeriod are absorbed by
          // the bare !Cur[1].EndPeriod term.
          if (MatchesPrefatoryLine (Dataset, Line)
              || !Cur[1].EndPeriod
              || (Cur[0].AllCaps && Cur[1].AllCaps)
              || StartsDivision || RomanLead)
            Verdict = 1;
        }
    }

  // Vetoes, applied last so that they override every rule above.
  if (!Cur[0].VerseCap)
    Verdict = 0;
  if (LineNum >= Dataset->NumLines - PRE_OR_POST_LINES)
    Verdict = 0;

  return Verdict;
}