/*############################################################################## FUNNNELWEB COPYRIGHT ==================== FunnelWeb is a literate-programming macro preprocessor. The FunnelWeb web is at http://www.ross.net/funnelweb/ Copyright (c) Ross N. Williams 1992. All rights reserved. This program is free software; you can redistribute it and/or modify it under the terms of Version 2 of the GNU General Public License as published by the Free Software Foundation (http://www.gnu.org/). This program is distributed WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See Version 2 of the GNU General Public License for more details. You should have received a copy of Version 2 of the GNU General Public License along with this program. If not, you can obtain a copy as follows: ftp://prep.ai.mit.edu/pub/gnu/COPYING-2.0 or write to: Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA Section 2a of the license requires that all changes to this file be recorded prominently in this file. Please record all changes here. Programmers: RNW Ross N. Williams (ross@ross.net) Changes: 07-May-1992 RNW Program prepared for release under GNU GPL V2. 08-May-1999 RNW scan_file: Added include file name in two error messages. 09-May-1999 RNW Added scanning of @L token (TK_LIBR). ##############################################################################*/ /******************************************************************************/ /* SCANNER.C */ /******************************************************************************/ /* */ /* Introduction */ /* ------------ */ /* The FunnelWeb scanner is a little messy because it deals with two */ /* structures at the same time while attempting to be efficient. On the one */ /* hand it is busy constructing the line list. This means that it has to keep */ /* an eye out for end of line characters ('\n'=EOL) so that it can add a line */ /* record whenever it sees one. On the other hand, it has to scan the input */ /* file into a token stream consisting of text tokens and special tokens */ /* which usually have no regard for end of lines. It is tempting to divide */ /* these two functions up (into perhaps a LINER and a TOKENIZER). However, */ /* the presence of the include file facility would make this messy. Also, the */ /* tokenizer has to count end of line markers so that it can generate */ /* correctly positioned diagnostics. */ /* */ /* The long and short of it all is that the best way to do the scanning seems */ /* to be to run a liner and a tokenizer as parallel layers. The liner */ /* extracts characters from the input file and hands them to the tokenizer. */ /* It also keeps an eye out for newline characters, sending a line record off */ /* whenever it sees one, and counting lines. The tokenizer receives the */ /* characters from the liner and performs the tokenize operation. */ /* */ /* Notes */ /* ----- */ /* - Currently FunnelWeb recognises only two characters as whitespace. */ /* These are ' ' and EOL. */ /* */ /******************************************************************************/ #include #include #include "style.h" #include "as.h" #include "clock.h" #include "data.h" #include "dump.h" #include "list.h" #include "lister.h" #include "machin.h" #include "mapper.h" #include "memory.h" #include "misc.h" #include "option.h" #include "scanner.h" /******************************************************************************/ /* The "special" character is the character that is used to introduce a */ /* "special sequence". FunnelWeb allows the user to change this character so */ /* as to cater for documents where the "default" character is common. This */ /* definition defines what the default character is. */ #define CH_DSPE ('@') /* FunnelWeb allows include files which are handled by the scanner by placing */ /* recursive calls to scan_file. A maximum is placed on the level of nested */ /* includes. This acts as a good sanity check as well as catching recursive */ /* include files which are never a sensible construct in FunnelWeb as */ /* FunnelWeb does not provide any conditional construct. */ #define MAX_INCL (10) /* FunnelWeb is very conservative about what characters it will allow in its */ /* input and output files. Currently the only characters allowed are */ /* printables and end of lines. When FunnelWeb does spot an illegal character */ /* it needs to be able to draw the user's attention to the character. The */ /* best way to do this is to point to it in the listing file. However, if the */ /* character is banned, it cannot appear in the listing file! The problem is */ /* solved by having the scanner replace all illegal characters in each mapped */ /* file by the following character. This eliminates further problems. */ #define CENSORCH ('?') /* Following the Unix convention, mapped in files are not terminated with an */ /* end-of-file character. However, the presence of such a character at the */ /* end of the mapped file simplifies scanning and so we add one. This */ /* definition defines what the character is to be. It doesn't matter what the */ /* character is, so long as it cannot legally appear in the file. A control */ /* character is a good choice as these are filtered out by the liner (see */ /* above). */ /* We undef EOF (from ) because it is too dangerously close to EOFCH.*/ /* (EOF wasn't redefined as that might confuse readers used to . */ /* However, we still use EOF as an acronym for End Of File. */ #define EOFCH (26) #undef EOF /* Tokens have a field for a general attribute which has meaning for some */ /* token kinds. For other kinds, it has no meaning. This constant is used to */ /* indicate a "don't care" value. */ #define DONTCARE 0 /* A nominal maximum value for the maximum length of an input line. */ #define INMAXINF (ULONG_MAX) /******************************************************************************/ /* The following type is used in the suite of pragma routines for parsing. */ typedef struct { ps_t pt_ps; /* Position of the start of this argument. */ char *pt_pstr; /* Pointer to a string containing the argument. */ char *pt_pinl; /* Pointer to first byte of the argument in commndline. */ } pt_t; typedef pt_t *p_pt_t; /******************************************************************************/ /* Variables Instantiated Over The Entire Scan */ /* ------------------------------------------- */ LOCVAR p_ck_t p_mapp; /* Pointer to mapper's clock. */ LOCVAR p_ck_t p_scan; /* Pointer to scanner's clock. */ LOCVAR ulong globalno; /* Global line number of line being scanned. */ LOCVAR ulong inclevel; /* Include level of current file. Top file is zero. */ LOCVAR bool seenind; /* TRUE iff we have seen an indentation pragma. */ LOCVAR ps_t ps_ind; /* seenind==TRUE => ps_ind is position of pragma. */ LOCVAR bool seentyp; /* TRUE iff we have seen a typesetter pragma. */ LOCVAR ps_t ps_typ; /* seentyp==TRUE => ps_typ is position of pragma. */ LOCVAR bool seenlimo; /* TRUE iff we have seen an out lin len limit pragma. */ LOCVAR ps_t ps_limo; /* seenlimo==TRUE => ps_limo is position of pragma. */ /* Variables Instantiated Over The Current File */ /* -------------------------------------------- */ LOCVAR ulong inln_max; /* Maximum permitted length of an input line. */ LOCVAR char specialch; /* Current special (escape) character. */ LOCVAR char *p_eof; /* Pointer to EOFCH byte at the end of current file. */ LOCVAR ulong localno; /* Local line number of line being scanned. */ /* Variables Instantiated Over The Current Line */ /* -------------------------------------------- */ LOCVAR char *p_sol; /* Pointer to Start (first char) Of current Line. */ LOCVAR char *p_ch; /* Pointer to current character in current line. */ LOCVAR char ch; /* *p_ch. */ /******************************************************************************/ /* Line Processing Layer */ /******************************************************************************/ /* */ /* This mini-section contains the two routines (prepline and NEXTCH) that */ /* take care of the line based-scanning and feed characters to the */ /* token-based scanner routines which have the top level of control. */ /* After mapping in a file to be read, place a call to prepline passing the */ /* address of the first byte of the mapped file as an argument. At that */ /* point the current position will be the first byte on the first line and */ /* the "variables instantiated over the current line" will be well defined. */ /* Calls to NEXTCH then move the position through the mapped file one byte at */ /* a time, stopping at the end of file at which point calls will not move the */ /* marker which will point to the EOF character. */ /* */ /******************************************************************************/ LOCAL void prepline P_((char *)); LOCAL void prepline(p_line) /* This function should be called at the end of each line to prepare the next */ /* line for scanning. The user of the liner mini-package should place a */ /* single call to this function at the start of scanning a mapped file. */ /* The user should then place calls to NEXTCH (which calls prepline when */ /* necessary). */ /* This function serves two purposes: */ /* 1. It looks at the next line and converts all non-printables into */ /* CENSORCH and issues errors for each non-printable. */ /* 2. It initializes the line scanning variables for the next line. */ /* The argument is a pointer to the first byte of the next line. */ char *p_line; { char *p; /* Scans through the line and winds up sitting on the EOL. */ /* Test to see if the "line" we have been given is the end of file marker. */ /* We have to be careful here because the byte we are using to mark the end */ /* of file could appear as an illegal unprintable. This is the reason for */ /* the test p_line==p_eof. */ if (*p_line==EOFCH && p_line==p_eof) { /* The line we have to process is in fact the end of file marker. */ p_sol = p_line; p_ch = p_line; ch = EOFCH; return; } /* At this point we know that we are faced with a run of bytes terminated by */ /* an EOL character (we know this cos we put an EOL before EOF earlier on). */ /* We know that we have a line, so we can now bump up the line counters. */ globalno++; localno++; /* Run through the line checking for non-printables and issuing errors. */ p = p_line; while (*p != EOL) { /* The following test tests to see if the character is a printable in */ /* seven bit ascii. FunnelWeb is not currently designed to work with */ /* any character set other than seven-bit ascii and so we flag and */ /* convert all out-of-range characters here before they are exposed to */ /* the rest of the scanner code which assumes that each line that it is */ /* handed consists entirely of printables except for the EOL char on the */ /* end and possibly an EOF char at the "Start" of a line. */ /* In particular, the NEXTCH macro will fail on machines with siged */ /* chars if non-printables are not removed. It goes into an infinite */ /* loop. */ /* Note: I don't use library function "isprint" here because on the vax */ /* it's definition is too loose (seems to accept characters with the top */ /* bit set as printable). */ if (!isascprn(*p)) /* If not a printable character. */ { ps_t ps; char c = *p; ubyte_ uc = *((ubyte_ *) p); ps.ps_line = globalno; ps.ps_column = p-p_line+1; if (strlen(chabbrev(c))==0) sprintf(linet1, "Non printable character (Sym=, Dec=%03u, Hex=%02X, Oct=%03o).", (unsigned) uc,(unsigned) uc,(unsigned) uc); else sprintf(linet1, "Non printable character (Sym=%s, Dec=%03u, Hex=%02X, Oct=%03o).", chabbrev(c),(unsigned) uc,(unsigned) uc,(unsigned) uc); lr_err(&ps,linet1); *p=CENSORCH; } p++; } /* Assert: p_line points to the start of the current line. */ /* Assert: p points to the EOL at the end of the current line. */ /* Check that the line is not too long. */ if ((p-p_line)>inln_max) { ps_t ps; ps.ps_line = globalno; ps.ps_column = inln_max+1; lr_err(&ps,"Input line is too long (this character is the first offender)."); sprintf(linet1,"Currently, the maximum allowable input line length is %lu.", (unsigned long) inln_max); lr_mes(&ps,linet1); lr_mes(&ps,"Note: You can change this using a pragma directive (@p)."); } /* Now check for trailing spaces. */ if ((p != p_line) && (*(p-1) == ' ')) { ps_t ps; ps.ps_line = globalno; ps.ps_column = p-p_line; lr_war(&ps,"Line has trailing spaces up to and including this space."); } /* Construct a line record and append the record to the line list. */ /* Note that the line scrap encompasses the trailing EOL. */ { ln_t line; line.ln_global = globalno; line.ln_local = localno; line.ln_body.sc_first = p_line; line.ln_body.sc_last = p; /* Note: We do not set sc_white as it is not used in lines. */ ls_add(line_list,PV &line); } /* Finally, set the line scanning variables to the start of the line. */ /* We can't do this earlier in case the start of the line was a */ /* non-printable and got substituted (ch might pick it up). */ p_sol = p_line; p_ch = p_line; ch = *p_line; } /* End of prepline. */ /* NEXTCH can be called continuously after an initializing call to prepline. */ /* After a call to NEXTCH, p_sol, p_ch, ch are all well-defined. p_sol points */ /* to the start of the current line, p_ch points to the current character, */ /* and ch contains *p_ch. NEXTCH can be called repeatedly forever. When it */ /* hits the EOF character, it sticks on it and returns it forever. */ /* Note: The "ch<' '" is an optimized form of "(ch==EOL)||(ch=EOFCH)". Speed */ /* is very important here as this macro is called in scanning tightloops. */ /* This line of code is a little tricky so read it carefully. */ /* WARNING: The ch<' ' will cause an infinite loop if a character appears */ /* that satisfies this condition without being EOF or EOL (e.g. a control */ /* char (meant to be filtered out earlier) or a top-bit-set character on */ /* machines with signed character type. */ #define NEXTCH {if (ch<' ') {if (ch==EOL) prepline(p_ch+1);} else ch= *++p_ch;} /******************************************************************************/ /* Scanner Support Routines */ /******************************************************************************/ LOCAL ps_t *psofch P_((void)); LOCAL ps_t *psofch() /* Returns a pointer to an internal static ps structure holding the line and */ /* column number of the current character ch. */ { STAVAR ps_t chps; chps.ps_line = globalno; chps.ps_column = p_ch-p_sol+1; return &chps; } /******************************************************************************/ LOCAL void grabchps P_((p_ps_t)); LOCAL void grabchps(p_ps) /* Writes the position of the current ch into the argument position struct. */ p_ps_t p_ps; { p_ps->ps_line = globalno; p_ps->ps_column = p_ch-p_sol+1; } /******************************************************************************/ LOCAL void sendspec P_((p_ps_t,tk_k_t,ubyte)); LOCAL void sendspec(p_tkps,tk_kind,tk_gen) /* Appends a non-text token of kind tk_kind to the end of the token list. */ /* p_ps is a pointer to a position structure giving the position of the */ /* first character of the token. tk_gen is the general token attribute. */ p_ps_t p_tkps; tk_k_t tk_kind; ubyte tk_gen; { tk_t token; token.tk_kind = tk_kind; ASSIGN(token.tk_ps,*p_tkps); token.tk_sc.sc_first = NULL; token.tk_sc.sc_last = NULL; token.tk_sc.sc_white = TRUE; token.tk_gen = tk_gen; ls_add(token_list,PV &token); } /******************************************************************************/ LOCAL void sendtext P_((p_ps_t,char *,char *,bool)); LOCAL void sendtext(p_tkps,p_first,p_last,is_white) /* Appends a text token to the end of the token list. */ /* IN: p_ps is a pointer to a position structure giving the position of the */ /* first character of the token. */ /* IN: p_first and p_last point to the first and last byte of the text scrap. */ /* IN: is_white should be set to TRUE iff scrap is entirely whitespace. */ p_ps_t p_tkps; char *p_first; char *p_last; bool is_white; { tk_t token; /* Empty text scraps should never be generated. */ as_cold(p_first<=p_last,"sendtext: Text scrap bounds are bad."); /* If ch=EOL then we should be scanning more text, not shipping it! */ as_cold(ch!=EOL,"senttext: Shipping text while still more to scan."); /* Send the text token. */ token.tk_kind = TK_TEXT; ASSIGN(token.tk_ps,*p_tkps); token.tk_sc.sc_first = p_first; token.tk_sc.sc_last = p_last; token.tk_sc.sc_white = is_white; token.tk_gen = DONTCARE; ls_add(token_list,PV &token); } /******************************************************************************/ LOCAL void add_eof P_((void)); LOCAL void add_eof() /* This function adds terminators to the line and token list. */ /* 1. It adds a TK_EOF token to the end of the token list. */ /* 2. It adds a visible line to the end of the line list. */ /* This assists the parser by allowing it to point diagnostic messages to a */ /* visible EOF marker rather than pointing vaguely to the end of the last */ /* line of the input file which (by the way) may not even exist! */ { STAVAR char *eofstr = "\n"; ln_t line; ps_t ps; /* When the liner mini package encounters an end of file marker, it stops */ /* dead on the marker and returns EOFCH forever. scan_file() eventually gets */ /* the message and drops out. However, in all of this, the line numbers are */ /* not incremented to indicate that we have moved to an EOF line. This is */ /* intended, as we do not want EOFs to appear in the listing for include */ /* files; only at the end of the main input file. Thus, here we effectively */ /* perform the liner function of moving from the last line of the input file */ /* to the imaginary line containing the EOF marker. This is done by */ /* incrementing the line numbers. Note that the fact that these line number */ /* variables are incorrect from the point of detection of the final EOF to */ /* here doesn't matter as no tokens or diagnostics are ever added after an */ /* EOF is detected. */ globalno++; localno++; /* Add a line to represent the EOF marker. */ line.ln_global = globalno; line.ln_local = localno; line.ln_body.sc_first = eofstr; line.ln_body.sc_last = eofstr+strlen(eofstr)-1; /* Note: We do not set sc_white as it is not used in lines. */ ls_add(line_list,PV &line); /* Add a TK_EOF token to the end of the token list. */ ps.ps_line = globalno; ps.ps_column = 1; sendspec(&ps,TK_EOF,DONTCARE); } /******************************************************************************/ /* The Scanner Proper */ /******************************************************************************/ LOCAL void skiptoeol P_((void)); LOCAL void skiptoeol() { while (ch != EOL) NEXTCH; } /******************************************************************************/ /* The incl_fil function calls this, so we have to declare it in advance. */ LOCAL void scan_file P_((char *)); LOCAL void incl_fil P_((p_ps_t)); LOCAL void incl_fil(p_ps) /* Upon entry, the current character is the "i" of an "@i" sequence. Our task */ /* is first to see if the sequence occurred at the start of a line (the only */ /* point at which it is legal) and issue an error if it isn't. If it is legal,*/ /* we have to read in the specified file and scan that. The included file */ /* replaces exactly the line starting with the "@i" command and we return */ /* to the "calling" file with the current position being the EOL character of */ /* the include line. */ p_ps_t p_ps; { /* Complain if the include directive was not at the start of a line. */ if (p_ch-1 != p_sol) { lr_err(p_ps,"Include sequence must be at the beginning of a line."); lr_mes(p_ps,"Include ignored."); skiptoeol(); return; } /* The include command should be followed by a blank. Get the next char. */ NEXTCH; /* Complain if the next character is not a blank. */ if (ch != ' ') { ps_t ps; ASSIGN(ps,*p_ps); ps.ps_column+=2; lr_err(&ps,"Include sequence (@i) must be followed by a blank."); lr_mes(&ps,"Example include: @i macros.fwi"); lr_mes(&ps,"Include ignored."); skiptoeol(); return; } /* Complain if the include level is too high. */ if (inclevel == MAX_INCL) { lr_err(p_ps,"This include file is nested too deeply. It's probably recursive."); sprintf(linet1,"The maximum level of nested includes is %u.", (unsigned) MAX_INCL); lr_mes(p_ps,linet1); lr_mes(p_ps,"Include ignored."); skiptoeol(); return; } {/* This construct does the work of the include. */ /* Warning: The following variables MUST be declared automatic. */ char *p_filename; char *p_tempname; ulong length; char *p; ulong xinln_max; char xspecial; char *xp_eof; ulong xlocalno; char *xp_sol; char *xp_ch; char xch; /* We save stack space by sticking this filename in the heap. */ p_filename=mm_temp((size_t) PATHNAME_MAX+1+10); /* 10 is for paranoia. */ p_tempname=mm_temp((size_t) PATHNAME_MAX+1+10); /* 10 is for paranoia. */ /* The rest of the line is supposed to hold a filename. Copy it. */ NEXTCH; p=p_tempname; length=0; while (ch!=EOL) { if (++length > PATHNAME_MAX) { lr_err(p_ps, "This include command's file specification is too long."); if (option.op_b7_b) sprintf(linet1,"The maximum file name length is %s characters.", SUPPVAL); else sprintf(linet1,"The maximum file name length is %u characters.", (unsigned) PATHNAME_MAX); lr_mes(p_ps,linet1); lr_mes(p_ps,"Include ignored."); skiptoeol(); return; } *p++=ch; NEXTCH; } *p=EOS; /* Note: Current position is now on the EOL at the end of the @i line. */ /* Complain if the user has not specified a filename. */ if (strlen(p_tempname) ==0) { lr_err(psofch(),"Expecting the name of a file to include."); return; } /* Perform the necessary filename inheritance. */ strcpy(p_filename,""); fn_ins(p_filename,option.op_f_s); fn_ins(p_filename,".fwi"); fn_ins(p_filename,option.op_i_s); fn_ins(p_filename,p_tempname); /* Include the included file by calling scan_file recursively. */ /* Save and restore all variables in instantiation scope. */ xinln_max = inln_max; xspecial = specialch; xp_eof = p_eof; xlocalno = localno; xp_sol = p_sol; xp_ch = p_ch; xch = ch; inclevel++; scan_file(p_filename); inclevel--; ch = xch; p_ch = xp_ch; p_sol = xp_sol; localno = xlocalno; p_eof = xp_eof; specialch = xspecial; inln_max = xinln_max; } } /******************************************************************************/ LOCAL void do_ascii P_((p_ps_t)); LOCAL void do_ascii(p_psspec) /* Upon entry, the current character is the '^' of a @^ sequence. The task is */ /* to parse the following ascii code and generate a text token. */ p_ps_t p_psspec; { ubyte base; /* Base of the number we are scanning. */ ubyte digits; /* Number of digits expected. */ uword val; /* Value of target character. */ ubyte i; /* Looping variable. */ STAVAR char alphab[256]; /* Static alphabet array to which to point scraps. */ STAVAR bool init=FALSE; /* Tells if alphab has been initialized. */ /* Establish an array containing the ascii character set. Later on we can */ /* point the sc_first and sc_last pointers to particular characters. */ if (!init) {uword i; for (i=0;i<256;i++) alphab[i]=(char) i; init=TRUE;} /* Make sure that the base character is legal. */ NEXTCH; switch(toupper(ch)) { case 'B': base= 2; digits=8; break; case 'O': case 'Q': base= 8; digits=3; break; case 'D': base=10; digits=3; break; case 'H': case 'X': base=16; digits=2; break; default : lr_err(psofch(),"Expecting one of 'B', 'Q', 'D', 'H'."); lr_mes(psofch(),"(For Binary, Octal, Decimal, and Hexadecimal)."); base=10; goto trouble; } /* Parse opening parenthesis. */ NEXTCH; if (ch!='(') {lr_err(psofch(),"Expecting '('.");goto trouble;} val=0; for (i=0;i=base) {lr_err(psofch(),"Illegal digit."); goto trouble;} val = base*val + d; } /* Parse closing parenthesis. */ NEXTCH; if (ch!=')') {lr_err(psofch(),"Expecting ')'.");goto trouble;} /* Make sure that the number is not too big (this is possible in decimal). */ if (val>255) { lr_err(psofch(),"Character number is too large."); lr_mes(psofch(),"Character number must be in the range [0,255] (decimal)."); goto trouble; } /* Success! Now we can parcel it up into a scrap! */ sendtext(p_psspec,&alphab[val],&alphab[val],ch==' ' || ch==EOL); return; trouble: /* Jump here after a specific diagnostic to give the user a reminder of */ /* how to specify an ascii character constant. */ switch (base) { case 2: lr_mes(psofch(), "A binary character representation takes the form \"@^B(dddddddd)\"."); lr_mes(psofch(), "(exactly 8 digits) where each digit d is either 0 or 1."); break; case 8: lr_mes(psofch(), "An octal character representation takes the form \"@^Q(ddd)\" (or \"@^O(ddd)\")."); lr_mes(psofch(), "(exactly 3 digits) where each digit d is in the range 0..7."); break; case 10: lr_mes(psofch(), "A decimal character representation takes the form \"@^D(ddd)\"."); lr_mes(psofch(), "(exactly 3 digits) where each digit d is in the range 0..9."); break; case 16: lr_mes(psofch(), "A hexadecimal character representation takes the form \"@^X(dd)\" (or \"@^H(dd)\")."); lr_mes(psofch(), "(exactly 2 digits) where each digit d is in the range 0..9,A..F."); break; default: as_bomb("do_ascii: trouble base switch defaulted."); } } /******************************************************************************/ LOCAL void do_name P_((p_ps_t)); LOCAL void do_name (p_psspec) /* Upon entry, the current character is the # of a @#. The task is to parse */ /* it and transmit a name token. */ p_ps_t p_psspec; { as_cold(ch=='#',"do_name: character is wrong."); NEXTCH; if ((ch==EOL) || (ch==' ')) {lr_err(psofch(),"Expecting a printable character."); return;} /* Transmit a name token. */ sendspec(p_psspec,TK_NAME,(ubyte) ch); } /******************************************************************************/ LOCAL void do_pgind P_((uword,p_pt_t)); LOCAL void do_pgind(numarg,arg) /* Parse an indentation pragma. */ uword numarg; p_pt_t arg; { bool pragind; ps_t psprag; /* Make sure that there are exactly three arguments. */ if (numarg != 3) /* "indentation" "none|blank". */ { lr_err(&arg[0].pt_ps, "This indentation pragma has the wrong number of arguments."); goto help; } /* Make sure that the second argument is an "=". */ if (0 != strcmp(arg[2].pt_pstr,"=")) { lr_err(&arg[2].pt_ps,"Expecting \"=\"."); goto help; } /* Check the third argument. */ if (strcmp(arg[3].pt_pstr,"none" )==0) pragind=FALSE; else if (strcmp(arg[3].pt_pstr,"blank")==0) pragind=TRUE; else { lr_err(&arg[3].pt_ps,"Expecting either \"none\" or \"blank\"."); goto help; } /* Construct a shorthand for the start of the pragma. */ ASSIGN(psprag,arg[0].pt_ps); /* Make sure that the pragma does not contradict an earlier pragma. */ if (seenind && (tgindent!=pragind)) { sprintf(linet1,"This pragma is opposed by the pragma at line %lu.", (unsigned long) psprag.ps_line); lr_mes(&ps_ind,linet1); sprintf(linet1,"This pragma opposes the pragma at line %lu.", (unsigned long) ps_ind.ps_line); lr_err(&psprag,linet1); lr_mes(&psprag,"You can have as many indentation pragmas as you like,"); lr_mes(&psprag,"but they all have to be the same!"); lr_mes(&psprag,"Pragma ignored."); return; } /* Success: Record the pragma information. */ seenind = TRUE; /* Record that we have seen a pragma. */ tgindent = pragind; /* Record what the pragma said. */ ASSIGN(ps_ind,psprag); /* Record where the pragma was. */ return; help: lr_mes(&arg[0].pt_ps, "The correct format is: \"@p indentation = none|blank\"."); lr_mes(&arg[0].pt_ps,"Pragma ignored."); return; } /******************************************************************************/ LOCAL void do_pginl P_((uword,p_pt_t)); LOCAL void do_pginl(numarg,arg) /* Parse a maximum input line length pragma. */ uword numarg; p_pt_t arg; { char *numstr; uword spn; /* Make sure that there are exactly three arguments. */ if (numarg != 3) /* "max..length = ". */ { lr_err(&arg[0].pt_ps,"This pragma has the wrong number of arguments."); goto help; } /* Make sure that the second argument is "=". */ if (0 != strcmp(arg[2].pt_pstr,"=")) {lr_err(&arg[2].pt_ps,"Expecting \"=\"."); goto help;} /* Set up an abbreviation. */ numstr=arg[3].pt_pstr; /* See if the value is "infinity". */ if (strcmp(numstr,"infinity")==0) {inln_max=INMAXINF; return;} /* Calculate length of longest prefix containing all decimal digits. */ /* Check that there are no illegal digits. */ spn=strspn(numstr,"0123456789"); if (spn != strlen(numstr)) { ps_t ps; ASSIGN(ps,arg[3].pt_ps); ps.ps_column+=spn; lr_err(&ps,"Illegal digit. Value must consist entirely of decimal digits."); lr_mes(&ps,"You can also use the value \"infinity\"."); lr_mes(&ps,"Pragma ignored."); return; } /* Check that the number is not too long. */ if (strlen(numstr)>8) { lr_err(&arg[3].pt_ps,"Too many digits. The maximum is eight."); lr_mes(&arg[3].pt_ps,"Pragma ignored."); return; } /* Convert the argument into an integer. */ { ulong val; int result; /* Note: Should really be %lu, but the Vax doesn't know about the %u */ /* in sscanf and so we make do with %ld. */ result=sscanf(numstr,"%ld",&val); as_cold(result==1,"do_pginl:sscanf failed."); inln_max=val; } return; help: lr_mes(&arg[0].pt_ps, "The correct format is: \"@p maximum_input_line_length = |infinity\"."); lr_mes(&arg[0].pt_ps,"Pragma ignored."); return; } /******************************************************************************/ LOCAL void do_pgotl P_((uword,p_pt_t)); LOCAL void do_pgotl(numarg,arg) /* Parse a maximum product file line length pragma. */ uword numarg; p_pt_t arg; { char *numstr; uword spn; ulong val; ps_t psprag; /* Set up an abbreviation. */ ASSIGN(psprag,arg[0].pt_ps); /* Make sure that there are exactly three arguments. */ if (numarg != 3) /* "max..length" "=" "value". */ { lr_err(&arg[0].pt_ps,"This pragma has the wrong number of arguments."); goto help; } /* Make sure that the second argument is an "=". */ if (0 != strcmp(arg[2].pt_pstr,"=")) {lr_err(&arg[2].pt_ps,"Expecting \"=\"."); goto help;} /* Set up an abbreviation. */ numstr=arg[3].pt_pstr; /* See if the value is "infinity". */ if (strcmp(numstr,"infinity")==0) {val=TGMAXINF; goto gotvalue;} /* Calculate length of longest prefix containing all decimal digits. */ /* Check that there are no illegal digits. */ spn=strspn(numstr,"0123456789"); if (spn != strlen(numstr)) { ps_t ps; ASSIGN(ps,arg[3].pt_ps); ps.ps_column+=spn; lr_err(&ps,"Illegal digit. Value must consist entirely of decimal digits."); lr_mes(&ps,"You can also use the value \"infinity\"."); lr_mes(&ps,"Pragma ignored."); return; } /* Check that the number is not too long. */ if (strlen(numstr)>8) { lr_err(&arg[3].pt_ps,"Too many digits. The maximum is eight."); lr_mes(&arg[3].pt_ps,"Pragma ignored."); return; } /* Convert the argument into an integer. */ { int result=sscanf(numstr,"%ld",&val); as_cold(result==1,"do_pgotl:sscanf failed."); } gotvalue: /* Make sure that the pragma does not contradict an earlier pragma. */ if (seenlimo && (tglinmax!=val)) { sprintf(linet1,"This pragma is opposed by the pragma at line %lu.", (unsigned long) psprag.ps_line); lr_mes(&ps_limo,linet1); sprintf(linet1,"This pragma opposes the pragma at line %lu.", (unsigned long) ps_limo.ps_line); lr_err(&psprag,linet1); lr_mes(&psprag,"You can have as many output line length pragmas"); lr_mes(&psprag,"as you like, but they all have to be the same!"); lr_mes(&psprag,"Pragma ignored."); return; } /* If we got this far then the pragma is just the same as an earlier one. */ /* We don't want to set the pragma position to the later pragma so we */ /* return now. */ if (seenlimo) return; /* Success. Set the variables. */ tglinmax=val; seenlimo=TRUE; ASSIGN(ps_limo,psprag); return; help: lr_mes(&arg[0].pt_ps, "The correct format is: \"@p maximum_output_line_length = |infinity\"."); lr_mes(&arg[0].pt_ps,"Pragma ignored."); return; } /******************************************************************************/ LOCAL void do_pgnpg P_((uword,p_pt_t)); LOCAL void do_pgnpg(numarg,arg) /* Parse a newpage typesetter directive. */ uword numarg; p_pt_t arg; { /* Make sure that there is exactly one argument. */ if (numarg > 1) /* "new_page" */ { lr_err(&arg[2].pt_ps,"The new_page directive does not take arguments."); lr_mes(&arg[2].pt_ps,"Directive ignored."); return; } sendspec(&arg[0].pt_ps,TK_NPAG,DONTCARE); } /******************************************************************************/ LOCAL void do_pgtoc P_((uword,p_pt_t)); LOCAL void do_pgtoc(numarg,arg) /* Parse a table of contents typesetter directive. */ uword numarg; /* Number of arguments to table of contents directive. */ p_pt_t arg; /* Array describing arguments. */ { /* Make sure that there is exactly one argument. */ if (numarg > 1) /* "table_of_contents" */ { lr_err(&arg[2].pt_ps, "The table_of_contents directive does not take arguments."); lr_mes(&arg[2].pt_ps,"Directive ignored."); return; } sendspec(&arg[0].pt_ps,TK_TOCS,DONTCARE); } /******************************************************************************/ LOCAL void do_pgvsk P_((uword,p_pt_t)); LOCAL void do_pgvsk(numarg,arg) /* Parse a vskip typesetter directive. */ uword numarg; /* Number of arguments to indentation directive. */ p_pt_t arg; /* Array describing arguments. */ { char *numstr; uword spn; /* Make sure that there are exactly three arguments. */ if (numarg != 3) /* "vskip" n "mm". */ { lr_err(&arg[0].pt_ps,"This directive has the wrong number of arguments."); goto help; } /* Make sure that the third argument is "mm". */ if (0 != strcmp(arg[3].pt_pstr,"mm")) {lr_err(&arg[3].pt_ps,"Expecting \"mm\"."); goto help;} /* Set up an abbreviation. */ numstr=arg[2].pt_pstr; /* Calculate length of longest prefix containing all decimal digits. */ /* Check that there are no illegal digits. */ spn=strspn(numstr,"0123456789"); if (spn != strlen(numstr)) { ps_t ps; ASSIGN(ps,arg[2].pt_ps); ps.ps_column+=spn; lr_err(&ps,"Illegal digit."); lr_mes(&ps,"Value must consist entirely of decimal digits."); lr_mes(&ps,"Directive ignored."); return; } /* Check that the number is not too long. */ if (strlen(numstr)>3) { lr_err(&arg[2].pt_ps,"Too many digits. The maximum is three."); lr_mes(&arg[2].pt_ps,"Directive ignored."); return; } /* Convert the argument into an integer. */ { ulong val; int result; result=sscanf(numstr,"%ld",&val); as_cold(result==1,"do_pginl:sscanf failed."); if (val>255) { lr_err(&arg[2].pt_ps,"Value too large. Maximum is 255."); lr_mes(&arg[2].pt_ps,"Directive ignored."); return; } sendspec(&arg[0].pt_ps,TK_SKIP,(ubyte) val); } return; help: lr_mes(&arg[0].pt_ps,"The correct format is: \"@t vskip mm\"."); lr_mes(&arg[0].pt_ps,"Directive ignored."); return; } /******************************************************************************/ LOCAL void do_pgtit P_((uword,p_pt_t)); LOCAL void do_pgtit(numarg,arg) /* Parse a title typesetter directive. */ uword numarg; /* Number of arguments to title directive. */ p_pt_t arg; /* Array describing arguments. */ { uword align; uword font; char *p_sot,*p_eot; /* Make sure that there are at least three arguments. */ if (numarg < 4) /* "title ". */ {lr_err(&arg[0].pt_ps,"This directive has too few arguments."); goto help;} /* Check the font argument. */ if (strcmp(arg[2].pt_pstr,"normalfont" )==0) font=FT_NORM; else if (strcmp(arg[2].pt_pstr,"titlefont" )==0) font=FT_TITL; else if (strcmp(arg[2].pt_pstr,"smalltitlefont")==0) font=FT_STIT; else { lr_err(&arg[2].pt_ps, "Expecting one of {titlefont,smalltitlefont,normalfont}."); lr_mes(&arg[2].pt_ps,"Directive ignored."); return; } /* Check the alignment argument. */ if (strcmp(arg[3].pt_pstr,"left" )==0) align=LR_LEFT; else if (strcmp(arg[3].pt_pstr,"right" )==0) align=LR_RIGH; else if (strcmp(arg[3].pt_pstr,"centre")==0) align=LR_CENT; else { lr_err(&arg[3].pt_ps,"Expecting one of {left,right,centre}."); if (strcmp(arg[3].pt_pstr,"center")==0) { lr_mes(&arg[3].pt_ps,"Note: Centre is spelt centRE, not centER."); lr_mes(&arg[3].pt_ps," This is my revenge for years of getting error messages"); lr_mes(&arg[3].pt_ps," from TeX whenever I accidentally wrote \\centreline - Ross Williams."); } lr_mes(&arg[3].pt_ps,"Directive ignored."); return; } /* Now make sure that the remainder of the line is delimited by quotes. */ p_sot=arg[4].pt_pinl; p_eot=p_sot+strlen(p_sot)-1; if (*p_sot!='"' || *p_eot!='"' || p_sot==p_eot) { lr_err(&arg[4].pt_ps,"Text argument must be delimited by double quotes."); lr_mes(&arg[4].pt_ps,"Directive ignored."); return; } p_sot++; p_eot--; /* Ship out a token whose fields are all fully laden. */ { tk_t token; token.tk_kind = TK_TITL; ASSIGN(token.tk_ps,arg[0].pt_ps); token.tk_sc.sc_first = p_sol+ (3+(p_sot-arg[1].pt_pinl)); token.tk_sc.sc_last = p_sol+ (3+(p_eot-arg[1].pt_pinl)); token.tk_sc.sc_white = FALSE; token.tk_gen = LRFT_PACK*font+align; ls_add(token_list,PV &token); } return; help: lr_mes(&arg[0].pt_ps, "The correct format is: \"@t title \"."); lr_mes(&arg[0].pt_ps, " where = titlefont | smalltitlefont | normalfont."); lr_mes(&arg[0].pt_ps, " and = left | centre | right."); lr_mes(&arg[0].pt_ps, " and = text delimited by double quotes."); lr_mes(&arg[0].pt_ps,"Directive ignored."); return; } /******************************************************************************/ LOCAL void do_pgtyp P_((uword,p_pt_t)); LOCAL void do_pgtyp(numarg,arg) /* Parse a typesetter pragma. */ uword numarg; p_pt_t arg; { tr_k_t pragtyp; ps_t psprag; /* Make sure that there are exactly three arguments. */ if (numarg != 3) /* "typesetter" "=" "name". */ { lr_err(&arg[0].pt_ps, "This typesetter pragma has the wrong number of arguments."); goto help; } /* Make sure that the second argument is "=". */ if (0 != strcmp(arg[2].pt_pstr,"=")) { lr_err(&arg[2].pt_ps,"Expecting \"=\"."); goto help; } /* Check the third argument. */ if (strcmp(arg[3].pt_pstr,"none")==0) pragtyp=TR_NONE; else if (strcmp(arg[3].pt_pstr,"tex" )==0) pragtyp=TR_TEX; else if (strcmp(arg[3].pt_pstr,"html")==0) pragtyp=TR_HTML; else { lr_err(&arg[3].pt_ps,"Expecting one of [none,tex,html]."); goto help; } /* Construct a shorthand for the start of the pragma. */ ASSIGN(psprag,arg[0].pt_ps); /* Make sure that the pragma does not contradict an earlier pragma. */ if (seentyp && (tr_codes != pragtyp)) { sprintf(linet1,"This pragma is opposed by the pragma at line %lu.", (unsigned long) psprag.ps_line); lr_mes(&ps_typ,linet1); sprintf(linet1,"This pragma opposes the pragma at line %lu.", (unsigned long) ps_typ.ps_line); lr_err(&psprag,linet1); lr_mes(&psprag,"You can have as many typesetter pragmas as you like,"); lr_mes(&psprag,"but they all have to be the same!"); lr_mes(&psprag,"Pragma ignored."); return; } /* Success: Record the pragma information. */ seentyp = TRUE; /* Record that we have seen a pragma. */ tr_codes = pragtyp; /* Record what the pragma said. */ ASSIGN(ps_typ,psprag); /* Record where the pragma was. */ return; help: lr_mes(&arg[0].pt_ps, "The correct format is: \"@p typesetter = none|tex|html\"."); lr_mes(&arg[0].pt_ps,"Pragma ignored."); return; } /******************************************************************************/ LOCAL void do_pragma P_((p_ps_t,bool)); LOCAL void do_pragma(p_ps,is_typ) /* Upon entry, the current character is: */ /* is_typ=FALSE => The P of a @p. */ /* is_typ=TRUE => The T of a @t. */ /* This function processes these contructs. */ p_ps_t p_ps; bool is_typ; { #define MAXPARG 10 /* Maximum recorded arguments to a pragma. */ #define PRAGMA_MAX 100 /* Maximum length of a pragma. */ char praglin[PRAGMA_MAX+1]; /* Array to hold pragma as a complete line. */ char pragstr[PRAGMA_MAX+1]; /* Array to hold pragma as strings. */ pt_t pragarg[MAXPARG+1]; /* Array of pragma arguments. */ uword length; /* Helps prevent scanning overrun. */ char *p,*q; /* Temporary. */ uword numarg,na; /* Number of arguments seen so far. */ /* Complain if the pragma directive is not at the start of a line. */ if (p_ch-1 != p_sol) { if (is_typ) { lr_err(p_ps,"Typesetter directive @t must be at the start of a line."); lr_mes(p_ps,"The rest of this line will be ignored."); } else { lr_err(p_ps,"Pragma sequence @p must be at the start of a line."); lr_mes(p_ps,"The rest of this line will be ignored."); } skiptoeol(); goto help; } /* The include command should be followed by a blank. Get the next char. */ NEXTCH; /* Complain if the next character is not a blank. */ if (ch != ' ') { /* Note: If we position this error correctly, it gets put after the */ /* help message! */ if (is_typ) lr_err(p_ps,"Typesetter directive @t must be followed by a blank."); else lr_err(p_ps,"Pragma sequence @p must be followed by a blank."); skiptoeol(); goto help; } /* Copy the rest of the line to the pragma arrays. */ NEXTCH; p = &praglin[0]; q = &pragstr[0]; length=0; while (ch!=EOL) { if (++length > PRAGMA_MAX-3) /* 3 is for "@p " or "@t " */ { if (is_typ) { lr_err(p_ps,"This typestter directive line is too long."); sprintf(linet1,"The maximum typesetter directive line length is %u characters.", (unsigned) PRAGMA_MAX); lr_mes(p_ps,linet1); } else { lr_err(p_ps,"This pragma line is too long."); sprintf(linet1,"The maximum pragma line length is %u characters.", (unsigned) PRAGMA_MAX); lr_mes(p_ps,linet1); } skiptoeol(); goto help; } *p++=ch; *q++=ch; NEXTCH; } *p=EOS; *q=EOS; /* Note: Current position is now on the EOL at the end of the @p line. */ /* That is the way we want to leave it for the scanspec() routine. */ /* So far we have copied the body of the pragma line into two arrays. The */ /* next lump of code parses that line into a sequence of non-blank arguments.*/ /* The result is an array of pt_t objects each of which contains the */ /* position of each argument, a pointer to the first character of each */ /* argument in praglin, and also a pointer to a string containing the arg. */ /* The string resides in the array pragstr which is the same as praglin */ /* except that some blanks have been replaced by EOSs so as to allow us to */ /* point into it to form strings. All this probably seems rather overdone */ /* for the analysis of a "simple" pragma, but I have found that pulling the */ /* different kinds of pragma lines apart separately is very messy. Far */ /* better to suffer here in what is at least reasonably neat code than */ /* later in the specific pragma routines. */ numarg=0; p= &praglin[0]; q= &pragstr[0]; while (TRUE) { /* Skip whitespace between arguments. */ while (*p==' ') {p++;q++;} /* Exit if we have hit the end of the line. */ if ((numarg==MAXPARG) || (*p==EOS)) break; /* We have found another argument! */ numarg++; /* Record the argument. */ ASSIGN(pragarg[numarg].pt_ps,*p_ps); pragarg[numarg].pt_ps.ps_column=4+(p-praglin); pragarg[numarg].pt_pinl=p; pragarg[numarg].pt_pstr=q; /* Skip to the end of the argument. */ while (*p!=' ' && *p!=EOS) {p++;q++;} /* Drop a null in the string array to complete string rep of argument. */ *q=EOS; } /* At this point numarg is MIN(arguments,MAXPARG), and pragargs contains an */ /* entry for each of the numarg arguments. */ /* It is handy to have the position of the pragma itself handy. */ ASSIGN(pragarg[0].pt_ps,*p_ps); /* CHECK: Make sure that the line and string arrays square up. */ { uword i; for (i=1;i<=numarg;i++) { uword j; uword t=strlen(pragarg[i].pt_pstr); for (j=0;j "); lr_mes(p_ps," @t vskip mm"); lr_mes(p_ps,"The blanks between arguments are important."); lr_mes(p_ps,"Typesetter directive ignored."); } else { lr_err(p_ps,"Unrecognised pragma. Possible legal pragmas are:"); lr_mes(p_ps," @p indentation = none | blank"); lr_mes(p_ps," @p maximum_input_line_length = |infinity"); lr_mes(p_ps," @p maximum_output_line_length = |infinity"); lr_mes(p_ps," @p typesetter = none | tex"); lr_mes(p_ps,"The blanks between arguments are important."); lr_mes(p_ps,"Pragma ignored."); } } /******************************************************************************/ LOCAL void chksol P_((void)); LOCAL void chksol() /* This function is called when the current character is the character after */ /* an @. The function checks to see if the @ was at the start of a line and */ /* issues a error message if it isn't. */ { ps_t ps; grabchps(&ps); if (ps.ps_column != 2) { ps.ps_column--; sprintf(linet1,"@%c is legal only at the start of a line.",ch); lr_err(&ps,linet1); } } /******************************************************************************/ LOCAL void scanspec P_((void)); LOCAL void scanspec() /* Upon entry the current character is the special character (usually '@'). */ /* The task is to scan the special sequence. Upon exit, the current character */ /* is the character following the special sequence. */ { ps_t ps_spec; /* Position of start of special sequence. */ /* Make a note of where the special sequence starts. */ grabchps(&ps_spec); /* Move onto the character following the special (escape) character. */ NEXTCH; /* Now react to the character. In most cases, the special sequence is simply */ /* a marker in the input and we can simply transmit it. The nasty special */ /* case sequences are left until the end of the switch statement. */ /* Purists will complain about how all the case options are hardwired and */ /* say that symbols should have been used. They once were, but were taken */ /* out when it was discovered that the symbols had cryptic names (because of */ /* the portability eight-character rule) and were only used here anyway. */ switch (toupper(ch)) { case '<': sendspec(&ps_spec,TK_ONAM,DONTCARE); break; case '>': sendspec(&ps_spec,TK_CNAM,DONTCARE); break; case '{': sendspec(&ps_spec,TK_ODEF,DONTCARE); break; case '}': sendspec(&ps_spec,TK_CDEF,DONTCARE); break; case '(': sendspec(&ps_spec,TK_OPAR,DONTCARE); break; case ')': sendspec(&ps_spec,TK_CPAR,DONTCARE); break; case ',': sendspec(&ps_spec,TK_COMA,DONTCARE); break; case '"': sendspec(&ps_spec,TK_QUOT,DONTCARE); break; case '/': sendspec(&ps_spec,TK_EMPH,DONTCARE); break; case 'A': sendspec(&ps_spec,TK_NSEC,1); chksol(); break; case 'B': sendspec(&ps_spec,TK_NSEC,2); chksol(); break; case 'C': sendspec(&ps_spec,TK_NSEC,3); chksol(); break; case 'D': sendspec(&ps_spec,TK_NSEC,4); chksol(); break; case 'E': sendspec(&ps_spec,TK_NSEC,5); chksol(); break; case '1': sendspec(&ps_spec,TK_PARM,1); break; case '2': sendspec(&ps_spec,TK_PARM,2); break; case '3': sendspec(&ps_spec,TK_PARM,3); break; case '4': sendspec(&ps_spec,TK_PARM,4); break; case '5': sendspec(&ps_spec,TK_PARM,5); break; case '6': sendspec(&ps_spec,TK_PARM,6); break; case '7': sendspec(&ps_spec,TK_PARM,7); break; case '8': sendspec(&ps_spec,TK_PARM,8); break; case '9': sendspec(&ps_spec,TK_PARM,9); break; case 'M': sendspec(&ps_spec,TK_MANY,DONTCARE); break; case 'Z': sendspec(&ps_spec,TK_ZERO,DONTCARE); break; case 'L': sendspec(&ps_spec,TK_LIBR,DONTCARE); break; case 'O': sendspec(&ps_spec,TK_FDEF,DONTCARE); chksol(); break; case '$': sendspec(&ps_spec,TK_MDEF,DONTCARE); chksol(); break; case EOL: lr_err(&ps_spec," is not a legal special sequence."); break; case ' ': lr_err(&ps_spec," is not a legal special sequence."); break; case '@': /* @ instructs FunnelWeb to replace the special construct with the */ /* special character. Luckily one appears just before the @ !! */ /* Note: FALSE is OK because space is not a legal specialch. */ sendtext(&ps_spec,p_ch-1,p_ch-1,FALSE); break; case '-': /* - instructs FunnelWeb to suppress the following end of line. */ if (*(p_ch+1) == EOL) NEXTCH else lr_err(&ps_spec, "Suppress EOL sequence is legal only at the end of a line."); break; case '+': /* + instructs FunnelWeb to insert an EOL. We can't look to the end of */ /* the previous line to find an EOL as this might be the first line. */ /* Running ahead to the end of the line is expensive, and having the */ /* liner mini-package maintain a variable for it would be extra */ /* housekeeping. Instead of all this, we just point to a static. */ {STAVAR char stateol = EOL; sendtext(&ps_spec,&stateol,&stateol,TRUE);} break; case '=': /* = instructs FunnelWeb to change the special character to the */ /* character following the = sequence. */ NEXTCH; if (ch == ' ') { lr_err(&ps_spec,"You cannot set the special character to !"); lr_mes(&ps_spec,"Special sequence ignored."); } else if (ch == EOL) { lr_err(&ps_spec, "You cannot set the special character to !"); lr_mes(&ps_spec,"Special sequence ignored."); } else specialch=ch; break; case '!': /* ! instructs FunnelWeb to ignore the rest of the line (a comment). */ skiptoeol(); break; case 'I': /* i instructs FunnelWeb to include a file. */ incl_fil(&ps_spec); break; case '^': /* ^ instructs FunnelWeb to insert a specific ascii character. */ do_ascii(&ps_spec); break; case '#': /* # instructs FunnelWeb to transmit a two character name "#c". */ do_name(&ps_spec); break; case 'P': /* P is used as a miscellaneous PRAGMA. */ do_pragma(&ps_spec,FALSE); break; case 'T': /* T introduces a one-line typesetting directive. */ do_pragma(&ps_spec,TRUE); break; default: lr_err(&ps_spec,"Unknown special sequence."); break; } /* The switch statment absorbs the special sequence and its effects. */ /* This NEXTCH places us on the character following the special sequence. */ NEXTCH; } /******************************************************************************/ LOCAL void scantext P_((void)); LOCAL void scantext() /* Upon entry, we know that the current character is not EOF and that it is */ /* not the special character. Our task is to parse as much text as we can and */ /* ship it off as a text token. The scanner will probably spend most of its */ /* time in the loops in this function so it is important that they be */ /* efficient. That is why two loops are used to deal with detecting */ /* whitespace rather than a flag. */ /* Upon return, the current character is the character following the text */ /* sequence. This is guaranteed to be the special character or an EOF. */ { ps_t ps_start; /* Position of first character of text sequence. */ char *p_first = p_ch; /* Pointer to first character of text sequence. */ /* Grab a copy of the position of this token. */ grabchps(&ps_start); /* Scan whitespace. */ while (ch==' ' || ch==EOL) NEXTCH; /* If we hit something that ends a text token */ /* then we can transmit a white text token. */ if (ch==specialch || ch==EOFCH) {sendtext(&ps_start,p_first,p_ch-1,TRUE); return;} /* Otherwise we have some more (non-white) text to scan. */ /* We can then send a non-white text token. */ while (ch!=specialch && ch!=EOFCH) NEXTCH; sendtext(&ps_start,p_first,p_ch-1,FALSE); } /******************************************************************************/ LOCAL void scan_file(p_fname) /* This function scans a single file. It's argument is the name of the file */ /* to be scanned. scan_file calls the mapper to map in the file and then */ /* scans the text of the mapped file using the liner mini-package. The result */ /* of the scan is additions to the line and token list, and diagnostics sent */ /* to the lister package. If an include directive is encountered, this */ /* function is called recursively. */ char *p_fname; { char *p_mapped; /* Pointer to the mapped file. */ ulong length; /* Number of bytes in the mapped file. */ char *errstr; /* Error string returned by mapper. */ bool addedeol; /* Did we have to add an EOL to the end of the last line? */ /* Check to see if the file exists. */ if (!fexists(p_fname)) { if (inclevel==0) { /* Failure to find the main file is a severe error. */ if (option.op_b7_b) sprintf(linet1,"S: Error opening input file \"%s\".",SUPPNAME); else sprintf(linet1,"S: Error opening input file \"%s\".",p_fname); wl_l(linet1); /* Although strictly speaking we should suppress this error from the */ /* screen stream unless option.op_s_b is set, absence of an input file */ /* is such an important error, that we write it out anyway. */ /* if (option.op_s_b) */ wl_sj(linet1); num_sev++; return; } else { /* Failure to find an include file is an ordinary error. */ ps_t ps; ps.ps_line = globalno; ps.ps_column = 4; lr_err(&ps,"Error opening include file."); if (option.op_b7_b) sprintf(linet1, "The include file's expanded name was \"%s\".",SUPPNAME); else sprintf(linet1, "The include file's expanded name was \"%s\".",p_fname); lr_mes(&ps,linet1); return; } } /* Map the specified file into memory. We need to change from the scanner */ /* clock to the mapper clock to keep the time accounting correct here. */ ck_stop(p_scan); ck_start(p_mapp); errstr=map_file(p_fname,&p_mapped,&length); ck_stop(p_mapp); ck_start(p_scan); /* Abort if the mapping was not possible. */ if (errstr != NULL) if (inclevel==0) { /* Failure to map the main file is a severe error. */ if (option.op_b7_b) sprintf(linet1,"S: Error reading input file \"%s\".",SUPPNAME); else sprintf(linet1,"S: Error reading input file \"%s\".",p_fname); wl_l(linet1); if (option.op_s_b) wl_sj(linet1); wl_l(errstr); if (option.op_s_b) wl_sj(errstr); num_sev++; return; } else { /* Failure to find an include file is an ordinary error. */ ps_t ps; ps.ps_line = globalno; ps.ps_column = 4; lr_err(&ps,"Error reading include file."); lr_mes(&ps,errstr); if (option.op_b7_b) sprintf(linet1,"The include file's expanded name was \"%s\".", SUPPNAME); else sprintf(linet1,"The include file's expanded name was \"%s\".", p_fname); lr_mes(&ps,linet1); return; } /* Dump the mapped file if requested. */ if (option.op_b1_b) { if (option.op_b7_b) sprintf(linet1,"Dump of mapped file \"%s\".",SUPPNAME); else sprintf(linet1,"Dump of mapped file \"%s\".",p_fname); wl_l(linet1); dm_mem(&f_l,p_mapped,length); } /* If the file is absolutely empty, we have to warn the user. Also, this is */ /* a special case we can do without, and so we return here if file is empty. */ if (length==0) { ps_t ps; /* The empty file could be the main file or an include file. */ /* If the empty file is the main file, we want the diagnostic to point to */ /* the EOF marker which will appear as line 1. */ /* If the empty file is an include file, we wish to point the diagnostic */ /* to the line containing the include command. This is globalno. */ /* In both cases, we want the diagnostic to point to column 1. */ ps.ps_column=1; if (inclevel==0) { ps.ps_line=1; lr_war(&ps,"Input file is empty (not a byte in syte)!"); } else { /* 08-May-1999 RNW Include file name added. */ /* See 8 May note at the end of the function. */ ps.ps_line=globalno; if (option.op_b7_b) sprintf(linet1,"Include file \"%s\" is empty (not a byte in syte)!",SUPPNAME); else sprintf(linet1,"Include file \"%s\" is empty (not a byte in syte)!",p_fname); lr_war(&ps,linet1); } return; } /* Scanning is considerably simplified if we can guarantee that we will not */ /* run into an EOF without first hitting an EOL. The following code takes */ /* care of this by tacking one on the end if necessary and also adds an */ /* EOF character on the end, which also simplifies the scanning. We can get */ /* away with all this because the mapper purposefully leaves at least two */ /* bytes free for us at the end of the mapped file. */ addedeol=FALSE; if (p_mapped[length-1] != EOL) {p_mapped[length++]=EOL; addedeol=TRUE;} p_mapped[length]=EOFCH; /* Initialize the variables "instantiated over a single file". */ inln_max = 80; specialch = CH_DSPE; localno = 0; p_eof = &p_mapped[length]; /* Crank up the line subscanner system with a call to prepline. */ /* Then enter the main scanning loop. */ /* All input consists of alternating special and text sequences */ /* terminated by EOF. */ prepline(p_mapped); while (ch!=EOFCH) if (ch==specialch) scanspec(); else scantext(); /* Now that we are at the end of the scanned file and the scanning markers */ /* are all sitting on the end of the file, it is a good time to issue */ /* diagnostics about problems at the end of the file. */ if (addedeol) { ps_t ps; /* We want the diagnostic to point to the EOF line. Hence "global+1". */ ps.ps_line = globalno+1; ps.ps_column = 1; if (inclevel==0) lr_war(&ps,"The last line of the input file was terminated by EOF."); else { /* 08-May-1999 RNW FunnelWeb 3.0 didn't give the name of the included */ /* file when saying that an included file was not properly terminated. */ /* This caused significant confusion when there was a list of */ /* inclusions, as FunnelWeb's error was seemingly pointing to the NEXT */ /* inclusion! So I've added code to include the filename in the error */ /* message so as to eliminate the possibility of such confusion. */ if (option.op_b7_b) sprintf(linet1,"The last line of the include file \"%s\" was terminated by EOF.",SUPPNAME); else sprintf(linet1,"The last line of the include file \"%s\" was terminated by EOF.",p_fname); lr_war(&ps,linet1); } lr_mes(&ps,"An EOL was inserted at the end of the last line."); } } /******************************************************************************/ EXPORT void scanner(p_amapp,p_ascan) /* This is the scanner's main routine and the only exported function. */ p_ck_t p_amapp; /* Mapper's clock (stopped). */ p_ck_t p_ascan; /* Scanner's clock (running). */ { /* Copy the arguments into globals where we can get at them. */ p_mapp=p_amapp; p_scan=p_ascan; /* Apart from diagnostic messages sent to the lister, the only output of */ /* the scanner is two global lists holding a list of lines and a list of */ /* tokens. The scanner creates these lists simultaneously. */ /* We have to initialize them here before we get into 'scan_file' which */ /* calls itself recursively if an include file command is encountered. */ line_list =ls_cre(sizeof(ln_t)); token_list=ls_cre(sizeof(tk_t)); /* Initialize all the variables instantiated throughout the entire scan. */ globalno = 0; inclevel = 0; seenind = FALSE; seentyp = FALSE; seenlimo = FALSE; /* We also have to initialize localno in case the input file is empty and */ /* it never gets initialized before being sucked into being used as the */ /* local number for the end of file marker. */ localno=0; /* Initialize the global indentation flag to the default value. */ tgindent=TRUE; /* Initialize the global product line length limit to the default value. */ tglinmax=80; /* Initialize the global typesetter flag to the default value. */ tr_codes=TR_NONE; /* Scan the top level file whose name is obtained from the command line. */ as_cold(option.op_f_b,"scanner: -F!!!!"); /* Work out what the input file name should be. */ { fn_t fname; strcpy(fname,""); /* Start with an empty string. */ fn_ins(fname,".fw"); fn_ins(fname,option.op_f_s); scan_file(fname); } /* The scan_file function scans the main input file and all of its included */ /* files, but it does not append a TK_EOF token to the end. This call does */ /* this and also adds a line to the line list for EOF. */ add_eof(); } /******************************************************************************/ /* End of SCANNER.C */ /******************************************************************************/