/*
** ____ _
** ___| _ \ ___ _ __| |
** / _ \ |_) / _ \ '__| |
** | __/ __/ __/ | | |
** \___|_| \___|_| |_|
**
** ePerl -- Embedded Perl 5 Language
**
** ePerl interprets an ASCII file bristled with Perl 5 program statements
** by evaluating the Perl 5 code while passing through the plain ASCII
** data. It can operate both as a standard Unix filter for general file
** generation tasks and as a powerful Webserver scripting language for
** dynamic HTML page programming.
**
** ======================================================================
**
** Copyright (c) 1996,1997,1998 Ralf S. Engelschall <rse@engelschall.com>
**
** This program is free software; it may be redistributed and/or modified
** only under the terms of either the Artistic License or the GNU General
** Public License, which may be found in the ePerl source distribution.
** Look at the files ARTISTIC and COPYING or run ``eperl -l'' to receive
** a built-in copy of both license files.
**
** This program is distributed in the hope that it will be useful, but
** WITHOUT ANY WARRANTY; without even the implied warranty of
** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See either the
** Artistic License or the GNU General Public License for more details.
**
** ======================================================================
**
** eperl_parse.c -- ePerl parser stuff
*/
#include "eperl_config.h"
#include "eperl_global.h"
#include "eperl_proto.h"
/*
**
** Static Data
**
*/
char *ePerl_begin_delimiter = NULL;
char *ePerl_end_delimiter = NULL;
int ePerl_case_sensitive_delimiters = TRUE;
int ePerl_convert_entities = FALSE;
int ePerl_line_continuation = FALSE;
static char ePerl_ErrorString[1024] = "";
/*
**
** Functions
**
*/
/*
** set ePerl error string
*/
void ePerl_SetError(char *str, ...)
{
va_list ap;
va_start(ap, str);
vsprintf(ePerl_ErrorString, str, ap);
va_end(ap);
return;
}
/*
** get ePerl error string
*/
char *ePerl_GetError(void)
{
return ePerl_ErrorString;
}
/*
** fprintf for internal buffer
*/
char *ePerl_fprintf(char *cpOut, char *str, ...)
{
va_list ap;
va_start(ap, str);
vsprintf(cpOut, str, ap);
va_end(ap);
return cpOut+strlen(cpOut);
}
/*
** fwrite for internal buffer
*/
char *ePerl_fwrite(char *cpBuf, int nBuf, int cNum, char *cpOut)
{
char *cp;
int n;
n = nBuf*cNum;
(void)strncpy(cpOut, cpBuf, n);
cp = cpOut + n;
*cp = NUL;
return cp;
}
/*
** fwrite for internal buffer WITH character escaping
*/
char *ePerl_Efwrite(char *cpBuf, int nBuf, int cNum, char *cpOut)
{
char *cpI;
char *cpO;
for (cpI = cpBuf, cpO = cpOut; cpI < (cpBuf+(nBuf*cNum)); ) {
switch (*cpI) {
case '"': *cpO++ = '\\'; *cpO++ = *cpI++; break;
case '@': *cpO++ = '\\'; *cpO++ = *cpI++; break;
case '$': *cpO++ = '\\'; *cpO++ = *cpI++; break;
case '\\': *cpO++ = '\\'; *cpO++ = *cpI++; break;
case '\t': *cpO++ = '\\'; *cpO++ = 't'; cpI++; break;
case '\n': *cpO++ = '\\'; *cpO++ = 'n'; cpI++; break;
default: *cpO++ = *cpI++;
}
}
*cpO = NUL;
return cpO;
}
/*
** fwrite for internal buffer WITH HTML entity conversion
*/
struct html2char {
char *h;
char c;
};
static struct html2char html2char[] = {
{ "copy", '©' }, /* Copyright */
{ "die", '¨' }, /* Diæresis / Umlaut */
{ "laquo", '«' }, /* Left angle quote, guillemot left */
{ "not", '¬' }, /* Not sign */
{ "ordf", 'ª' }, /* Feminine ordinal */
{ "sect", '§' }, /* Section sign */
{ "um", '¨' }, /* Diæresis / Umlaut */
{ "AElig", 'Æ' }, /* Capital AE ligature */
{ "Aacute", 'Á' }, /* Capital A, acute accent */
{ "Acirc", 'Â' }, /* Capital A, circumflex */
{ "Agrave", 'À' }, /* Capital A, grave accent */
{ "Aring", 'Å' }, /* Capital A, ring */
{ "Atilde", 'Ã' }, /* Capital A, tilde */
{ "Auml", 'Ä' }, /* Capital A, diæresis / umlaut */
{ "Ccedil", 'Ç' }, /* Capital C, cedilla */
{ "ETH", 'Ð' }, /* Capital Eth, Icelandic */
{ "Eacute", 'É' }, /* Capital E, acute accent */
{ "Ecirc", 'Ê' }, /* Capital E, circumflex */
{ "Egrave", 'È' }, /* Capital E, grave accent */
{ "Euml", 'Ë' }, /* Capital E, diæresis / umlaut */
{ "Iacute", 'Í' }, /* Capital I, acute accent */
{ "Icirc", 'Î' }, /* Capital I, circumflex */
{ "Igrave", 'Ì' }, /* Capital I, grave accent */
{ "Iuml", 'Ï' }, /* Capital I, diæresis / umlaut */
{ "Ntilde", 'Ñ' }, /* Capital N, tilde */
{ "Oacute", 'Ó' }, /* Capital O, acute accent */
{ "Ocirc", 'Ô' }, /* Capital O, circumflex */
{ "Ograve", 'Ò' }, /* Capital O, grave accent */
{ "Oslash", 'Ø' }, /* Capital O, slash */
{ "Otilde", 'Õ' }, /* Capital O, tilde */
{ "Ouml", 'Ö' }, /* Capital O, diæresis / umlaut */
{ "THORN", 'Þ' }, /* Capital Thorn, Icelandic */
{ "Uacute", 'Ú' }, /* Capital U, acute accent */
{ "Ucirc", 'Û' }, /* Capital U, circumflex */
{ "Ugrave", 'Ù' }, /* Capital U, grave accent */
{ "Uuml", 'Ü' }, /* Capital U, diæresis / umlaut */
{ "Yacute", 'Ý' }, /* Capital Y, acute accent */
{ "aacute", 'ß' }, /* Small a, acute accent */
{ "acirc", 'â' }, /* Small a, circumflex */
{ "acute", '´' }, /* Acute accent */
{ "aelig", 'æ' }, /* Small ae ligature */
{ "agrave", 'à' }, /* Small a, grave accent */
{ "amp", '&' }, /* Ampersand */
{ "aring", 'å' }, /* Small a, ring */
{ "atilde", 'ã' }, /* Small a, tilde */
{ "auml", 'ä' }, /* Small a, diæresis / umlaut */
{ "brkbar", '¦' }, /* Broken vertical bar */
{ "brvbar", '¦' }, /* Broken vertical bar */
{ "ccedil", 'ç' }, /* Small c, cedilla */
{ "cedil", '¸' }, /* Cedilla */
{ "cent", '¢' }, /* Cent sign */
{ "curren", '¤' }, /* General currency sign */
{ "deg", '°' }, /* Degree sign */
{ "divide", '÷' }, /* Division sign */
{ "eacute", 'é' }, /* Small e, acute accent */
{ "ecirc", 'ê' }, /* Small e, circumflex */
{ "egrave", 'è' }, /* Small e, grave accent */
{ "eth", 'ð' }, /* Small eth, Icelandic */
{ "euml", 'ë' }, /* Small e, diæresis / umlaut */
{ "frac12", '½' }, /* Fraction one-half */
{ "frac14", '¼' }, /* Fraction one-fourth */
{ "frac34", '¾' }, /* Fraction three-fourths */
{ "gt", '>' }, /* Greater than */
{ "hibar", '¯' }, /* Macron accent */
{ "iacute", 'í' }, /* Small i, acute accent */
{ "icirc", 'î' }, /* Small i, circumflex */
{ "iexcl", '¡' }, /* Inverted exclamation */
{ "igrave", 'ì' }, /* Small i, grave accent */
{ "iquest", '¿' }, /* Inverted question mark */
{ "iuml", 'ï' }, /* Small i, diæresis / umlaut */
{ "lt", '<' }, /* Less than */
{ "macr", '¯' }, /* Macron accent */
{ "micro", 'µ' }, /* Micro sign */
{ "middot", '·' }, /* Middle dot */
{ "nbsp", ' ' }, /* Non-breaking Space */
{ "ntilde", 'ñ' }, /* Small n, tilde */
{ "oacute", 'ó' }, /* Small o, acute accent */
{ "ocirc", 'ô' }, /* Small o, circumflex */
{ "ograve", 'ò' }, /* Small o, grave accent */
{ "ordm", 'º' }, /* Masculine ordinal */
{ "oslash", 'ø' }, /* Small o, slash */
{ "otilde", 'õ' }, /* Small o, tilde */
{ "ouml", 'ö' }, /* Small o, diæresis / umlaut */
{ "para", '¶' }, /* Paragraph sign */
{ "plusmn", '±' }, /* Plus or minus */
{ "pound", '£' }, /* Pound sterling */
{ "quot", '"' }, /* Quotation mark */
{ "raquo", '»' }, /* Right angle quote, guillemot right */
{ "reg", '®' }, /* Registered trademark */
{ "shy", '' }, /* Soft hyphen */
{ "sup1", '¹' }, /* Superscript one */
{ "sup2", '²' }, /* Superscript two */
{ "sup3", '³' }, /* Superscript three */
{ "szlig", 'ß' }, /* Small sharp s, German sz */
{ "thorn", 'þ' }, /* Small thorn, Icelandic */
{ "times", '×' }, /* Multiply sign */
{ "uacute", 'ú' }, /* Small u, acute accent */
{ "ucirc", 'û' }, /* Small u, circumflex */
{ "ugrave", 'ù' }, /* Small u, grave accent */
{ "uuml", 'ü' }, /* Small u, diæresis / umlaut */
{ "yacute", 'ý' }, /* Small y, acute accent */
{ "yen", '¥' }, /* Yen sign */
{ "yuml",'\255' }, /* Small y, diæresis / umlaut */
{ NULL, NUL }
};
char *ePerl_Cfwrite(char *cpBuf, int nBuf, int cNum, char *cpOut)
{
char *cpI;
char *cpO;
int i;
int n;
char *cpE;
cpI = cpBuf;
cpO = cpOut;
cpE = cpBuf+(nBuf*cNum);
while (cpI < cpE) {
if (*cpI == '&') {
for (i = 0; html2char[i].h != NULL; i++) {
n = strlen(html2char[i].h);
if (cpI+1+n+1 < cpE) {
if (*(cpI+1+n) == ';') {
if (strncmp(cpI+1, html2char[i].h, n) == 0) {
*cpO++ = html2char[i].c;
cpI += 1+n+1;
continue;
}
}
}
}
}
*cpO++ = *cpI++;
}
*cpO = NUL;
return cpO;
}
/*
**
** Own string functions with maximum length (n) support
**
*/
char *strnchr(char *buf, char chr, int n)
{
char *cp;
char *cpe;
for (cp = buf, cpe = buf+n-1; cp <= cpe; cp++) {
if (*cp == chr)
return cp;
}
return NULL;
}
#ifdef NOTDEF
char *strnstr(char *buf, char *str, int n)
{
char *cp;
char *cpe;
int len;
len = strlen(str);
for (cp = buf, cpe = buf+n-len; cp <= cpe; cp++) {
if (strncmp(cp, str, len) == 0)
return cp;
}
return NULL;
}
#endif
char *strncasestr(char *buf, char *str, int n)
{
char *cp;
char *cpe;
int len;
len = strlen(str);
for (cp = buf, cpe = buf+n-len; cp <= cpe; cp++) {
if (strncasecmp(cp, str, len) == 0)
return cp;
}
return NULL;
}
char *strndup(char *buf, int n)
{
char *cp;
cp = (char *)malloc(n+1);
strncpy(cp, buf, n);
return cp;
}
/*
** convert buffer from bristled format to plain format
*/
char *ePerl_Bristled2Plain(char *cpBuf)
{
char *rc;
char *cpOutBuf = NULL;
char *cpOut = NULL;
char *cps, *cpe;
char *cps2, *cpe2;
int nBuf;
char *cpEND;
int n;
if (strlen(cpBuf) == 0) {
/* make sure we return a buffer which the caller can free() */
cpOutBuf = (char *)malloc(sizeof(char) * 1);
*cpOutBuf = NUL;
return cpOutBuf;
}
nBuf = strlen(cpBuf);
cpEND = cpBuf+nBuf;
/* allocate memory for the Perl code */
n = sizeof(char) * nBuf * 10;
if (nBuf < 1024)
n = 16384;
if ((cpOutBuf = (char *)malloc(n)) == NULL) {
ePerl_SetError("Cannot allocate %d bytes of memory", n);
CU(NULL);
}
cpOut = cpOutBuf;
/* now step through the file and convert it to legal Perl code.
This is a bit complicated because we have to make sure that
we parse the correct delimiters while the delimiter
characters could also occur inside the Perl code! */
cps = cpBuf;
while (cps < cpEND) {
if (ePerl_case_sensitive_delimiters)
cpe = strnstr(cps, ePerl_begin_delimiter, cpEND-cps);
else
cpe = strncasestr(cps, ePerl_begin_delimiter, cpEND-cps);
if (cpe == NULL) {
/* there are no more ePerl blocks, so
just encapsulate the remaining contents into
Perl print constructs */
if (cps < cpEND) {
cps2 = cps;
/* first, do all complete lines */
while (cps2 < cpEND && (cpe2 = strnchr(cps2, '\n', cpEND-cps2)) != NULL) {
if (ePerl_line_continuation && cps < cpe2 && *(cpe2-1) == '\\') {
if (cpe2-1-cps2 > 0) {
cpOut = ePerl_fprintf(cpOut, "print \"");
cpOut = ePerl_Efwrite(cps2, cpe2-1-cps2, 1, cpOut);
cpOut = ePerl_fprintf(cpOut, "\";");
}
cpOut = ePerl_fprintf(cpOut, "\n");
}
else {
cpOut = ePerl_fprintf(cpOut, "print \"");
cpOut = ePerl_Efwrite(cps2, cpe2-cps2, 1, cpOut);
cpOut = ePerl_fprintf(cpOut, "\\n\";\n");
}
cps2 = cpe2+1;
}
/* then do the remainder which is not
finished by a newline */
if (cpEND > cps2) {
cpOut = ePerl_fprintf(cpOut, "print \"");
cpOut = ePerl_Efwrite(cps2, cpEND-cps2, 1, cpOut);
cpOut = ePerl_fprintf(cpOut, "\";");
}
}
break; /* and break the whole processing step */
}
else {
/* Ok, there is at least one more ePerl block */
/* first, encapsulate the content from current pos
up to the begin of the ePerl block as print statements */
if (cps < cpe) {
cps2 = cps;
while ((cpe2 = strnchr(cps2, '\n', cpe-cps2)) != NULL) {
if (ePerl_line_continuation && cps < cpe2 && *(cpe2-1) == '\\') {
if (cpe2-1-cps2 > 0) {
cpOut = ePerl_fprintf(cpOut, "print \"");
cpOut = ePerl_Efwrite(cps2, cpe2-1-cps2, 1, cpOut);
cpOut = ePerl_fprintf(cpOut, "\";");
}
cpOut = ePerl_fprintf(cpOut, "\n");
}
else {
cpOut = ePerl_fprintf(cpOut, "print \"");
cpOut = ePerl_Efwrite(cps2, cpe2-cps2, 1, cpOut);
cpOut = ePerl_fprintf(cpOut, "\\n\";\n");
}
cps2 = cpe2+1;
}
if (cpe > cps2) {
cpOut = ePerl_fprintf(cpOut, "print \"");
cpOut = ePerl_Efwrite(cps2, cpe-cps2, 1, cpOut);
cpOut = ePerl_fprintf(cpOut, "\";");
}
}
/* just output a leading space to make
the -x display more readable. */
if (cpOut > cpOutBuf && *(cpOut-1) != '\n')
cpOut = ePerl_fprintf(cpOut, " ");
/* skip the start delimiter */
cps = cpe+strlen(ePerl_begin_delimiter);
/* recognize the 'print' shortcut with '=',
* e.g. <:=$var:>
*/
if (*cps == '=') {
cpOut = ePerl_fprintf(cpOut, "print ");
cps++;
}
/* skip all following whitespaces.
Be careful: we could skip newlines too, but then the
error output will give wrong line numbers!!! */
while (cps < cpEND) {
if (*cps != ' ' && *cps != '\t')
break;
cps++;
}
cpe = cps;
/* move forward to end of ePerl block. */
if (ePerl_case_sensitive_delimiters)
cpe = strnstr(cpe, ePerl_end_delimiter, cpEND-cpe);
else
cpe = strncasestr(cpe, ePerl_end_delimiter, cpEND-cpe);
if (cpe == NULL) {
ePerl_SetError("Missing end delimiter");
CU(NULL);
}
/* step again backward over whitespaces */
for (cpe2 = cpe;
cpe2 > cps && (*(cpe2-1) == ' ' || *(cpe2-1) == '\t' || *(cpe2-1) == '\n');
cpe2--)
;
/* pass through the ePerl block without changes! */
if (cpe2 > cps) {
if (ePerl_convert_entities == TRUE)
cpOut = ePerl_Cfwrite(cps, cpe2-cps, 1, cpOut);
else
cpOut = ePerl_fwrite(cps, cpe2-cps, 1, cpOut);
/* be smart and automatically add a semicolon
if not provided at the end of the ePerl block.
But know the continuation indicator "_". */
if ((*(cpe2-1) != ';') &&
(*(cpe2-1) != '_') )
cpOut = ePerl_fprintf(cpOut, ";");
if (*(cpe2-1) == '_')
cpOut = cpOut - 1;
}
/* end preserve newlines for correct line numbers */
for ( ; cpe2 <= cpe; cpe2++)
if (*cpe2 == '\n')
cpOut = ePerl_fprintf(cpOut, "\n");
/* output a trailing space to make
the -x display more readable when
no newlines have finished the block. */
if (cpOut > cpOutBuf && *(cpOut-1) != '\n')
cpOut = ePerl_fprintf(cpOut, " ");
/* and adjust the current position to the first character
after the end delimiter */
cps = cpe+strlen(ePerl_end_delimiter);
/* finally just one more feature: when an end delimiter
is directly followed by ``//'' this discards all
data up to and including the following newline */
if (cps < cpEND-2 && *cps == '/' && *(cps+1) == '/') {
/* skip characters */
cps += 2;
for ( ; cps < cpEND && *cps != '\n'; cps++)
;
if (cps < cpEND)
cps++;
/* but preserve the newline in the script */
cpOut = ePerl_fprintf(cpOut, "\n");
}
}
}
RETURN_WVAL(cpOutBuf);
CUS:
if (cpOutBuf)
free(cpOutBuf);
RETURN_EXRC;
}
/*EOF*/
syntax highlighted by Code2HTML, v. 0.9.1