/*
 * Copyright (c) 2001-2002 Secure Software, Inc
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 *
 */

%x IN_PHP_SCRIPT
%x IN_PHP_OCOMMENT
%option stack

%{
#include <string.h>     /* memmove(), strchr(), strlen() */
#include "tokens.h"
#include "engine.h"

/* Running column, advanced by count() and by the hand-written string and
 * comment consumers; reset to 0 whenever a newline is seen. */
int phplexreal_column = 0;
/* Set by count() to phplexreal_column + 1 before the current match is
 * measured (left untouched while phplexreal_column is 0). */
int phplex_column = 0;
/* Current line number, starting at 1. */
int phplex_lineno = 1;
/* Text of the most recent C-style comment: bytes in use, bytes allocated,
 * and the NUL-terminated buffer itself (NULL until first needed). */
int yyphplength = 0;
int yyphpsize = 0;
char *yyphpcomment = NULL;

static void count(void);
static int identifier(void);
static void reset_comment(void);
static int cstyle_comment(void);
static void no_match(void);
static void gobble_string(char c);
static void scan_yytext(void);

/*
 * Custom input reader for the generated scanner.
 *
 * Reads up to max_size bytes from yyin into buf and stores the number of
 * bytes made available in result.  A read error is fatal.  The data is then
 * filtered in place:
 *   - every '\r' becomes a space;
 *   - a backslash immediately followed by '\n' is collapsed into a single
 *     '\r' marker (the newline is removed and result shrinks by one);
 *   - if the very last byte read is a backslash it is handed back to the
 *     stream with fseek() so that a backslash/newline pair split across two
 *     reads is seen whole on the next call.
 *
 * Nothing is inspected when no bytes were read: with result == 0 there is no
 * last byte, and looking at buf[-1] would be out of bounds.
 * The memmove() shifts exactly the bytes that follow the removed newline
 * (end - c - 1 of them), never touching memory past the data just read.
 */
#define YY_INPUT(buf, result, max_size) \
    if (((result = fread(buf, 1, max_size, yyin)) == 0) && ferror(yyin)) { \
        YY_FATAL_ERROR("input in flex scanner failed"); \
    } else if (result > 0) { \
        char *c, *end = (buf) + result - 1; \
        for (c = (buf); c < end; c++) { \
            if (*c == '\r') *c = ' '; \
            if (*c == '\\' && *(c + 1) == '\n') { \
                memmove(c + 1, c + 2, end - c - 1); \
                result--; \
                end--; \
                *c = '\r'; \
            } \
        } \
        if (*end == '\r') *end = ' '; \
        if (*end == '\\') { \
            result--; \
            fseek(yyin, -1, SEEK_CUR); \
        } \
    }
%}

LNUM [0-9]+
DNUM ([0-9]*[\.][0-9]+)|([0-9]+[\.][0-9]*)
EXPONENT_DNUM (({LNUM}|{DNUM})[eE][+-]?{LNUM})
HNUM "0x"[0-9a-fA-F]+
LABEL [a-zA-Z_\x7f-\xff][a-zA-Z0-9_\x7f-\xff]*
WHITESPACE [ \n\r\t]+
TABS_AND_SPACES [ \t]*
TOKENS [;:,.\[\]()|^&+-/*=%!~$<>?@]
NEWLINE ("\r"|"\n"|"\r\n")

%%

    /* NOTE(review): the pattern of the first rule is an empty string
     * literal, which looks like extraction damage (an opening-tag pattern
     * and its start condition were probably lost) -- confirm against the
     * upstream file. */
"" {
BEGIN(IN_PHP_SCRIPT); scan_yytext(); count(); return TOKEN_PHP_IN_SCRIPT; }

    /*
     * Scanner rules.  Almost every action calls count() to keep the column
     * bookkeeping current and then returns a token code.  Rules that can
     * match line breaks also call scan_yytext(), which bumps phplex_lineno.
     *
     * NOTE(review): several patterns below look damaged (adjacent empty
     * string literals, unbalanced quotes) and most rules carry no
     * start-condition prefix even though IN_PHP_SCRIPT and IN_PHP_OCOMMENT
     * are declared exclusive.  Presumably text between angle brackets was
     * lost -- confirm against the upstream file before relying on them.
     */

    /* Entering and leaving script mode: BEGIN switches the start condition
     * and TOKEN_PHP_IN_SCRIPT is returned in both directions. */
"<%="|""<%" { BEGIN(IN_PHP_SCRIPT); count(); return TOKEN_PHP_IN_SCRIPT; }
"("?>"|""){NEWLINE}? { BEGIN(INITIAL); scan_yytext(); count(); return TOKEN_PHP_IN_SCRIPT; }
"%>"{NEWLINE}? { BEGIN(INITIAL); scan_yytext(); count(); return TOKEN_PHP_IN_SCRIPT; }

    /* One-line comments switch to IN_PHP_OCOMMENT; block comments are
     * consumed by hand in cstyle_comment(). */
"#"|"//" { BEGIN(IN_PHP_OCOMMENT); count(); return TOKEN_COMMENT; }
"/*" {count();return cstyle_comment();}

"$" {count();return '$';}

    /* Keywords. */
"old_function" {count();return TOKEN_FUNCTION;}
"function"|"cfunction" {count();return TOKEN_FUNCTION;}
"const" {count();return TOKEN_CONST;}
"return" {count();return TOKEN_RETURN;}
"if" {count();return TOKEN_IF;}
"elseif" {count();return TOKEN_ELSEIF;}
"else" {count();return TOKEN_ELSE;}
"while" {count();return TOKEN_WHILE;}
"endwhile" {count();return TOKEN_ENDWHILE;}
"do" {count();return TOKEN_DO;}
"for" {count();return TOKEN_FOR;}
"endfor" {count();return TOKEN_ENDFOR;}
"foreach" {count();return TOKEN_FOREACH;}
"endforeach" {count();return TOKEN_ENDFOREACH;}
"declare" {count();return TOKEN_DECLARE;}
"enddeclare" {count();return TOKEN_ENDDECLARE;}
"as" {count();return TOKEN_AS;}
"switch" {count();return TOKEN_SWITCH;}
"endswitch" {count();return TOKEN_ENDSWITCH;}
"case" {count();return TOKEN_CASE;}
"default" {count();return TOKEN_DEFAULT;}
"break" {count();return TOKEN_BREAK;}
"continue" {count();return TOKEN_CONTINUE;}
"print" {count();return TOKEN_PRINT;}
"class" {count();return TOKEN_CLASS;}
"extends" {count();return TOKEN_EXTENDS;}
"var" {count();return TOKEN_VAR;}

    /* Multi-character operators. */
"=>" {count();return TOKEN_DOUBLE_ARROW;}
"++" {count();return TOKEN_INC_OP;}
"--" {count();return TOKEN_DEC_OP;}
"===" {count();return TOKEN_T_EQUAL;}
"!==" {count();return TOKEN_T_NOTEQUAL;}
"==" {count();return TOKEN_EQ_OP;}
"!="|"<>" {count();return TOKEN_NE_OP;}
"<=" {count();return TOKEN_LE_OP;}
">=" {count();return TOKEN_GE_OP;}
"+=" {count();return TOKEN_ADD_ASSIGN;}
"-=" {count();return TOKEN_SUB_ASSIGN;}
"*=" {count();return TOKEN_MUL_ASSIGN;}
"/=" {count();return TOKEN_DIV_ASSIGN;}
".=" {count();return TOKEN_CONCAT_ASSIGN;}
"%=" {count();return TOKEN_MOD_ASSIGN;}
"<<=" {count();return TOKEN_LEFT_ASSIGN;}
">>=" {count();return TOKEN_RIGHT_ASSIGN;}
"&=" {count();return TOKEN_AND_ASSIGN;}
"|=" {count();return TOKEN_OR_ASSIGN;}
"^=" {count();return TOKEN_XOR_ASSIGN;}
"||" {count();return TOKEN_OR_OP;}
"&&" {count();return TOKEN_AND_OP;}
    /* Word operators; only the upper-case spelling is listed here. */
"OR" {count();return TOKEN_OR_OP;}
"AND" {count();return TOKEN_AND_OP;}
"XOR" {count();return TOKEN_XOR_OP;}
"<<" {count();return TOKEN_LEFT_OP;}
">>" {count();return TOKEN_RIGHT_OP;}

    /* Numeric literals: hexadecimal gets its own token, every decimal,
     * fractional and exponent form is reported as TOKEN_DEC_CONST. */
{HNUM} {count();return TOKEN_HEX_CONST;}
{DNUM} {count();return TOKEN_DEC_CONST;}
{LNUM} {count();return TOKEN_DEC_CONST;}
{EXPONENT_DNUM} {count();return TOKEN_DEC_CONST;}

    /* Any other label; it is listed after the keyword rules above. */
{LABEL} {count();return identifier();}

    /* Single-character tokens are returned as their own character code. */
";" { count();return ';'; }
"{" { count();return '{'; }
"}" { count();return '}'; }
"," { count();return ','; }
":" { count();return ':'; }
"=" { count();return '='; }
"(" { count();return '('; }
")" { count();return ')'; }
"[" { count();return '['; }
"]" { count();return ']'; }
"." { count();return '.'; }
"&" { count();return '&'; }
"!" { count();return '!'; }
"~" { count();return '~'; }
"-" { count();return '-'; }
"+" { count();return '+'; }
"*" { count();return '*'; }
"/" { count();return '/'; }
"%" { count();return '%'; }
"<" { count();return '<'; }
"`" { count();return '`'; }
">" { count();return '>'; }
"^" { count();return '^'; }
"@" {count();return '@'; }
"|" { count();return '|'; }
"?" { count();return '?'; }

    /* String literals: only the opening quote is matched by the pattern;
     * gobble_string() then reads input up to the matching closing quote. */
("\"") { count();gobble_string('"'); return TOKEN_STRING_CONST; }
("'") { count();gobble_string('\''); return TOKEN_STRING_CONST; }

    /* Horizontal white space is dropped in every start condition; note that
     * this action does not call count(). */
<*>[ \t\v\f] { /* eat white space */ }

    /* NOTE(review): the two line-break rules and the catch-all '.' rules
     * below have identical patterns.  The first line-break rule switches
     * back to IN_PHP_SCRIPT, so it presumably belonged to IN_PHP_OCOMMENT;
     * the others presumably carried start-condition prefixes as well --
     * confirm against the upstream file. */
[\n\r] { BEGIN(IN_PHP_SCRIPT); count();phplex_lineno++; }
[\n\r] { count();phplex_lineno++; }
. { count();/* eat it! */}
. { count();no_match(); }
.
{ count();/* it's just HTML, we don't care */}

%%

/*
 * Called by the generated scanner when it reaches end of input.
 * Returning 1 reports that there is no further input to continue with.
 */
int yywrap(void)
{
    return 1;
}

/*
 * Updates the column counters for the text just matched (yytext).
 *
 * phplex_column is set to phplexreal_column + 1 before the match is
 * measured, unless phplexreal_column is 0.  Then phplexreal_column is
 * advanced over every character of the match: a newline resets both
 * counters to 0, a tab advances to the next multiple of 8, anything else
 * counts as one column.
 */
static void count(void)
{
    int i;

    if (phplexreal_column != 0) {
        phplex_column = phplexreal_column + 1;
    }

    for (i = 0; yytext[i] != '\0'; i++) {
        if (yytext[i] == '\n') {
            phplexreal_column = 0;
            phplex_column = 0;
        } else if (yytext[i] == '\t') {
            phplexreal_column += 8 - (phplexreal_column % 8);
        } else {
            phplexreal_column++;
        }
    }
}

/*
 * Consumes the remainder of a string literal whose opening quote has
 * already been matched.
 *
 * which: the quote character that terminates the literal.
 *
 * Input is read up to and including the first unescaped occurrence of
 * `which`, or until end of input.  A backslash escapes the character that
 * follows it; a newline clears the escape state, resets the column counters
 * and increments phplex_lineno.
 */
static void gobble_string(char which)
{
    int bslash = 0;
    /* input() returns an int; keeping it in an int lets a 0xFF byte be told
     * apart from EOF regardless of whether plain char is signed. */
    int c;

    while ((c = input()) != 0 && c != EOF) {
        phplexreal_column++;
        switch (c) {
        case '\\':
            /* A second backslash in a row cancels the escape. */
            bslash = !bslash;
            break;

        case '\n':
            phplexreal_column = 0;
            phplex_column = 0;
            phplex_lineno++;
            bslash = 0;
            break;

        default:
            if (c == which && !bslash) {
                return;
            }
            bslash = 0;
            break;
        }
    }
}

/*
 * Walks the matched text and, for every '\n' or '\r' in it, resets the
 * column counters and increments phplex_lineno.
 */
static void scan_yytext(void)
{
    char *tmp;

    for (tmp = yytext; *tmp != '\0'; tmp++) {
        if (*tmp == '\n' || *tmp == '\r') {
            phplexreal_column = 0;
            phplex_column = 0;
            phplex_lineno++;
        }
    }
}

/*
 * Finishes an identifier match.
 *
 * Every '\r' found in yytext is deleted in place, and each deletion resets
 * the column counters and increments phplex_lineno.  Always returns
 * TOKEN_IDENTIFIER.
 */
static int identifier(void)
{
    char *c;

    while ((c = strchr(yytext, '\r')) != NULL) {
        /* strlen(c) bytes starting at c + 1 include the terminating NUL. */
        memmove(c, c + 1, strlen(c));
        phplexreal_column = 0;
        phplex_column = 0;
        phplex_lineno++;
    }
    return TOKEN_IDENTIFIER;
}

/*
 * Reports the current match as an unrecognised token on stderr, prefixed
 * with current_file and the current line number.
 */
static void no_match(void)
{
    fprintf(stderr, "%s:%d: warning: bad token `%s'\n", current_file, phplex_lineno, yytext);
}

/*
 * Appends `length` bytes from `data` to the comment buffer yyphpcomment and
 * keeps it NUL-terminated.
 *
 * The buffer grows in multiples of 128 bytes.  If memory cannot be
 * obtained the data is silently dropped and the existing buffer is left
 * unchanged.
 */
static void accumulate_comment(char *data, int length)
{
    int need;
    char *text;

    /* Room for the existing text, the new bytes and the terminator,
     * rounded up to the next multiple of 128. */
    need = yyphplength + length + 1;
    need = (need + 127) / 128 * 128;

    if (need > yyphpsize) {
        /* yyphpcomment is NULL while yyphpsize is 0, and realloc(NULL, n)
         * allocates a fresh block. */
        text = (char *)realloc(yyphpcomment, need);
        if (text == NULL) {
            return;
        }
        yyphpsize = need;
        yyphpcomment = text;
    }

    memcpy(yyphpcomment + yyphplength, data, length);
    yyphplength += length;
    yyphpcomment[yyphplength] = '\0';
}

/*
 * Empties the comment buffer without releasing its memory.
 */
static void reset_comment(void)
{
    if (yyphpcomment != NULL) {
        *yyphpcomment = '\0';
    }
    yyphplength = 0;
}

/*
 * Consumes a block comment whose opening delimiter has already been
 * matched, storing its text in yyphpcomment.
 *
 * Input is read until the closing star-slash pair or end of input.  Every
 * character read before the final '/' is stored exactly once, so the stored
 * text ends with the '*' of the closing delimiter.  Line breaks inside the
 * comment reset the column counters and increment phplex_lineno.
 * Always returns TOKEN_COMMENT.
 */
static int cstyle_comment(void)
{
    /* input() returns an int; see gobble_string() for why it stays one. */
    int c;
    char ch;

    reset_comment();

    while ((c = input()) != 0 && c != EOF) {
        phplexreal_column++;
        ch = (char)c;
        accumulate_comment(&ch, 1);
        if (c == '\n' || c == '\r') {
            phplexreal_column = 0;
            phplex_column = 0;
            phplex_lineno++;
        }

        /* After a '*', look at the next character to see whether the
         * comment ends here. */
        while (c == '*') {
            phplexreal_column++;
            c = input();
            if (c == 0 || c == EOF) {
                return TOKEN_COMMENT;
            }
            if (c == '\n' || c == '\r') {
                phplexreal_column = 0;
                phplex_column = 0;
                phplex_lineno++;
            }
            if (c == '/') {
                return TOKEN_COMMENT;
            }
            /* The '*' itself is already in the buffer; add only the
             * character that followed it. */
            ch = (char)c;
            accumulate_comment(&ch, 1);
        }
    }
    return TOKEN_COMMENT;
}