/*
 * LANGCOMPILE - Language compiler
 *
 * Author:
 * Emile van Bergen, emile@evbergen.xs4all.nl
 *
 * Permission to redistribute an original or modified version of this program
 * in source, intermediate or object code form is hereby granted exclusively
 * under the terms of the GNU General Public License, version 2. Please see the
 * file COPYING for details, or refer to http://www.gnu.org/copyleft/gpl.html.
 *
 * History:
 * 2001/04/27 - EvB - Created
 * 2002/04/23 - EvB - Sanitized conversion operators
 */

char langcompile_id[] = "LANGCOMPILE - Copyright (C) 2001 Emile van Bergen.";

/* NOTE: If this file is compiled with -DCOMP_MEM, then the compiler is built
   to operate on source provided as a string instead of a TEXT stream. Can be
   used for testing purposes. */


/*
 * INCLUDES & DEFINES
 */


/* NOTE(review): every #include below is missing its header name (the
   <...> operand). The trailing comments suggest <stdlib.h> and <string.h>
   for the first two; the remaining ones are presumably project headers
   providing MIN, TEXT, IFACE, the OP_* opcodes and the msg()/D1() debug
   facilities - restore them from the original source before building. */

#include	/* For strtol */
#include	/* For strncpy */

#include	/* For MIN */
#ifndef COMP_MEM
#include	/* For TEXT */
#endif
#include	/* For IFACE */
#include
#include


/* (Shortened) type definitions for use in the standard operator table */

#define NON	-1	/* As context: no type (unary prefix op) */
			/* As right term's cvt tgt: don't recurse */
			/*   (unary postfix op) */
			/* As new context: re-allow unary prefix ops */
#define ANY	-2	/* As context: any type */
			/* As left term's cvt tgt: don't convert */
			/* As right cvt tgt: don't require anything */
#define INT	MT_INTEGER
#define IPA	MT_IPADDR
#define DAT	MT_DATE
#define STR	MT_STRING

/* Left or right associating */

#define L	0
#define R	1


/*
 * TYPES
 */


/* Standard operator definition. One row of the stdops table below; the
   compiler (reccompile) matches 'name' against the source text and 'ctx_l'
   against the type of the value currently on top of the stack. */

typedef struct stdop {
	char *name;		/* Operator name */
	int ctx_l;		/* Op applies in this context only */
	int prec;		/* Precedence */
	int assoc_r;		/* Right associating operator if nonzero */
	int cvt_l;		/* Convert left term to type */
	int cvt_r;		/* Required type for right term */
	int newctx;		/* Set context to this */
	int op;			/* Opcode to emit */
} STDOP;


/*
 * GLOBALS
 */


/* The standard operators we know.

   The table is searched front to back and the first entry wins whose name
   is a prefix of the remaining source and whose ctx_l equals the current
   context (or is ANY while the current context is not NON). Row order is
   therefore significant: longer names must precede names they start with
   ("delall" before "del", "not" before "no", ">=" before ">", "==" before
   "=") and type-specific rows must precede the ANY row of the same name.

   STDOP_CNT must equal the number of rows. */

#define STDOP_CNT	85

static STDOP stdops[STDOP_CNT] = {

	/* Statements (don't leave anything on stack) */

	{ "halt", NON, 32, L, ANY, NON, INT, OP_HALT },
	{ "abort", NON, 32, L, ANY, NON, INT, OP_ABORT },
	{ "accept", NON, 32, L, ANY, NON, INT, OP_ACCEPT },
	{ "reject", NON, 32, L, ANY, NON, INT, OP_REJECT },
	{ "acctresp", NON, 32, L, ANY, NON, INT, OP_ACCTRESP },

	/* High precedence unary prefix operators. Unary prefix is always R. */

	{ "delall", NON, 32, R, ANY, ANY, INT, OP_DELALLAV },
	{ "del", NON, 32, R, ANY, ANY, INT, OP_DELAV },
	{ "moveall", NON, 32, R, ANY, ANY, INT, OP_MOVEALLAV },
	{ "all", NON, 32, R, ANY, ANY, STR, OP_JOINSTR },

	/* Typecasts / auto-conversion (uses autodetected format for ints).
	   These emit no opcode of their own (OP_NOP); the conversion comes
	   from converting the left term to cvt_l. */

	{ "toint", ANY, 30, L, INT, NON, INT, OP_NOP },
	{ "toip", ANY, 30, L, IPA, NON, IPA, OP_NOP },
	{ "todate", ANY, 30, L, DAT, NON, DAT, OP_NOP },
	{ "tostr", ANY, 30, L, STR, NON, STR, OP_NOP },

	/* Convert particular string format to int (oct/dec/hex/raw -> int) */

	{ "fromoct", ANY, 30, L, STR, NON, INT, OP_OCTSTR2ORD },
	{ "fromdec", ANY, 30, L, STR, NON, INT, OP_DECSTR2ORD },
	{ "fromhex", ANY, 30, L, STR, NON, INT, OP_HEXSTR2ORD },
	{ "fromraw", ANY, 30, L, STR, NON, INT, OP_RAWSTR2ORD },

	/* Convert int to particular string format (int -> oct/dec/hex/raw) */

	{ "tooct", ANY, 30, L, INT, NON, STR, OP_ORD2OCTSTR },
	{ "todec", ANY, 30, L, INT, NON, STR, OP_ORD2DECSTR },
	{ "tohex", ANY, 30, L, INT, NON, STR, OP_ORD2HEXSTR },
	{ "toraw", ANY, 30, L, INT, NON, STR, OP_ORD2RAWSTR },

	/* Other unary postfix operators */

	{ "exists", ANY, 30, L, ANY, NON, INT, OP_EXISTS },

	/* Other unary prefix operators. If in doubt, use prefix, not postfix */

	{ "~", NON, 28, R, ANY, INT, INT, OP_NOT },
	{ "-", NON, 28, R, ANY, INT, INT, OP_NEG },
	{ "dictencode", NON, 28, R, ANY, ANY, STR, OP_DICTENC },
	{ "!", NON, 28, R, ANY, ANY, INT, OP_BOOLNOT },
	{ "not", NON, 28, R, ANY, ANY, INT, OP_BOOLNOT },
	{ "no", NON, 28, R, ANY, ANY, INT, OP_BOOLNOT },
	{ "random", NON, 28, R, ANY, ANY, STR, OP_RANDOM },
	{ "md5", NON, 28, R, ANY, STR, STR, OP_MD5 },
	{ "hex", NON, 28, R, ANY, STR, STR, OP_HEX },
	{ "upper", NON, 28, R, ANY, STR, STR, OP_UPPER },
	{ "lower", NON, 28, R, ANY, STR, STR, OP_LOWER },

	/* Binary operators */

	{ "*", ANY, 25, L, INT, INT, INT, OP_MUL },
	{ "/", IPA, 25, L, ANY, INT, IPA, OP_CIDRMASK },
	{ "/", ANY, 25, L, INT, INT, INT, OP_DIV },
	{ "%", ANY, 25, L, INT, INT, INT, OP_MOD },

	{ "+", DAT, 24, L, ANY, INT, DAT, OP_ADD },
	{ "+", IPA, 24, L, ANY, INT, IPA, OP_ADD },
	{ "+", ANY, 24, L, INT, INT, INT, OP_ADD },
	{ "-", ANY, 24, L, INT, INT, INT, OP_SUB },

	{ ">>", ANY, 23, L, INT, INT, INT, OP_SHR },
	{ "<<", ANY, 23, L, INT, INT, INT, OP_SHL },

	{ "^", INT, 22, L, ANY, INT, INT, OP_XOR },
	{ "^", IPA, 22, L, ANY, IPA, IPA, OP_XOR },
	{ "^", DAT, 22, L, ANY, DAT, DAT, OP_XOR },
	{ "^", STR, 22, L, ANY, STR, STR, OP_XORSTR },

	{ "&", ANY, 21, L, INT, INT, INT, OP_AND },

	{ "|", ANY, 20, L, INT, INT, INT, OP_OR },

	{ "asmac", ANY, 18, L, STR, STR, STR, OP_STR2MAC },
	{ "papdecrypt", ANY, 18, L, STR, STR, STR, OP_PAPDECR },
	{ "papencrypt", ANY, 18, L, STR, STR, STR, OP_PAPENCR },
	{ "unixcrypt", ANY, 18, L, STR, STR, STR, OP_UNIXCRYPT },
	{ "hmacmd5", ANY, 18, L, STR, STR, STR, OP_HMACMD5 },
	{ "dictdecode", ANY, 18, L, STR, ANY, INT, OP_DICTDEC },

	/* Convert to run-time specified format (only implemented for dates) */

	{ "as", ANY, 18, L, DAT, STR, STR, OP_ORD2DFMSTR },

	{ "beforefirst", ANY, 16, L, STR, STR, STR, OP_BF },
	{ "afterfirst", ANY, 16, L, STR, STR, STR, OP_AF },
	{ "beforelast", ANY, 16, L, STR, STR, STR, OP_BL },
	{ "afterlast", ANY, 16, L, STR, STR, STR, OP_AL },
	{ "firstof", ANY, 16, R, INT, STR, STR, OP_FO },
	{ "lastof", ANY, 16, R, INT, STR, STR, OP_LO },

	{ ".", ANY, 14, L, STR, STR, STR, OP_CONCAT },

	{ ">=", STR, 12, L, ANY, STR, INT, OP_GESTR },
	{ ">=", ANY, 12, L, INT, INT, INT, OP_GE },
	{ "<=", STR, 12, L, ANY, STR, INT, OP_LESTR },
	{ "<=", ANY, 12, L, INT, INT, INT, OP_LE },
	{ ">", STR, 12, L, ANY, STR, INT, OP_GTSTR },
	{ ">", ANY, 12, L, INT, INT, INT, OP_GT },
	{ "<", STR, 12, L, ANY, STR, INT, OP_LTSTR },
	{ "<", ANY, 12, L, INT, INT, INT, OP_LT },

	{ "!=", STR, 11, L, ANY, STR, INT, OP_NESTR },
	{ "!=", ANY, 11, L, ANY, ANY, INT, OP_NE },
	{ "==", STR, 11, L, ANY, STR, INT, OP_EQSTR },
	{ "==", ANY, 11, L, ANY, ANY, INT, OP_EQ },

	/* Assignment-style operators; the left context selects the row */

	{ "=", INT, 7, R, ANY, INT, INT, OP_ADDAV },
	{ "=", IPA, 7, R, ANY, IPA, IPA, OP_ADDAV },
	{ "=", DAT, 7, R, ANY, DAT, DAT, OP_ADDAV },
	{ "=", STR, 7, R, ANY, STR, STR, OP_ADDAV },
	{ ":=", INT, 7, R, ANY, INT, INT, OP_REPLACEAV },
	{ ":=", IPA, 7, R, ANY, IPA, IPA, OP_REPLACEAV },
	{ ":=", DAT, 7, R, ANY, DAT, DAT, OP_REPLACEAV },
	{ ":=", STR, 7, R, ANY, STR, STR, OP_REPLACEAV },
	{ "pokedwith", STR, 7, R, ANY, STR, STR, OP_POKEAV },

	/* Comma: lowest precedence; sets the context back to NON so that a
	   new term or unary prefix operator may follow */

	{ ",", ANY, 1, L, ANY, NON, NON, OP_POP }
};


/*
 * FUNCTIONS
 */


#ifndef COMP_MEM

/* Debug helper: passes at most buflen bytes of the pending data in t's
   ring buffer through meta_atoprt() into a static buffer, terminates it
   and returns it. Used for the '%s' arguments of error messages.

   The returned pointer refers to a single static buffer, so every call
   overwrites the result of the previous one.

   NOTE(review): the return value of meta_atoprt() is used as an index
   without a range check; this assumes it is always within
   0 .. DBG_CVTBUFLEN - 1 - confirm against meta_atoprt(). */

static char *dbg_cvttxt(TEXT *t, ssize_t buflen)
{
	static char dbg_cvtbuf[DBG_CVTBUFLEN];
	int len;

	len = meta_atoprt(t->f->r->buf, MIN(ring_maxget(t->f->r), buflen),
			  t->f->r->r, t->f->r->size, 0, 0,
			  dbg_cvtbuf, DBG_CVTBUFLEN - 1);
	dbg_cvtbuf[len] = 0;
	return dbg_cvtbuf;
}

#endif


/* Returns auto conversion opcode based on source and destination type.
   Only conversions to or from STR need an actual opcode; every other
   combination (including STR to STR and unknown types such as NON or ANY)
   falls through to OP_NOP, which callers treat as "emit nothing". */

static int cvtop(int srctype, int dsttype)
{
	if (srctype == STR) {
		switch(dsttype) {
			case INT: return OP_INTSTR2ORD;
			case IPA: return OP_IPASTR2ORD;
			case DAT: return OP_DATSTR2ORD;
		}
	}
	else if (dsttype == STR) {
		switch(srctype) {
			case INT: return OP_ORD2DECSTR;
			case IPA: return OP_ORD2IPASTR;
			case DAT: return OP_ORD2DATSTR;
		}
	}

	/* All other conversions are really only typecasts */

	return OP_NOP;
}


/* Find a named constant by specification from source */
#ifdef COMP_MEM static META_VAL *getvalbyspec(META *m, META_ITEM *i, char **source, int len) #else static META_VAL *getvalbyspec(META *m, META_ITEM *i, TEXT *source, int len) #endif { META_NAME spec; if (len >= sizeof(META_NAME)) len = sizeof(META_NAME) - 1; #ifdef COMP_MEM memcpy(spec, *source, len); #else len = text_peekdata(source, spec, len); if (len < 1) return 0; #endif spec[len] = 0; return meta_getvalbyname(m, i, spec); } /* Find a field or attribute by specification from source */ #ifdef COMP_MEM static META_ITEM *getitembyspec(META *m, char **source, int len) #else static META_ITEM *getitembyspec(META *m, TEXT *source, int len) #endif { META_SPEC spec; if (len >= sizeof(META_SPEC)) len = sizeof(META_SPEC) - 1; #ifdef COMP_MEM memcpy(spec, *source, len); #else len = text_peekdata(source, spec, len); if (len < 1) return 0; #endif spec[len] = 0; return meta_getitembyspec(m, spec); } /* Define prototype and input macros depending on source type */ #ifdef COMP_MEM #define C (**source) #define I(n) ((*source)[(n)]) #define S(n) ((*source) += (n)) #define GETSTR(s,n,rl) meta_prttoa(*source, strlen(*source), 0,0, (rl),(s),(n)) #define GETIP(rl) meta_atoip(*source, strlen(*source), 0, 0, (rl)) #define GETORDN(b,r,n) meta_atoord(*source,MIN(strlen(*source),(n)),0,0,(r),(b)) #define GETORD(ba,rl) meta_atoord(*source, strlen(*source), 0, 0, (rl), (ba)) #define SPN(s) strspn(*source, (s)) #define NCMP(s,n) strncmp(*source, s, (n)) #define DBGCVT(n) dbg_cvtstr(*source, (n)) static ssize_t reccompile(META *m, IFACE *ifaces, char **source, INSN **buf, ssize_t bufsize, int precedence, int req_ctx, META_ITEM *ctx_item, int *ret_ctx) #else #define C text_peek(source, 0) #define I(n) text_peek(source, (n)) #define S(n) text_discard(source, (n)) #define GETSTR(s,n,rl) meta_prttoa(source->f->r->buf, \ text_ensure(source, \ MIN((n), source->f->r->size >> 1), 0), \ source->f->r->r, source->f->r->size, \ (rl), (s), (n)) #define GETIP(rl) meta_atoip(source->f->r->buf, \ 
ring_maxget(source->f->r), source->f->r->r, \ source->f->r->size, (rl)) #define GETORDN(b,rl,n) meta_atoord(source->f->r->buf, \ MIN(ring_maxget(source->f->r), (n)), \ source->f->r->r, \ source->f->r->size, (rl), (b)) #define GETORD(ba,rl) meta_atoord(source->f->r->buf, \ ring_maxget(source->f->r), source->f->r->r,\ source->f->r->size, (rl), (ba)) #define SPN(s) text_strspn(source, (s), strlen(s), 0) #define NCMP(s,n) text_strncmp(source, s, (n)) #define DBGCVT(n) dbg_cvttxt(source, (n)) static ssize_t reccompile(META *m, IFACE *ifaces, TEXT *source, INSN **buf, ssize_t bufsize, int precedence, int req_ctx, META_ITEM *ctx_item, int *ret_ctx) #endif { signed char *s, c; INSN *o, *saved_o; ssize_t l, ret; int context, n; META_VAL *v; STDOP *so; IFACE *ifc; if (!m || !source || !buf || !*buf || !bufsize) { msg(F_MISC, L_ERR, "reccompile: BUG: Invalid argument(s)!\n"); return 0; } context = NON; o = *buf; ret = 0; D1(msg(F_LANG, L_DEBUG, "reccompile: c=%c, prec=%d, req_ctx=%d, " "context=%d, ctx_item=%s\n", C, precedence, req_ctx, context, ctx_item ? ctx_item->name:"-")); while((c = C) > 0 && ret < bufsize - (sizeof(INSN) << 1)) { /* Whitespace */ if (c == ' ' || c == '\n' || c == '\r' || c == '\t') { S(1); continue; } /* Comment */ if (c == '#') { S(1); while((c = C) != '\n' && c != '\r') S(1); continue; } /* * TERMS */ /* Numeric term. Also supports dotted-decimal IP addresses. */ if (((c >= '0' && c <= '9') || (c == '-' && I(1) >= '0' && I(1) <= '9')) && context == NON) { o->op = OP_PUSHINT; o->imm.ord = GETIP(&l); context = IPA; if (!l) { o->imm.ord = GETORD(0, &l); context = INT; } S(l); o++; ret += sizeof(INSN); continue; } /* String term. Supports hex and octal escaped chars. 
*/ if ((c == '\"' || c == '\'') && context == NON) { o->op = OP_PUSHSTR; s = (char *)(o + 1); /* the space after this insn */ n = GETSTR(s, bufsize - ret - sizeof(INSN), &l); S(l); /* Set string length */ o->imm.d.str_len = n; /* Calculate length as insn count and save in disp */ n = (n + sizeof(INSN) - 1) / sizeof(INSN); o->imm.d.disp = n; /* On to the next insn and skip the displacement */ o += 1 + n; ret += (1 + n) * sizeof(INSN); context = STR; continue; } /* * SPECIAL OPERATORS */ /* Parens reset the precedence level so that we may continue undisturbed until we meet the first closing one. At that point, return until we're up at the precedence level the opening one brought us, then skip it. */ if (c == '(' && context == NON) { S(1); l = reccompile(m, ifaces, source, &o, bufsize - ret, 0, ANY, ctx_item, &context); if (l == -1) return -1; ret += l; continue; } if (c == ')' && context != NON) { if (precedence > 0) break; S(1); break; } /* These boolean operators use short-circuit evaluation. They return the last evaluated term as the result. This makes constructs possible like ArgA || ArgB, returning ArgA if that has a true value, or ArgB if ArgA is false. */ if (((c == '&' && I(1) == '&') || (c == 'a' && I(2) == 'd' && I(1) == 'n')) && context != NON) { if (precedence > 5) break; S(2 + (c == 'a')); saved_o = o; o->op = OP_JMPZ; o++; o->op = OP_POP; o++; /* WARNING: Funky feature: && resets context, because a false int looks exactly like a false str. So the left hand term's type is really irrelevant. 
*/ context = req_ctx; ctx_item = 0; l = reccompile(m, ifaces, source, &o, bufsize - ret, 5, context, ctx_item, 0); if (l == -1) return -1; saved_o->imm.d.disp = l / sizeof(INSN) + 1; ret += l + 2 * sizeof(INSN); continue; } if (((c == '|' && I(1) == '|') || (c == 'o' && I(1) == 'r')) && context != NON) { if (precedence > 4) break; S(2); saved_o = o; o->op = OP_JMPNZ; o++; o->op = OP_POP; o++; l = reccompile(m, ifaces, source, &o, bufsize - ret, 4, context, ctx_item, 0); if (l == -1) return -1; saved_o->imm.d.disp = l / sizeof(INSN) + 1; ret += l + 2 * sizeof(INSN); continue; } /* * INTERFACE OPERATORS */ /* Search among callable interfaces. These apply only in context none, have precedence 32 and recurse. */ if (context == NON) { for(ifc = ifaces; ifc && NCMP(ifc->name, l = strlen(ifc->name)); ifc = ifc->next); if (ifc) { /* Found interface. Check precedence */ if (precedence >= 32) break; /* Process operator */ S(l); /* Recurse, but no required context */ l = reccompile(m, ifaces, source, &o, bufsize-ret, 32,ANY,0, &context); if (l == -1) return -1; ret += l; /* Emit opcode */ o->op = OP_CALLIFACE; o->imm.iface = ifc; o++; ret += sizeof(INSN); /* Done */ continue; } } /* * STANDARD OPERATORS */ /* Search in standard operator table for first match of name and context (or any context). This means that table must be ordered, with the more specific names and contexts first. */ for(n = 0, so = stdops; n < STDOP_CNT && !(NCMP(so->name, l = strlen(so->name)) == 0 && (so->ctx_l == context || (so->ctx_l == ANY && context != NON))); n++, so++); if (n < STDOP_CNT) { /* Check precedence. Associating right means don't break if same precedence but continue recursing. 
*/ if (precedence >= so->prec + (so->assoc_r != 0)) break; /* We're now processing this operator, so remove it */ S(l); /* Convert stack top to required left hand type */ if (so->cvt_l != ANY && so->cvt_l != context) { o->op = cvtop(context, so->cvt_l); if (o->op != OP_NOP) o++, ret += sizeof(INSN); } /* Recurse, specifying required right hand type */ if (so->cvt_r != NON) { l = reccompile(m,ifaces, source,&o, bufsize-ret, so->prec, so->cvt_r, ctx_item, 0); if (l == -1) return -1; ret += l; } /* Emit opcode */ if (so->op != OP_NOP) { o->op = so->op; o++; ret += sizeof(INSN); } /* Set new context; reset ctx_item if none (comma) */ context = so->newctx; if (context == NON) ctx_item = 0; continue; } /* * DICTIONARY TERMS */ /* Note that these are tested after the operators, unlike the normal terms, to prevent unnecessary searches through the dictionary. */ /* Attribute reference or named value term */ if ((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z')) { /* Process request or reply override and 'first' flag */ o->imm.i.flags = 0; if (!NCMP("REQ:", 4)) { S(4); o->imm.i.flags = AV_USEREPVALID; } else if (!NCMP("REP:", 4)) { S(4); o->imm.i.flags = AV_USEREPVALID|AV_USEREP; } if (!NCMP("F:", 2)) { S(2); o->imm.i.flags |= AV_FIRST; } /* Set l to length of identifier, leave colon or minus out if it's the last char, for int:=3 and int-5 */ l = SPN("ABCDEFGHIJKLMNOPQRSTUVWXYZ" "abcdefghijklmnopqrstuvwxyz0123456789:-"); if (I(l - 1) == ':' || I(l - 1) == '-') l--; /* If we have an attribute context in scope and * there were no attribute reference flags given, * we search the constant value list first */ if (ctx_item && o->imm.i.flags == 0 && (v = getvalbyspec(m, ctx_item, source, l))) { S(l); context = INT; o->imm.ord = v->nr; o->op = OP_PUSHINT; o++; ret += sizeof(INSN); continue; } /* Not found; search in dictionary item list */ o->imm.i.item = getitembyspec(m, source, l); if (o->imm.i.item) { S(l); /* If we didn't have an explicit list override, make a lowercase first 
letter invert the default list by setting AV_USEREP without AV_USEREPVALID. Mainly for 'int', 'ip' etc */ if (!(o->imm.i.flags & AV_USEREPVALID) && ISLWR(o->imm.i.item->name[0])) { o->imm.i.flags |= AV_USEREP; } context = o->imm.i.item->val_type; ctx_item = o->imm.i.item; o->op = OP_PUSHAV; o++; ret += sizeof(INSN); continue; } /* Error. See if we have an itemized context or not. */ if (ctx_item) { msg(F_LANG, L_ERR, "compile: ERROR: Unknown " "attribute, value, operator " "or interface '%s' (for " "%s/%s/%s)!\n", DBGCVT(l), ctx_item->spc->name, meta_getvndbynr(m, ctx_item->vnd)->name, ctx_item->name); } else { msg(F_LANG, L_ERR, "compile: ERROR: Unknown " "attribute, operator or " "interface '%s'!\n", DBGCVT(l)); } return -1; } #ifdef COMP_MEM l = strlen(*source); #else /* This will be the length that was read because of the scanning functions used above, or more */ l = ring_maxget(source->f->r); #endif msg(F_LANG, L_ERR, "compile: ERROR: Unknown operator or " "misplaced term at '%s%s'!\n", DBGCVT(l > 10 ? 10 : l), (l > 10 ? " ..." : "")); return -1; } /* Add conversion operator if requested context doesn't match current */ if (req_ctx != context && req_ctx != ANY) { o->op = cvtop(context, req_ctx); if (o->op != OP_NOP) o++, ret += sizeof(INSN); } *buf = o; if (ret_ctx) *ret_ctx = context; D1(msg(F_LANG, L_DEBUG, "reccompile: Returned.\n")); return ret; } #ifdef COMP_MEM ssize_t lang_compile(META *m, IFACE *ifaces, char *source, INSN *buf, ssize_t bufsize) { ssize_t ret; ret = reccompile(m, ifaces, &source, &buf, bufsize, 0, ANY, 0, 0); #else ssize_t lang_compile(META *m, IFACE *ifaces, TEXT *source, INSN *buf, ssize_t bufsize) { ssize_t ret; ret = reccompile(m, ifaces, source, &buf, bufsize, 0, ANY, 0, 0); #endif if (ret != -1 && ret < bufsize - sizeof(INSN)) { buf->op = OP_HALT; ret += sizeof(INSN); } return ret; }