/* * Copyright (c) 1996-2007, OpenFWTK Development Group * All rights reserved. See LICENSE. */ /* maketable.c */ /* Copyright 1997-1999 by Eberhard Mattes Donated to the public domain. No warranty. 1997-07-19 Initial version 1997-09-06 Lower-case stuff moved to libem 1999-01-01 Accept '&' in attribute values (CTYPE_VALUE) */ #include #include #include #include #include #define SELECT_CTYPE_HTML #define SELECT_CTYPE_HTTP #define SELECT_CTYPE_URL #include "tables.h" enum target { TARGET_HTTP, TARGET_HTML, TARGET_URL }; static void usage (void) { puts ("Usage:"); puts (" maketable -t Make character-type table for http, html, or url"); exit (1); } static void do_not_edit (void) { puts ("/* DO NOT EDIT THIS FILE -- " "it is automatically generated by maketable */\n"); } /* Build a table for character classification. The table maps an unsigned char to an unsigned char containing a set of bits. The output contains just the initialization data, without braces. */ static void make_ctype (enum target type) { int i; unsigned c; do_not_edit (); i = 0; for (;;) { c = 0; if (i >= 'A' && i <= 'Z') c |= CTYPE_UPALPHA; if (i >= 'a' && i <= 'z') c |= CTYPE_LOALPHA; if (i >= '0' && i <= '9') c |= CTYPE_DIGIT; if (i == SP || i == HT) /* Be tolerant: treat HT as space for HTTP */ c |= CTYPE_WHITE; switch (type) { case TARGET_HTML: if (i == LF || i == CR) c |= CTYPE_WHITE; if (i == '.' || i == '-') c |= CTYPE_I_NAME; if (i != HT && i != LF && i != CR && (i < 0x20 || (i >= 0x7f && i <= 0x9a) || i == '<' || i == '>' || i == '&' || i == '"' || i == '\'')) c |= CTYPE_ESCAPE; /* Note that we cannot yet deal with quotes and "&" in parse_attributes(). */ if (i >= 0x21 && i != 0x7f && i != '"' && i != '\'' && i != '<' && i != '>') c |= CTYPE_VALUE; break; case TARGET_HTTP: /* See `token' in RFC 2068. This is used for cookies (RFC 2109). */ if (i >= 32 && i != '(' && i != ')' && i != '@' && i != ',' && i != ';' && i != ':' && i != '\\' && i != '"' && i != '/' && i != '[' && i != ']' && i != '?' && i != '=' && i != '{' && i != '}' && i != SP && i != HT) c |= CTYPE_TOKEN; break; case TARGET_URL: /* Oops, the BNF in RFC 1630 does not allow "+" and "." in schemes! Go with RFC 1738. Add '_' for Squid's "cache_object". */ if (i == '+' || i == '-' || i == '.' || i == '_') c |= CTYPE_I_SCHEME; if ((i >= 'a' && i <= 'f') || (i >= 'A' && i <= 'F')) c |= CTYPE_I_HEX; /* "hsegment" of RFC 1738. Of course, RFC 1738 and RFC 2068 contradict. In fact, we implement "uchar" of RFC 2068, plus "&", "/", "+", ":", ";", "<", "=", ">", and "@". */ if (i >= 33 && i != '"' && i != '#' && i != '%' && i != '?') c |= CTYPE_I_HPATH; /* "fsegment" of RFC 1738. Of course, RFC 1738 and RFC 2068 contradict. In fact, we implement "uchar" of RFC 2068, plus "&", "/", "+", ":", "<", "=", ">", and "@", minus ";". */ if (i >= 33 && i != '"' && i != '#' && i != '%' && i != ';' && i != '?') c |= CTYPE_I_FPATH; /* "user" of RFC 1738, sans "alpha", "digit", and "escape". */ if (/* "user" */ i == ';' || i == '?' || i == '&' || i == '=' /* "safe" */ || i == '$' || i == '-' || i == '_' || i == '.' || i == '+' /* "extra" */ || i == '!' || i == '*' || i == '\'' || i == '(' || i == ')' || i == ',') c |= CTYPE_I_USER; break; } printf ("0x%.2x", c); if (i == UCHAR_MAX) break; ++i; if (i % 8 == 0) fputs (",\n", stdout); else fputs (", ", stdout); } putchar ('\n'); } int main (int argc, char *argv[]) { if (argc == 3 && strcmp (argv[1], "-t") == 0) { if (strcmp (argv[2], "http") == 0) make_ctype (TARGET_HTTP); else if (strcmp (argv[2], "html") == 0) make_ctype (TARGET_HTML); else if (strcmp (argv[2], "url") == 0) make_ctype (TARGET_URL); else usage (); } else usage (); if (fflush (stdout) != 0) { perror ("maketable"); exit (2); } return 0; }