/* * Copyright (c) 1997-2007, OpenFWTK Development Group * All rights reserved. See LICENSE. */ /* http.c */ /* Status reporting by ArkanoiD, 2001 */ /* Copyright 1997-2000 by Eberhard Mattes Donated to the public domain. No warranty. 1997-07-19 Initial version 1997-09-06 New error message for empty header; X-Pad 1997-09-09 Insert space after colon; improved Content-Transfer-Encoding 1997-09-10 -redir; SP in request URL; simple-response 1997-09-12 more log options 1997-09-13 url_check_path() removed 1997-10-05 Don't reject multiple identical Content-Type fields 1997-10-19 Remove trailing whitespace from Location header field 1997-10-20 Accept cache_object scheme 1997-11-01 Don't make a full-response for cache_object 1998-01-11 Cope with HTTP header lines terminated with CR CR LF 1998-02-19 Accept Content-Type: text/css 1998-09-03 "log content-type-conflict"; default Content-Type text/html 1999-01-01 Accept unknown Content-Type, replace with application/binary 1999-02-27 Accept "multipart/x-tar"; warn about allowing "multipart|*" 1999-03-17 Support CONNECT (http/SSL) 1999-05-10 "referer" configuration 1999-09-16 allow/block style 2000-03-30 CRLF of status line in separate packet; Content-Type: (null) 2000-04-17 Make NAME_data const; H_DUP 2000-05-01 Use emi_fill() 2000-05-28 Use emo_set_read(), emo_read(), and emo_shutdown() 2000-08-25 Copy with missing blank line in HTTP response */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include "unicode.h" #include "firewall.h" #include "libemfw.h" #include "firewall2.h" #include "emio.h" #include "squid-gw.h" #define SELECT_CTYPE_HTTP #include "tables.h" #include "auth.h" #include "ctype.h" #include "magic.h" #include "ci_milter.h" extern magic_t proxy_magic; extern struct ctype_t* ctypes; /* Statistics */ /* =========================== PRELIMINARY STUFF =========================== */ /* firewall.h lacks some declarations. 
*/
int conn_server (const char *, int, int, char *);

/* Maximum total size of header (bytes, counting CR LF per line). */
#define MAX_HDR_SIZE (128 * 1024)

/* Maximum body size accepted in a Content-Length field (bytes). */
#define MAX_CONTENT_LENGTH (1024 * 1024 * 1024)

/* HTTP configuration. */
struct conf_http cf_http;

/* Debug the server side of squid-gw by using file descriptors 3 and 4
   for input from and output to, respectively, the server if this
   variable is true.  This is enabled by "-debug -server". */
int debug_server;

/* Flag bits for method_flags in struct request_state. */
#define METHOD_REQUEST_BODY 0x0001 /* Request has a body */
#define METHOD_RESPONSE_BODY 0x0002 /* Response may have a body */
#define METHOD_CONNECT 0x0004 /* CONNECT, handled specially */

/* QOS */
int sdscp = 0;

/* Magic */
extern magic_t magic;

/* Auth: non-zero while proxy authentication is still required; cleared
   by the Proxy-Authorization handler after a successful login. */
extern int needauth;

/* This structure holds data extracted from the request header. */
struct request_state
{
  unsigned method_flags;        /* METHOD_* bits */
  int has_body;                 /* Set by the Content-Length handler */
  int user_agent_seen;          /* Set once User-Agent has been handled */
  long content_length;          /* Handler asserts -1 before storing */
  const char *method_name;
  octet ipaddr[4]; /* For CONNECT */
  int port; /* For CONNECT */
  char http_version[3+1]; /* For CONNECT, e.g. "1.1" */
};

/* This structure holds data extracted from the response header. */
struct response_state
{
  int code; /* -1 while processing request */
  long content_length;          /* Handler asserts -1 before storing */
  const char *content_type;     /* Points into a header line, or a literal */
  const char *content_encoding; /* Points into a header line */
};

/* Data extracted from the request and response headers.  We keep it
   in structures to keep this program structured. */
static struct request_state req_st;
static struct response_state res_st;

/* This is the parsed request URL. */
struct url req_url;
const char *req_url_str;

/* Charset type (for multibyte encodings) */
int charset_type = CHARSET_GENERIC;

/* This buffer is used for various tasks, therefore we have to be
   cautious about not overwriting it in functions which take pointers
   which may point into this buffer.  Using multiple buffers would
   enlarge the working set.
*/ static char buffer[65536]; /* ============================== CHARACTERS =============================== */ /* Character classes as defined by RFC 1945. */ #define HTTP_CHAR_TYPE(c,x) (http_char_type[(unsigned char)c] & (x)) #define LOALPHA(c) HTTP_CHAR_TYPE (c, CTYPE_LOALPHA) #define UPALPHA(c) HTTP_CHAR_TYPE (c, CTYPE_UPALPHA) #define ALPHA(c) HTTP_CHAR_TYPE (c, CTYPE_ALPHA) #define DIGIT(c) HTTP_CHAR_TYPE (c, CTYPE_DIGIT) #define WHITE(c) HTTP_CHAR_TYPE (c, CTYPE_WHITE) /* Also referenced by cookies.c. */ const unsigned char http_char_type[UCHAR_MAX+1] = { #include "http-ctype.h" }; /* ============================ REPORTING ERRORS =========================== */ /* Prototypes. */ static void error (int, const char *, ...) ATTR_PRINTF (2, 3) ATTR_NORETURN; static void read_error (EMI_FILE *) ATTR_NORETURN; static void write_error (EMO_FILE *) ATTR_NORETURN; static void redirect (const char *) ATTR_NORETURN; /* Keep byte count in sync */ void em_update_status() { if (!s_in || !s_out) return; proxy_stats.inbytes = emi_amount(s_in,0); proxy_stats.outbytes = emo_amount(s_out,0); proxy_update_status(); } /* Return error code 400 or N, depending on whether we are processing the request header or the response header. */ static int error_code (int n) { return res_st.code == -1 ? 400 : n; } /* Return "request" or "response", depending on whether we are processing the request header or the response header. */ static const char *error_header (void) { return res_st.code == -1 ? "request" : "response"; } /* Return "client" or "server", depending on whether the file F is used for the client or server, respectively. */ static const char *cs (void *f) { return (f == c_in || f == c_out) ? "client" : "server"; } /* Log an error via syslog() and send an error message to the client. CODE is the HTTP status code to be returned to the client. */ static void error (int code, const char *fmt, ...) 
{ va_list arg_ptr; va_start (arg_ptr, fmt); vsyslog (LLEV, fmt, arg_ptr); va_end (arg_ptr); if (!cf_http.test && emo_set_read (c_out) != 0) syslog (LLEV, "emo_set_read(): %s",strerror(errno)); errno = 0; emo_printf (c_out, "HTTP/1.0 %d squid-gw error\r\n", code); emo_puts (c_out, "Content-type: text/html\r\n\r\n" "squid-gw error" "" "Error detected by squid-gw:
"); va_start (arg_ptr, fmt); emo_vprintf (c_out, fmt, arg_ptr); va_end (arg_ptr); emo_puts (c_out, "\r\n"); if (emo_flush (c_out) != 0) /* This sets errno! */ syslog (LLEV, "error(): %s",strerror(errno)); if (!cf_http.test) { if (emo_shutdown (c_out) != 0) syslog (LLEV, "emo_shutdown(): %s",strerror(errno)); if (emo_read (c_out, 10) != 0) syslog (LLEV, "emo_read(): %s",strerror(errno)); } quit (0); } static void authreq () { if (!cf_http.test && emo_set_read (c_out) != 0) syslog (LLEV, "emo_set_read(): %s",strerror(errno)); errno = 0; emo_puts (c_out, "HTTP/1.1 407 Proxy Authentication Required\r\n"); emo_puts (c_out, "Proxy-Authenticate: Basic realm=\"OpenFWTK\"\r\n"); emo_puts (c_out, "Content-type: text/html\r\n\r\n" "Authentication error" "" "

Authentication Failed!
"); emo_puts (c_out, "\r\n"); if (emo_flush (c_out) != 0) /* This sets errno! */ syslog (LLEV, "error(): %s",strerror(errno)); if (!cf_http.test) { if (emo_shutdown (c_out) != 0) syslog (LLEV, "emo_shutdown(): %s",strerror(errno)); if (emo_read (c_out, 10) != 0) syslog (LLEV, "emo_read(): %s",strerror(errno)); } quit (0); } /* Helper function for error reporting. */ static void read_error (EMI_FILE *f) { error ((f == c_in) ? 400 : 503, "Cannot receive from %s: %s", cs (f), strerror (errno)); } /* Helper function for error reporting. */ static void write_error (EMO_FILE *f) { if (f == c_out) error (404, "Cannot send to server: %s", strerror (errno)); else { syslog (LLEV, "cannot send to %s: %s", cs (f), strerror(errno)); quit (1); } } /* =========================== INPUT AND OUTPUT ============================ */ /* Write LEN characters from the buffer pointed to by P to the file F. */ static void write_buf (EMO_FILE *f, const void *p, size_t len) { if (emo_write (f, p, len) != 0) write_error (f); em_update_status(); } /* Write the null-terminated string pointed to by S to the file F. */ static void write_str (EMO_FILE *f, const char *s) { write_buf (f, s, strlen (s)); } /* Flush the buffer of the output file F. */ static void write_flush (EMO_FILE *f) { if (emo_flush (f) != 0) write_error (f); em_update_status(); } /* Copy up to SIZE bytes from file SRC to file DST. */ void http_copy (EMO_FILE *dst, EMI_FILE *src, long size) { size_t n; while (size > 0) { n = (size < (long)sizeof (buffer)) ? (size_t)size : sizeof (buffer); n = emi_read (src, buffer, n); if (n <= 0) break; write_buf (dst, buffer, n); size -= n; em_update_status(); } if (emi_error (src)) read_error (src); } /* Save up to SIZE bytes from file SRC to a temporary file. 
*/

/* http_save: TEMPLATE is a mkstemp() template; a private copy is made,
   so TEMPLATE itself is not modified.  Returns the malloc()ed name of
   the file that was created; the caller owns both the string and the
   file.  Every failure is reported via error()/read_error(), which do
   not return. */
char* http_save (char *template, EMI_FILE *src, long size)
{
  size_t n;
  char *qf;
  int tfd;

  qf = xstrdup(template);
  if ((tfd = mkstemp(qf)) == -1)
    {
      syslog(LLEV,"fwtksyserr: cannot create temporary file, %s",
             strerror(errno));
      error (503, "Cannot create temporary file");
    }
  while (size > 0)
    {
      n = (size < (long)sizeof (buffer)) ? (size_t)size : sizeof (buffer);
      n = emi_read (src, buffer, n);
      if (n == 0)               /* N is unsigned: 0 is the only stop value */
        break;
      /* A short write is treated like a failed one. */
      if (write (tfd, buffer, n) != (ssize_t)n)
        {
          syslog(LLEV,"fwtksyserr: cannot write to temporary file, %s",
                 strerror(errno));
          error (503, "Cannot write to temporary file");
        }
      size -= n;
      em_update_status();
    }
  if (emi_error (src))
    read_error (src);
  /* Deferred write errors may only show up when closing. */
  if (close(tfd) != 0)
    {
      syslog(LLEV,"fwtksyserr: cannot write to temporary file, %s",
             strerror(errno));
      error (503, "Cannot write to temporary file");
    }
  return(qf);
}

/* Copy up to SIZE bytes from the file named QFILE to the file DST.
   Stops early at end of file.  Failures to open or read the file are
   logged and reported via error(), which does not return. */
void http_load (char *qfile, EMO_FILE *dst, long size)
{
  size_t want;
  ssize_t got;                  /* read() returns -1 on error: keep it signed */
  int tfd;

  if ((tfd = open(qfile, O_RDONLY)) == -1)
    {
      syslog(LLEV,"fwtksyserr: cannot open temporary file, %s",
             strerror(errno));
      error (503, "Cannot open temporary file");
    }
  while (size > 0)
    {
      want = (size < (long)sizeof (buffer)) ? (size_t)size : sizeof (buffer);
      got = read (tfd, buffer, want);
      if (got < 0)
        {
          if (errno == EINTR)
            continue;
          syslog(LLEV,"fwtksyserr: cannot read temporary file, %s",
                 strerror(errno));
          close (tfd);
          error (503, "Cannot read temporary file");
        }
      if (got == 0)
        break;
      write_buf (dst, buffer, (size_t)got);
      size -= got;
      em_update_status();
    }
  close (tfd);
}


/* ==================== READING AND WRITING HEADER LINES =================== */

/* We collect header lines in a linked list and process them after
   having read all of them.  This structure stores one element of such
   a linked list. */
struct hdr_line
{
  struct hdr_line *next; /* Pointer to next element */
  char *line; /* Pointer to malloc()ed buffer */
  size_t length; /* Length of line */
};

/* Read one line terminated by CR*LF (i.e., a LF character preceded by
   any number of CR characters) from F, storing it to the array of
   SIZE characters pointed to by BUF.  Log the line if MSG is not
   NULL.  The resulting string may contain embedded control characters
   (except for the null character, which is rejected).  The
   terminating CR*LF is removed.  This function returns the length of
   the line.
*/

/* read_line: LINENO is only used in the "premature end" diagnostic.
   All failures (line too long, NUL byte, read error, premature end of
   input) go through error()/read_error() and do not return. */
static size_t read_line (EMI_FILE *f, char *buf, size_t size,
                         const char *msg, int lineno)
{
  size_t i = 0;
  int c;

  while ((c = emi_getc (f)) != EMI_EOF && c != LF)
    {
      /* Keep one byte free for the terminating null character. */
      if (i + 1 >= size)
        error (error_code (502), "%s header line too long", error_header ());
      if (c == 0)
        error (error_code (502), "%s header line contains invalid character",
               error_header ());
      buf[i++] = (char)c;
    }
  em_update_status();
  if (c == LF
      || (c == EMI_EOF && !emi_error (f) /* FIN */
          && i == 0 /* empty line */
          && res_st.code >= 0 /* reading response header */
          && (res_st.code == 301 || res_st.code == 302
              || res_st.code == 303)))
    {
      /* End of line.  There are some pathological cases: The OSU
         server terminates HTTP header lines with CR CR LF.  Ignore
         any number of CRs.  http://www.hotmail.com/ (reporting
         "Server: Microsoft-IIS/5.0") omits the blank line for "302
         Redirected".  That's what the second part of the condition
         above is for: a clean end of input on an empty line is taken
         as the blank line for redirect responses. */
      if (c == EMI_EOF)
        syslog (LLEV, "Missing blank line after response header");
      while (i != 0 && buf[i-1] == CR)
        --i;
      buf[i] = 0; /* For syslog() */
      if (msg != NULL && c != EMI_EOF)
        syslog (LLEV, "%s: %.512s", msg, buf);
      return i;
    }
  /* End of input in the middle of the header. */
  if (emi_error (f))
    read_error (f);
  else
    error (error_code (502),
           "premature end of %s header (line=%d length=%u bytes=%lu)",
           error_header (), lineno, (unsigned)i, emi_amount (f, 0));
  return 0; /* unreached */
}

/* Read multiple lines from F and return them as a linked list of
   lines.  Concatenate lines with their continuation lines.  Stop
   reading at the first empty line.  This function returns a pointer
   to the first element of the linked list.  Log all the lines if MSG
   is not NULL.

   The shared `buffer' is used as work space.  Each list element and
   its `line' string are allocated with xmalloc()/xstrndup(). */
static struct hdr_line *read_lines (EMI_FILE *f, const char *msg)
{
  struct hdr_line *first, *p, **add;
  size_t line_len, cur_len, total;
  int lineno = 0;

  first = NULL;
  add = &first;                 /* Where to hook in the next element */
  total = 0;

  /* Read the first line.  RFC 2068 (HTTP/1.1) says that empty leading
     lines should be ignored. */
  do
    {
      line_len = read_line (f, buffer, sizeof (buffer), msg, ++lineno);
    } while (line_len == 0);
  if (WHITE (buffer[0]))
    error (error_code (503), "%s header starts with continuation line",
           error_header ());

  /* Read lines until an empty line is reached.  Lines starting with
     whitespace are continuation lines and are appended to the
     previous line. */
  for (;;)
    {
      /* In fact, we always append the next line to the current line.
         If it's a continuation line, this is exactly what is desired.
         If it's not a continuation line, flush the previous line from
         the buffer and start a new line.  The buffer already contains
         LINE_LEN characters at this point. */
      DEBUG_ASSERT (line_len < sizeof (buffer));
      cur_len = read_line (f, buffer + line_len, sizeof (buffer) - line_len,
                           msg, ++lineno);
      /* TOTAL counts the lines read in this loop plus 2 per stored
         line; the length of the very first line is not added. */
      total += cur_len;
      if (total + 2 > MAX_HDR_SIZE) /* 2 for CR LF */
        error (error_code (503), "%s header too big", error_header ());
      if (cur_len != 0 && WHITE (buffer[line_len]))
        {
          /* It's a continuation line.  Adjust the length of the line
             and continue reading.  Note that we just keep the leading
             whitespace. */
          line_len += cur_len;
        }
      else
        {
          /* It's not a continuation line.  Move the previous line to
             the linked list.  TODO: Use fastheap. */
          p = (struct hdr_line *)xmalloc (sizeof (struct hdr_line));
          p->line = xstrndup (buffer, line_len);
          p->next = NULL;
          p->length = line_len;
          *add = p;
          add = &p->next;
          total += 2; /* CR LF */

          /* If the new line is empty we're finished. */
          if (cur_len == 0)
            return first;

          /* Otherwise, move the line to the beginning of the buffer
             to make room for appending continuation lines.  The
             regions may overlap, hence memmove(). */
          memmove (buffer, buffer + line_len, cur_len);
          line_len = cur_len;
        }
    }
  /*
   * Unreached
   */
  return first;
}

/* Write the lines from the linked list pointed to by P to the file F,
   terminating each one with CR LF.  No blank line is written after
   the last one. */
static void write_lines (EMO_FILE *f, const struct hdr_line *p)
{
  while (p != NULL)
    {
      write_buf (f, p->line, p->length);
      write_buf (f, "\r\n", 2);
      p = p->next;
    }
}

/* Send the response header to the client.
After sending the response header, error() won't do the right thing. */ static void write_header (struct hdr_line *hdr) { unsigned long n; const char *pad = "X-Pad: avoid browser bug\r\n"; write_lines (c_out, hdr); /* TODO: check HTTP version of server? */ if (strcmp (req_st.http_version, "1.1") == 0) write_str (c_out, "Connection: close\r\n"); n = emo_amount (c_out, 1); if (n == 256 || n == 257) { /* According to a comment the Apache source code, Netscape Navigator does not correctly parse the header if the terminating CR LF starts at the 256th or 257th byte. According to the code, these numbers are zero-based. */ write_str (c_out, pad); } write_buf (c_out, "\r\n", 2); } /* ====================== MANAGING HTTP HEADER FIELDS ====================== */ /* Return a pointer to the value of the header field in the line pointed to by HL. Store the length to the object pointed to by PLEN if PLEN is not NULL. */ static char *field_value (const struct hdr_line *hl, size_t *plen) { char *s = memchr (hl->line, ':', hl->length); DEBUG_ASSERT (s != NULL); ++s; while (WHITE (*s)) ++s; if (plen != NULL) *plen = (hl->line + hl->length) - s; return s; } /* Parse the value of a header field as integer. Return 0 on success, -1 on failure. */ static int field_number (const struct hdr_line *hl, long *num) { const char *s; char *end; s = field_value (hl, NULL); if (!DIGIT (*s)) return -1; /* No number */ errno = 0; *num = strtol (s, &end, 10); if (errno != 0 || end == s) return -1; /* strtol() failed */ /* Check for junk after the number. Only white space can follow. */ DEBUG_ASSERT (!WHITE (0)); while (WHITE (*end)) ++end; return *end == 0 ? 0 : -1; } /* Remove trailing whitespace from an HTTP header field. */ static void http_trim_value (struct hdr_line *hl) { char *s = hl->line; size_t len = hl->length; while (len != 0 && s[len-1] == ' ') --len; s[len] = 0; hl->length = len; } /* Allocate space for LEN characters in the header line pointed to by HL. 
Setting the contents and length of the line is up to the caller. */ static void alloc_field (struct hdr_line *hl, size_t len) { DEBUG_ASSERT (hl != NULL); if (len > hl->length) { DEBUG_ASSERT (hl->line != NULL); free (hl->line); hl->length = len; hl->line = xmalloc (len + 1); } } /* Prefix the header line pointed to by HL with PREFIX. */ static void prefix_field (struct hdr_line *hl, const char *prefix) { size_t len; char *p; DEBUG_ASSERT (hl != NULL); DEBUG_ASSERT (prefix != NULL); len = strlen (prefix); if (hl->length >= len && memcmp (hl->line, prefix, len) == 0) return; p = xmalloc (len + hl->length + 1); DEBUG_ASSERT (hl->line != NULL); memcpy (p, prefix, len); memcpy (p + len, hl->line, hl->length); p[len + hl->length] = 0; free (hl->line); hl->length += len; hl->line = p; } /* Log a header field. */ static void log_field (const struct hdr_line *hl, const char *msg) { syslog (LLEV, "%s %s header field: %.512s", msg, error_header (), hl->line); } /* Remove a header field. HEAD points to the pointer pointing to the header field to be removed. Return true if the header field has been removed from the linked list. */ /* TODO: Fix memory leak when implementing persistent connections */ static int remove_field (struct hdr_line **head, const struct rej_policy *rp, const char *msg) { DEBUG_ASSERT (*head != NULL); if (rp->log && msg != NULL) log_field (*head, msg); switch (rp->output) { case REJ_DROP: *head = (*head)->next; return 1; case REJ_COPY: return 0; case REJ_PREFIX: prefix_field (*head, "REMOVED-"); return 0; default: ALWAYS_ASSERT (0); } return 0; } /* Perform an action on a field (keep or drop) and move to the next field. 
*/ #define FA_KEEP 0 #define FA_REMOVE 1 /* Without logging */ #define FA_DROP_SILENT 2 #define FA_DROP_PRIVACY 3 #define FA_DROP_UNKNOWN 4 #define FA_DROP_DANGER 5 #define FA_DROP_INVALID 6 static struct hdr_line **field_action (struct hdr_line **head, int action) { int removed; switch (action) { case FA_KEEP: removed = 0; break; case FA_REMOVE: *head = (*head)->next; removed = 1; break; case FA_DROP_SILENT: removed = remove_field (head, &cf_http.field_silent, "silently dropped"); break; case FA_DROP_PRIVACY: removed = remove_field (head, &cf_http.field_privacy, "privacy disturbing"); break; case FA_DROP_UNKNOWN: removed = remove_field (head, &cf_http.field_unknown, "unknown"); break; case FA_DROP_DANGER: removed = remove_field (head, &cf_http.field_dangerous, "dangerous"); break; case FA_DROP_INVALID: removed = remove_field (head, &cf_http.field_invalid, "invalid"); break; default: ALWAYS_ASSERT (0); } return removed ? head : &(*head)->next; } /* Remove the header field named NAME from a list of header lines. HEAD points to the pointer pointing to the head of the list. The name of the header field must be in lower case. */ static void field_action_by_name (struct hdr_line **head, const char *name, int action) { size_t len; struct hdr_line *hl; len = strlen (name); while (*head != NULL) { hl = *head; if (hl->length > len && hl->line[len] == ':' && lower_cmpn (hl->line, name, len) == 0) head = field_action (head, action); else head = &(*head)->next; /* FA_KEEP */ } } /* Fake an HTTP header. */ static void fake_header_line (struct hdr_line **list, const char *s) { struct hdr_line *p; /* TODO: Use fastheap. */ p = (struct hdr_line *)xmalloc (sizeof (struct hdr_line)); p->length = strlen (s); p->line = xstrndup (s, p->length); p->next = NULL; /* This is slow but that shouldn't matter here. */ while (*list != NULL) list = &(*list)->next; *list = p; } /* Replace the Content-Type field. 
*/ static void sanitize_content_type (struct hdr_line **head) { if (cf_http.log_unknown_content_type) syslog (LLEV, "Unknown Content-Type: %.512s", res_st.content_type); field_action_by_name (head, "content-type", FA_REMOVE); fake_header_line (head, "Content-Type: application/binary"); res_st.content_type = "application/binary"; } /* ================== PARSING AND FILTERING HEADER FIELDS ================== */ /* Check value of "Pragma". Return 0 if acceptable. */ int check_pragma (const octet *s, size_t len) { size_t i; /* Ignore trailing SP characters. */ while (len > 0 && s[len-1] == SP) --len; /* Accept "no-cache". */ if ((len == 8 && lower_cmpn ((char*) s, "no-cache", 8) == 0) || (len == 7 && lower_cmpn ((char*) s, "nocache", 7) == 0)) /* Typo? */ return 0; /* Accept "max-age=". */ if (len > 8 && lower_cmpn ((char*) s, "max-age=", 8) == 0) { i = 8; while (i < len && DIGIT (s[i])) ++i; if (i == len) return 0; } /* Reject everything else. */ return -1; } /* Check value of "Refresh". Return 0 if acceptable. */ int check_refresh (const octet *s, size_t len, int block_javascript) { size_t i; struct url u; i = 0; if (i >= len || !DIGIT (s[i])) return -1; while (i < len && DIGIT (s[i])) ++i; while (i < len && s[i] == SP) ++i; if (i < len && s[i] == ',') { /* "seconds,url" */ ++i; while (i < len && s[i] == SP) ++i; } else if (i < len && s[i] == ';') { /* "seconds; URL=url" */ ++i; while (i < len && s[i] == SP) ++i; /* Between "URL" and "=" is the only place where a SP is not allowed. */ if (i + 3 >= len || lower_cmpn ((char*)s + i, "url=", 4) != 0) return -1; i += 4; while (i < len && s[i] == SP) ++i; if (i + 2 < len && (s[i] == '"' || s[i] == '\'') && s[len-1] == s[i]) { ++i; --len; } } else if (i == len) { /* "seconds" */ return 0; } else return -1; /* Check the URL. */ DEBUG_ASSERT (i <= len); if (url_parse (&u, s + i, len - i, 0) != 0) return -1; if (u.scheme != URL_HTTP && u.scheme != URL_HTTPS) return -1; /* Apply "href" configuration rules. 
*/ if (config_href ((char*)s + i, &u) != 0) return -1; return 0; } /* Header field handler: keep the header field. */ static int hh_keep (struct hdr_line *hl) { return FA_KEEP; } /* Header field handler: drop the header field silently. */ static int hh_drop_silent (struct hdr_line *hl) { return FA_DROP_SILENT; } /* Header field handler: drop a header field which may disturb privacy. */ static int hh_privacy (struct hdr_line *hl) { return FA_DROP_PRIVACY; } /* Header field handler: remove dangerous header field. */ static int hh_dangerous (struct hdr_line *hl) { return FA_DROP_DANGER; } /* Header field handler: "Location". */ static int hh_location (struct hdr_line *hl) { const char *s; struct url u; size_t vlen; /* Some HTTP servers (Netscape-Communications/1.12 ?) put a space character after the URL, which makes url_parse() reject the URL. */ http_trim_value (hl); s = field_value (hl, &vlen); if (url_parse (&u, (unsigned char*)s, vlen, 0) != 0) return FA_DROP_INVALID; switch (u.scheme) { case URL_HTTP: case URL_HTTPS: case URL_FTP: case URL_GOPHER: return FA_KEEP; case URL_JAVASCRIPT: if (cf_http.block_javascript.v) return FA_DROP_DANGER; return FA_KEEP; default: /* TODO: Configuration, URL_CLSID, URL_JAVA */ return FA_DROP_DANGER; } } /* Header field handler: "Pragma". */ static int hh_pragma (struct hdr_line *hl) { size_t len; const octet *s = (octet*) field_value (hl, &len); if (check_pragma (s, len) == 0) return FA_KEEP; else return FA_DROP_INVALID; } /* Header field handler: "Refresh". */ static int hh_refresh (struct hdr_line *hl) { size_t len; const octet *s = (octet*) field_value (hl, &len); if (check_refresh (s, len, cf_http.block_javascript.v) == 0) return FA_KEEP; else return FA_DROP_INVALID; } /* Header field handler: "Transfer-Encoding". */ static int hh_tf_encoding (struct hdr_line *hl) { /* Don't let the bad guys circumvent HTML checking by sending their evil stuff with `Transfer-Encoding: chunked', which I'm too lazy to implement. 
*/ error (501, "Transfer-Encoding is not implemented"); req_st.has_body = 1; /* TODO: request vs. response */ return FA_DROP_DANGER; /* Not reached */ } /* Header field handler: "Content-Length" in request header. */ static int hh_req_ct_len (struct hdr_line *hl) { long n; if (field_number (hl, &n) != 0 || n < 0 || n > MAX_CONTENT_LENGTH) error (400, "Invalid Content-Length"); DEBUG_ASSERT (req_st.content_length == -1); req_st.content_length = n; req_st.has_body = 1; return FA_KEEP; } /* Header field handler: "Content-Length" in response header. */ static int hh_res_ct_len (struct hdr_line *hl) { long n; if (field_number (hl, &n) != 0 || n < 0 || n > MAX_CONTENT_LENGTH) error (503, "Invalid Content-Length"); DEBUG_ASSERT (res_st.content_length == -1); res_st.content_length = n; return FA_KEEP; } /* Header field handler: "User-Agent". */ static int hh_user_agent (struct hdr_line *hl) { const char ua[] = "User-Agent: "; const char *s; size_t len; /* Don't let the server override the configuration if someone added hh_user_agent to http-res.tab by accident. */ ALWAYS_ASSERT (res_st.code == -1); /* This is ensured by handle_header_lines(). */ DEBUG_ASSERT (!req_st.user_agent_seen); /* Configure depending on the user agent. */ req_st.user_agent_seen = 1; s = field_value (hl, NULL); if (cf_http.log_user_agent) syslog (LLEV, "User-Agent: %.512s", s); config_browsers (s); /* Replace the value of "User-Agent". Note that the configuration class depending on the old value is already in effect! There's no point in replacing the value if the request method is CONNECT as the (unencrypted) HTTP header is thrown away. */ if (cf_http.user_agent != NULL && !(req_st.method_flags & METHOD_CONNECT)) { len = sizeof (ua) - 1 + strlen (cf_http.user_agent); alloc_field (hl, len); memcpy (hl->line, ua, sizeof (ua) - 1); strlcpy (hl->line + sizeof (ua) - 1, cf_http.user_agent, len - sizeof(ua) + 2); hl->length = len; } return FA_KEEP; } /* Header field handler: "Content-Range". 
*/ static int hh_ct_range (struct hdr_line *hl) { error (501, "Content-Range not implemented"); } /* Header field handler: "Content-Type" in response header. */ static int hh_res_ct_type (struct hdr_line *hl) { const char *s = field_value (hl, NULL); if (res_st.content_type != NULL) { if (cf_http.log_content_type_conflict && strcmp (res_st.content_type, s) != 0) syslog (LLEV, "Conflicting values for Content-Type: %.128s vs. %.128s", res_st.content_type, s); /* We keep the first Content-Type and drop all others. */ return FA_DROP_SILENT; } res_st.content_type = field_value (hl, NULL); return FA_KEEP; } /* Header field handler: "Content-Encoding" in response header. */ static int hh_res_ct_enc (struct hdr_line *hl) { res_st.content_encoding = field_value (hl, NULL); return FA_KEEP; } /* Header field handler: "Content-Transfer-Encoding" in response header. */ static int hh_cte (struct hdr_line *hl) { const char *s = field_value (hl, NULL); /* We accept all identity encodings (dropping the header field in case a browser ignores bit 7 for "7bit") and reject everything else completely. */ if (lower_cmpz (s, "7bit") == 0 || lower_cmpz (s, "8bit") == 0 || lower_cmpz (s, "binary") == 0) return FA_DROP_SILENT; error (501, "Content-Transfer-Encoding %.128s not implemented", s); } /* Header field handler: "Set-Cookie" in response header. 
*/ static int hh_set_cookie (struct hdr_line *hl) { const char field_name[] = "Set-Cookie: "; char *s; size_t vlen, nlen, flen; int r; s = field_value (hl, &vlen); r = cookies_parse_set_cookie ((octet*) s); if (r < 0) return FA_DROP_INVALID; if (r > 0) return FA_DROP_PRIVACY; if (cookies_rebuild ((octet*) buffer, sizeof (buffer)) != 0) return FA_DROP_INVALID; nlen = cookies_rebuild_length (); if (nlen <= vlen) { memcpy (s, buffer, nlen); hl->length -= vlen - nlen; } else { flen = sizeof (field_name) - 1; alloc_field (hl, flen + nlen); memcpy (hl->line, field_name, flen); memcpy (hl->line + flen, buffer, nlen); hl->line[flen + nlen] = 0; } return FA_KEEP; } /* Header field handler: "Cookie" in request header. */ static int hh_cookie (struct hdr_line *hl) { const char field_name[] = "Cookie: "; char *s; size_t vlen, nlen, flen; int r; s = field_value (hl, &vlen); r = cookies_parse_cookie ((octet*) s); if (r < 0) return FA_DROP_INVALID; if (r > 0) return FA_DROP_PRIVACY; if (cookies_rebuild ((octet*)buffer, sizeof (buffer)) != 0) return FA_DROP_INVALID; nlen = cookies_rebuild_length (); if (nlen <= vlen) { memcpy (s, buffer, nlen); hl->length -= vlen - nlen; } else { flen = sizeof (field_name) - 1; alloc_field (hl, flen + nlen); memcpy (hl->line, field_name, flen); memcpy (hl->line + flen, buffer, nlen); hl->line[flen + nlen] = 0; } return FA_KEEP; } /* Header field handler: "If-Modified-Since" in request header. */ static int hh_if_modified (struct hdr_line *hl) { char *v, *s, *sc; v = field_value (hl, NULL); sc = strchr (v, ';'); if (sc != NULL) { s = sc + 1; while (WHITE (*s)) ++s; if (lower_cmpn (s, "length=", 7) == 0) { /* Remove "; length=length = sc - hl->line; } } return FA_KEEP; } /* Header field handler: Referer (and Referrer). 
*/ static int hh_referer (struct hdr_line *hl) { struct url u; const octet *s; size_t n; switch (cf_http.referer) { case REF_DROP: break; case REF_KEEP_ALL: return FA_KEEP; case REF_KEEP_SAME_SITE: s = (const octet *)field_value (hl, &n); if (url_parse (&u, s, n, 0) == 0 && u.scheme == req_url.scheme && u.port == req_url.port && u.host_length == req_url.host_length && lower_cmpn ((char*) s + u.host_start, req_url_str + req_url.host_start, u.host_length) == 0) return FA_KEEP; break; case REF_KEEP_MATCH: DEBUG_ASSERT (cf_http.referer_mask != NULL); s = (const octet *)field_value (hl, &n); if (url_parse (&u, s, n, 0) == 0 && url_compare ((octet*) cf_http.referer_mask, &cf_http.referer_url, s, &u, UCF_IGNORE_CASE | UCF_WILDCARD) == 0) return FA_KEEP; break; default: ALWAYS_ASSERT (0); } return FA_DROP_PRIVACY; } static int hh_proxyauth (struct hdr_line *hl) { size_t n; char *s; char authdata[1024]; char *pwd; char buf[1024]; bzero(authdata,sizeof(authdata)); if (needauth) { /* * base64 decode */ int k,i,j,flag; unsigned char a[4]; k = i = 0; s = index(field_value (hl, &n),' '); if (!s || (n >= 256)) error(400, "Invalid proxy-authentication"); s++; while (*s) { a[k++]=*(s++); if (k==4) { flag=0; for (j=0;j<4;j++) { if (isupper(a[j])) a[j]=a[j]-65; else if (islower(a[j])) a[j]=a[j]-71; else if (isdigit(a[j])) a[j]=a[j]+4; else if (a[j]=='+') a[j]=62; else if (a[j]=='/') a[j]=63; else if (a[j]=='=') { a[j]=0; flag++; } } k = 0; if (flag==2) /* two '=' */ { authdata[i++]=(a[0]<<2) | (a[1]>>4); break; } else if (flag==1) /* one '=' */ { authdata[i++]=(a[0]<<2) | (a[1]>>4); authdata[i++]=(a[1]<<4) | (a[2]>>2); break; } else { authdata[i++]=(a[0]<<2) | (a[1]>>4); authdata[i++]=(a[1]<<4) | (a[2]>>2); authdata[i++]=(a[2]<<6) | (a[3]); } } } authdata[i] = 0; if (!(pwd = index(authdata,':'))) pwd = xstrdup(""); else *(pwd++) = '\0'; if (auth_open(proxy_confp)) { syslog(LLEV,"fwtksyserr: cannot connect to authentication server"); error(503,"cannot connect to authentication 
server"); } if (auth_recv(buf,sizeof(buf)) || strncmp(buf,"Authsrv ready",13)) { syslog(LLEV,"fwtksyserr: cannot read from authentication server"); error(503,"cannot read from authentication server"); } snprintf(buf,sizeof(buf),"authorize %.128s 'squid-gw %.128s/%.128s'", authdata,proxy_stats.rladdr,proxy_stats.riaddr); if (auth_send(buf)) { syslog(LLEV,"fwtksyserr: cannot write to authentication server"); error(503,"cannot write to authentication server"); } if (auth_recv(buf,sizeof(buf))) { syslog(LLEV,"fwtksyserr: cannot read from authentication server"); error(503,"cannot read from authentication server"); } if (!strncmp(buf, "ok", 2)) goto authdone; if (strncmp(buf,"password",8)) { error(407,"proxy authentication failed, %.128s",buf); } snprintf(buf,sizeof(buf),"response '.128%s'", pwd); if (auth_send(buf) || auth_recv(buf,sizeof(buf)) || strncmp(buf,"ok",2)) { error(407,"proxy authentication failed, %.128s",buf); } authdone: auth_close(); needauth = 0; strncpy(proxy_stats.authuser,authdata,sizeof(proxy_stats.authuser)); return FA_DROP_SILENT; } /* * Fall through if we do not authenticate - maybe next proxy does */ return FA_KEEP; } /* Table of request header fields: `request_hash'. This include file is generated from "http-req.tab" by maketable. We use the `data' member for keeping a pointer to the line in which the header field occurred. If the function pointed to by HANDLER returns a non-zero value, the header field will be deleted. */ #include "http-req.h" /* Table of response header fields: `response_hash'. This include file is generated from "http-res.tab" by maketable. We use the `data' member for keeping a pointer to the line in which the header occurred. If the function pointed to by HANDLER returns a non-zero value, the header field will be deleted. */ #include "http-res.h" /* Table of CONNECT header fields: `connect_hash'. This include file is generated from "http-con.tab" by maketable. 
We use the `data' member for keeping a pointer to the line in which the header field occurred. */ #include "http-con.h" /* Mutable data for the above tables. */ #define HF_SEEN 0x01 /* Field already seen. */ static unsigned char request_flags[REQUEST_HASH_SIZE]; static unsigned char response_flags[RESPONSE_HASH_SIZE]; static unsigned char connect_flags[CONNECT_HASH_SIZE]; static const char *response_values[RESPONSE_HASH_SIZE]; /* Handle the header fields in the linked list whose head pointer is pointed to by HEAD according to the table pointed to by TABLE. */ static void handle_header_lines (struct hdr_line **head, const struct hash_descr *descr, unsigned char *flags) { const char *s; int nlen, action; int (*handler) (struct hdr_line *); const struct hash_entry *he; struct hdr_line *hl; unsigned multi = H_MULTI; if (cf_http.test) multi |= H_MUTST; /* testhttp cannot add H_MULTI (const!) */ while (*head != NULL) { hl = *head; s = hl->line; DEBUG_ASSERT (hl->length != 0); DEBUG_ASSERT (!WHITE (s[0])); /* Find the colon. The colon must be in the first line, not a continuation line. As continuation lines always introduce white space, we just need to stop at the first white space character. */ nlen = 0; while (nlen < hl->length && s[nlen] != ':' && !WHITE (s[nlen])) ++nlen; /* Remove the header line if there's no colon. Some HTTP servers put the response line twice into the response header; we'll complain about the second one here as it doesn't have a colon. */ if (s[nlen] != ':') { if (cf_http.field_invalid.log) syslog (LLEV, "missing colon in %s header line: %.512s", error_header (), s); action = FA_REMOVE; } else { /* RFC 1945 (HTTP/1.0) requires a single space after the colon. RFC 2068 (HTTP/1.1) allows any amount of linear white space. Be tolerant and insert space if missing. Note that the line is null-terminated, so we don't have to check NLEN. */ if (!WHITE (s[nlen+1])) { /* There's no white space after the colon. Insert a space. 
*/ char *p = xmalloc (hl->length + 2); memcpy (p, s, nlen + 1); p[nlen+1] = ' '; memcpy (p + nlen + 2, s + nlen + 1, hl->length + 1 - (nlen + 1)); free (hl->line); hl->line = p; hl->length += 1; s = p; DEBUG_ASSERT (s[hl->length] == 0); } /* Note that read_lines() uses `buffer' for reading the line, so no (concatenated) line can be bigger than `buffer'. Insertion of a space above doesn't change NLEN. */ DEBUG_ASSERT (nlen <= sizeof (buffer)); lower_copy (buffer, s, nlen); he = find_hash_entry2 (descr, buffer, nlen); if (he == NULL) { if (cf_http.field_unknown.output == REJ_COPY) action = FA_KEEP; else action = FA_DROP_UNKNOWN; } else { int idx = HASH_IDX2 (*descr, he); handler = he->handler; ALWAYS_ASSERT (CONNECT_HANDLER (handler) || REQUEST_HANDLER (handler) || RESPONSE_HANDLER (handler)); action = handler (hl); /* Complain about duplicate header fields. TODO: Remove duplicates. */ if (!(flags[idx] & HF_SEEN)) { flags[idx] |= HF_SEEN; DEBUG_ASSERT (response_values[idx] == NULL); if (he->flags & H_DUP) response_values[idx] = xstrdup (s); } else if (response_values[idx] != NULL) { DEBUG_ASSERT (he->flags & H_DUP); if (strcmp (response_values[idx], s)) { syslog(LLEV,"mismatch in duplicate %s header field: %.256s",error_header (), response_values[idx]); action = FA_DROP_INVALID; } else action = FA_DROP_SILENT; /* Remove duplicate */ } else if (!(he->flags & (H_MERGE | multi))) error (error_code (502), "Duplicate %s header field: %.512s", error_header (), s); } } /* Perform the requested action. Move to the next header field unless the current one is removed, in which case *HEAD already points to the next one. */ head = field_action (head, action); } } /* ============================= HTTP REQUEST ============================== */ /* The request header. */ static struct hdr_line *req_hdr; /* Parse the HTTP-Version portion of a request header or a response-header. Return the length. Return -1 on error. 
*/ static int handle_http_version (const char *s) { const char *s0 = s; char *end; int major, minor; if (strncmp (s, "HTTP/", 5) != 0 || !DIGIT (s[5])) return -1; s += 5; major = strtol (s, &end, 10); if (end == s || *end != '.' || !DIGIT (end[1])) return -1; s = end + 1; minor = strtol (s, &end, 10); if (end == s || !(*end == 0 || WHITE (*end))) return -1; if (major != 1) return -1; /* We support HTTP/1.0 and HTTP/1.1 */ if (minor < 0 || minor > 1) return -1; req_st.http_version[0] = (char)(major + '0'); req_st.http_version[1] = '.'; req_st.http_version[2] = (char)(minor + '0'); req_st.http_version[3] = 0; return end - s0; } /* This table defines the supported request methods. */ struct method { const char *name; int len; unsigned flags; }; static struct method methods[] = { { "CONNECT", 7, METHOD_CONNECT }, { "GET", 3, METHOD_RESPONSE_BODY}, { "HEAD", 4, 0}, { "POST", 4, METHOD_REQUEST_BODY | METHOD_RESPONSE_BODY}, /* Subversion/WebDAV */ { "PROPFIND",8, METHOD_REQUEST_BODY | METHOD_RESPONSE_BODY}, { "PROPPATCH",9,METHOD_REQUEST_BODY | METHOD_RESPONSE_BODY}, { "MKCOL", 5, 0}, { "COPY", 4, METHOD_REQUEST_BODY | METHOD_RESPONSE_BODY}, { "MOVE", 4, METHOD_REQUEST_BODY | METHOD_RESPONSE_BODY}, { "LOCK", 4, METHOD_REQUEST_BODY | METHOD_RESPONSE_BODY}, { "MERGE", 5, METHOD_REQUEST_BODY | METHOD_RESPONSE_BODY}, { "UNLOCK", 6, 0}, { "REPORT", 6, METHOD_REQUEST_BODY | METHOD_RESPONSE_BODY}, { "CHECKOUT",8, METHOD_REQUEST_BODY | METHOD_RESPONSE_BODY}, { "CHECKIN", 7, METHOD_REQUEST_BODY | METHOD_RESPONSE_BODY}, { "MKACTIVITY",10, 0}, /* Unsupported methods: LINK, OPTIONS, PATCH, PUT, TRACE, UNLINK */ { "DELETE", 6, METHOD_RESPONSE_BODY}, }; #define METHODS (sizeof (methods) / sizeof (methods[0])) /* Set req_st.method_name and req_st.method_flags from the method. Return 0 on success, -1 on failure. */ int http_method (const char *s, int len) { int m; /* Request methods are case-sensitive. 
*/ for (m = 0; m < METHODS; ++m) if (methods[m].len == len && memcmp (s, methods[m].name, len) == 0) { req_st.method_name = methods[m].name; req_st.method_flags = methods[m].flags; return 0; } return -1; } /* Implement the -redir option of "deny-destinations". */ static void redirect (const char *url) { if (cf_http.log_redirected) syslog (LLEV, "redirected to %.512s", url); emo_puts (c_out, "HTTP/1.0 302 Moved\r\n"); emo_printf (c_out, "Location: %s\r\n", url); emo_puts (c_out, "Content-type: text/html\r\n\r\n" "Moved"); emo_printf (c_out, "Click here.", url); emo_puts (c_out, ""); if (emo_flush (c_out) != 0) /* This sets errno! */ syslog (LLEV, "redirect(): %s", strerror(errno)); quit (0); } /* Parse the request line pointed to by REQ. This function assumes that a null character terminates the line (a null character is invalid in a request line anyway). REQ must not point into `buffer'. */ static void handle_request_line (const char *req) { int j, url_start, url_end, version_start, version_len; const char *msg, *redir; /* TODO: Check for control characters */ /* Process the method. */ j = 0; while (req[j] != 0 && !WHITE (req[j])) ++j; if (http_method (req, j) != 0) error (400, "Bad request: unknown method"); /* Find the URL (or the hostname for CONNECT). */ while (WHITE (req[j])) /* Be tolerant */ ++j; url_start = j; while (req[j] != 0 && !WHITE (req[j])) ++j; url_end = j; /* Find and check the HTTP version. */ j = strlen (req); while (j > 0 && !WHITE (req[j-1])) --j; version_start = j; version_len = handle_http_version (req + j); if (version_len < 0 || req[version_start + version_len] != 0) error (400, "Bad request: invalid HTTP version"); /* Don't allow SP in the request URI. */ j = url_end; while (WHITE (req[j])) ++j; if (j != version_start) error (400, "Bad request: bad URL"); req_url_str = req + url_start; if (req_st.method_flags & METHOD_CONNECT) { static char hostname[512]; int port; /* Parse the hostname and the port number. 
*/ if (parse_connect (hostname, sizeof (hostname), &port, req_url_str) != 0) error (400, "Bad request: bad hostname"); /* Permissions depending on destination. */ if (config_connect (hostname, port, req_st.ipaddr, &msg) != 0) error (403, msg != NULL ? msg : "Forbidden"); req_st.port = port; } else { /* Parse the URI as URL and check the scheme. We accept proxy requests only. TODO: Currently, we support http and ftp only. */ if (url_parse (&req_url,(octet*) req_url_str, url_end - url_start, 0) != 0 || (req_url.scheme == URL_HTTP && req_url.host_length == 0)) /* Relative URL! */ error (400, "Bad request: bad URL"); if (req_url.scheme != URL_HTTP && req_url.scheme != URL_HTTPS && req_url.scheme != URL_FTP && req_url.scheme != URL_CACHE_OBJECT) error (400, "Bad request: unsupported scheme"); /* Permissions depending on destination. */ if (config_destinations (req_url_str, &req_url, req_st.method_name, &msg, &redir) != 0) { if (redir != NULL) redirect (redir); else error (403, msg != NULL ? msg : "Forbidden"); } /* TODO: Convert scheme to lower case */ /* TODO: Rebuild URL from its parts? */ } } /* Read the client's request from c_in. */ void http_read_request (void) { res_st.code = -1; /* Record that we're handling the request */ req_st.user_agent_seen = 0; req_st.has_body = 0; req_st.content_length = -1; req_hdr = read_lines (c_in, cf_http.log_request_header ? "request header" : NULL); } /* Send the request to s_out. (This interface is provided for regression testing.) Read the body, if necessary, from c_in. */ void http_send_request1 (void) { write_lines (s_out, req_hdr); if (strcmp (req_st.http_version, "1.1") == 0) write_str (s_out, "Connection: close\r\n"); write_buf (s_out, "\r\n", 2); if (req_st.has_body && (req_st.method_flags & METHOD_REQUEST_BODY)) { if (req_st.content_length == -1) error (400, "Content-Length required"); /* TODO: Filter file uploads */ http_copy (s_out, c_in, req_st.content_length); } write_flush (s_out); } /* Send the request to the server. 
*/ void http_send_request (void) { int fd; if (debug_server) { s_in = emi_fdopen (3, 4096); s_out = emo_fdopen (4, 4096); } else { if (cf_http.server == NULL) error (500, "fwtkcfgerr: server required"); /* Well... */ if ((fd = conn_server (cf_http.server, cf_http.port, 0, (char *)0)) < 0) error (404, "Cannot connect to server: %s", strerror (errno)); s_in = emi_fdopen (fd, S_IN_BUFSIZE); s_out = emo_fdopen (fd, 4096); if (sdscp) proxy_set_dscp(fd,sdscp); } if (s_in == NULL || s_out == NULL) error (500, "fdopen() failed: %s", strerror (errno)); emi_timeout (s_in, cf_http.server_timeout); emo_timeout (s_out, cf_http.server_timeout); http_send_request1 (); } /* Process the client's request. Return 0 for CONNECT, 1 otherwise. */ int http_process_request (void) { if (req_hdr == NULL) error (400, "Bad request: empty"); if (cf_http.log_request) syslog (LLEV, "request: %.512s", req_hdr->line); proxy_update_operation(req_hdr->line); handle_request_line (req_hdr->line); if (req_st.method_flags & METHOD_CONNECT) { handle_header_lines (&req_hdr->next, &connect_descr, connect_flags); if (!req_st.user_agent_seen) config_browsers ("no-connect-user-agent"); if (needauth) authreq(); return 0; } else { handle_header_lines (&req_hdr->next, &request_descr, request_flags); fake_header_line(&req_hdr,"Accept-Encoding: identity"); if (!req_st.user_agent_seen) config_browsers ("no-user-agent"); if (needauth) authreq(); return 1; } } /* ===================== RESPONSE BODY (CONTENT-TYPE) ====================== */ /* Modify response header for "text/html". As we change the length of the body by rewriting HTML, we have to drop the Content-Length header field. As we change the contents of the body by rewriting HTML, we have to drop the Content-MD5 header field. */ static void header_html (struct hdr_line **head) { field_action_by_name (head, "content-length", FA_REMOVE); /* TODO: Recompute Content-MD5. 
*/ field_action_by_name (head, "content-md5", FA_REMOVE); /* TODO: If-Modified-Since: ; length= */ } /* Content-type handler: just copy the body unchecked and unchanged. */ static void ch_copy (struct hdr_line **head, int body) { write_header (*head); if (body) { if (res_st.content_length != -1) http_copy (c_out, s_in, res_st.content_length); else http_copy (c_out, s_in, MAX_CONTENT_LENGTH); } } static void ch_inspect_copy (struct hdr_line **head, int body, char **filters) { char *qf; char **flt; char **rcpt = xmalloc(sizeof(char*) * 2); rcpt[0] = xstrdup("unauth@gateway"); rcpt[1] = NULL; if (body) { if (res_st.content_length != -1) qf = http_save ("/tmp/sqgw.XXXXXXXXX", s_in , res_st.content_length); else qf = http_save ("/tmp/sqgw.XXXXXXXXX", s_in , MAX_CONTENT_LENGTH); for (flt = filters; *flt; flt++) { int rc = milter_inspect_binary("squid-gw", proxy_stats.rladdr, proxy_stats.riaddr, *flt, qf, "unauth@gateway", &rcpt, "attachment.dat", "application/octet-stream"); if (rc & MS_ER) { syslog(LLEV,"fwtksyserr: milter %s failed on %s", *flt, qf); error(403,"Content inpsection failed"); } if (rc & MS_QU) { syslog(LLEV,"milter %s requested quarantine of %s", *flt, qf); error(403,"Content quarantined by filter request"); } if (rc & MS_RJ) { unlink(qf); error(403,"Content rejected"); } } write_header (*head); if (res_st.content_length != -1) http_load(qf, c_out, res_st.content_length); else http_load(qf, c_out, MAX_CONTENT_LENGTH); unlink(qf); } else write_header (*head); } /* Content-type handler: reject the body. */ static void ch_reject (struct hdr_line **head, int body) { error (501, "Content-Type unsupported: %.512s", res_st.content_type); } static void ch_html (struct hdr_line **head, int body) { if (res_st.content_encoding != NULL) { /* Squid's ftpget adds "Content-Encoding: 8bit" for "*.htm" and "*.html". Any other content encoding is considered dangerous as we are not prepared to undo the content encoding and therefore cannot look at the HTML code. 
*/ if ((strcmp (res_st.content_encoding, "8bit") != 0) && (strcmp (res_st.content_encoding, "identity") != 0)) error (501, "Content-Encoding %.128s not supported for text/html", res_st.content_encoding); } /* Remove Content-Length and Content-MD5 even for HEAD. */ header_html (head); write_header (*head); if (body) { if (res_st.content_length != -1) html_copy (c_out, s_in, res_st.content_length); else html_copy (c_out, s_in, MAX_CONTENT_LENGTH); } } static void ch_xml (struct hdr_line **head, int body) { if (res_st.content_encoding != NULL) { if (strcmp (res_st.content_encoding, "8bit") != 0) error (501, "Content-Encoding %.128s not supported for text/xml", res_st.content_encoding); } /* Header modification is similar to HTML */ header_html (head); /* XML parser/validator to be placed here! */ ch_copy (head, body); } /* Tags for MSIE brain damage (see below). */ static struct { const char *tag; int len; } auto_html_tags[] = { {"html", 4}, {"head", 4}, {"body", 4} }; #define AUTO_HTML_TAGS (sizeof (auto_html_tags) / sizeof (auto_html_tags[0])) /* Content-type handler: attempt to automatically detect whether the body contains HTML or not. This is a very stupid thing, but we have to do this because Microsoft Internet Explorer does it. If we didn't, bad guys could circumvent all HTML filtering for MSIE just by providing any Content-Type but text/html. */ static void ch_auto (struct hdr_line **head, int body) { int i, j, n; char **milters = NULL; char *magic_ctype = NULL; /* MSIE brain damage. */ if (body) { /* Look ahead. */ DEBUG_ASSERT (cf_http.auto_html_limit <= sizeof (buffer)); n = cf_http.auto_html_limit; if (res_st.content_length != -1 && res_st.content_length < n) n = (int)res_st.content_length; n = emi_fill (s_in, n); if (n == -1) read_error (s_in); n = emi_peek (s_in, buffer, n); if (n == -1) read_error (s_in); /* Look for magic. 
If there is no magic in config, don't even bother */ if (ctypes) magic_ctype = xstrdup(magic_buffer(proxy_magic, buffer, n)); else magic_ctype = "unknown"; /* Look for tags. */ j = INT_MAX; i = 0; while (i < n) { while (i < n && buffer[i] != '<') ++i; if (i < n) { /* TODO: speed up */ for (j = 0; j < AUTO_HTML_TAGS; ++j) if (i + 1 + auto_html_tags[j].len <= n && lower_cmpn (buffer + i + 1, auto_html_tags[j].tag, auto_html_tags[j].len) == 0) break; if (j < AUTO_HTML_TAGS) break; /* Match! */ ++i; } } if (i < n) { /* Match! Be as stupid as MSIE. */ DEBUG_ASSERT (j < AUTO_HTML_TAGS); syslog (LLEV, "Treating as HTML: <%s at %d", auto_html_tags[j].tag, i); ch_html (head, body); return; } } if (ctype_policy(magic_ctype, &milters) || ctype_policy(res_st.content_type, &milters)) { syslog(LLEV,"deny host=%s/%s content-type=%s", proxy_stats.rladdr,proxy_stats.riaddr, magic_ctype); error(403,"%s is not permitted", magic_ctype); } else if (milters) { syslog(LLEV, "type is %s", magic_ctype); ch_inspect_copy (head, body, milters); } else ch_copy (head, body); } /* Don't allow javascript even indirectly */ static void ch_jscript (struct hdr_line **head, int body) { if (cf_http.block_javascript.v) error(403,"Javascript is not permitted"); else ch_auto (head, body); } /* Content-type handler: replace the Content-Type with "application/binary" and call ch_auto(). */ static void ch_other (struct hdr_line **head, int body) { sanitize_content_type (head); ch_auto (head, body); } /* Table of acceptable response content types, along with functions for handling them. NAME is the name of the content type, without parameters; the subtype "*" matches any subtype. HANDLER points to a function which, if there's a body, reads the body from the server, checks the body, and passes it to the client. */ struct ct_type_entry { const char *name; void (*handler)(struct hdr_line **, int); }; /* NOTE: Use lower case for the strings! 
*/

/* NOTE: Accepting "text|*" (*) is a bad idea as this will let, say,
   "text/html " through (note the blank space!) and the browser may
   take that for "text/html"!

   NOTE: Do not alter the table to allow "multipart/mixed",
   "multipart/x-mixed-replace", "multipart|*" (*), etc., as that would
   prevent squid-gw from checking the contents!

   Unfortunately, application/octet-stream is used for Java, so we
   cannot filter Java applets by Content-Type.

   The order of the entries matters: handle_response_body() scans the
   table from the top and uses the first entry for which
   content_type_match() succeeds, so specific names must precede the
   wildcard entries that would also match them.  Names without a slash
   ("zip", "(null)") are compared as whole strings.

   (*) "/" replaced by "|" to avoid having something that looks like a
   C comment inside a C comment. */
static struct ct_type_entry response_content_types[] =
{
  {"text/css", ch_auto},                /* HTML 4.0 */
  {"text/html", ch_html},               /* RFC 1866 */
  {"text/javascript", ch_jscript},
  {"text/plain", ch_auto},              /* RFC 1521 */
  {"text/richtext", ch_auto},
  {"text/sgml", ch_auto},
  {"text/x-setext", ch_auto},
  {"text/x-sgml", ch_auto},
  {"text/tab-separated-values", ch_auto},
  {"text/vnd.wap.wml", ch_auto},
  {"text/xml", ch_xml},
  {"multipart/x-zip", ch_auto},
  {"multipart/x-tar", ch_auto},
  {"multipart/*", ch_reject},           /* DANGER, see above! */
  {"message/*", ch_reject},             /* TODO */
  {"application/*", ch_auto},           /* TODO */
  {"image/*", ch_auto},
  {"audio/*", ch_auto},
  {"video/*", ch_auto},
  {"zip", ch_auto},                     /* Non-standard bare type; according
                                           to the original author it is
                                           sent by sonyericsson.com */
  {"(null)", ch_other},                 /* Broken HTTP servers */
  {"*/*", ch_other}                     /* Everything else maps to ch_reject */
};

/* Number of entries in response_content_types[]. */
#define RESPONSE_CONTENT_TYPES \
  (sizeof (response_content_types) / sizeof (response_content_types[0]))

/* Check if MASK matches the content-type CHECK. This function does
   not ignore case, so both strings should have been translated to
   lower case by the caller.
*/ static int content_type_match (const char *mask, const char *check) { const char *s; int mask_len, check_len; s = strchr (mask, '/'); if (s == NULL) return strcmp (mask, check) == 0; mask_len = s - mask; s = strchr (check, '/'); if (s == NULL) return 0; check_len = s - check; if (!(mask[0] == '*' && mask_len == 1)) if (mask_len != check_len || memcmp (mask, check, mask_len) != 0) return 0; mask += mask_len + 1; check += check_len + 1; if (mask[0] == '*' && mask[1] == 0) return 1; /* TODO: Check syntax of subtype */ mask_len = strlen (mask); check_len = 0; while (check[check_len] != 0 && !WHITE (check[check_len]) && check[check_len] != ';') ++check_len; if (mask_len != check_len) return 0; return memcmp (mask, check, mask_len) == 0; } int multibyte_charset (const char *content_type) { const char *s; s = content_type; while ((s = strchr (s, ';'))) { s++; while (*s && WHITE(*s)) s++; if (!*s) return(CHARSET_GENERIC); if (lower_cmpn(s,"charset=",8)) continue; s += 8; if (!lower_cmpn(s,"utf-8",5)) { return(CHARSET_UTF8); } else return(CHARSET_GENERIC); } return(0); } /* Handle the body of a response, according to the content type. HEAD points to the pointer to the head of the list of response headers. Note that the response header has not yet been sent to the client. BODY is true if there is a body. */ static void handle_response_body (struct hdr_line **head, int body) { int i; size_t len; void (*handler)(struct hdr_line **, int); /* If we haven't seen a Content-Type field, we'll add "Content-Type: text/html" which is safe because that's the only Content-Type handled specially (filtered). */ if (res_st.content_type == NULL) { if (body && cf_http.log_missing_content_type) syslog (LLEV, "missing Content-Type"); fake_header_line (head, "Content-Type: text/html"); res_st.content_type = "text/html"; } /* Log the Content-Type if desired. 
*/ if (cf_http.log_content_type) syslog (LLEV, "Content-Type: %.512s", res_st.content_type); /* Well, due to the way read_line() handles continuation lines, `res_st.content_type' cannot overflow `buffer'. However, someone might change the behavior of read_line()... */ DEBUG_ASSERT (res_st.content_type != NULL); len = strlen (res_st.content_type); if (len > sizeof (buffer) - 1) len = sizeof (buffer) - 1; lower_copy (buffer, res_st.content_type, len); buffer[len] = 0; charset_type = multibyte_charset (buffer); /* TODO: Hash, configuration */ if (len > 64) handler = ch_reject; /* Play safe */ else { for (i = 0; i < RESPONSE_CONTENT_TYPES; ++i) if (content_type_match (response_content_types[i].name, buffer)) break; if (i < RESPONSE_CONTENT_TYPES) handler = response_content_types[i].handler; else handler = ch_reject; } if (cf_http.filter_html.v == 0) { if (handler == ch_other) sanitize_content_type (head); handler = ch_copy; } /* Paranoia! */ ALWAYS_ASSERT (handler == ch_auto || handler == ch_reject || handler == ch_html || handler == ch_copy || handler == ch_xml || handler == ch_other || handler == ch_jscript); handler (head, body); } /* ============================= HTTP RESPONSE ============================= */ /* Parse the status line. Return true if there's a full-response. This function will be called twice. */ static int handle_status_line (const char *line) { int i, j; j = handle_http_version (line); if (j < 0 || !WHITE (line[j])) return 0; while (WHITE (line[j])) ++j; res_st.code = 0; for (i = 0; i < 3; ++i) { if (!DIGIT (line[j+i])) return 0; res_st.code = res_st.code * 10 + line[j+i] - '0'; } /* Some web servers omit the (mandatory) space after the status code. */ if (!WHITE (line[j+3]) && line[j+3] != 0) return 0; return 1; } /* Handle the response from the server. */ void http_response (void) { const char *msg = cf_http.log_response_header ? "response header" : NULL; struct hdr_line *list; int i, j, n; /* We're now processing the response header. 
See error_code() and error_header(). */ res_st.code = 0; res_st.content_length = -1; res_st.content_type = NULL; res_st.content_encoding = NULL; /* Do some look ahead for distinguishing a simple-response (HTTP/0.9) from a a full-response. */ DEBUG_ASSERT (sizeof (buffer) > 128); /* One extra char for NUL */ n = emi_fill (s_in, 128); if (n == -1) read_error (s_in); n = emi_peek (s_in, buffer, 128); if (n == -1) read_error (s_in); if (n == 0) error (502, "Empty response from server"); /* Skip leading empty lines, as suggested by RFC 2068. Actually, we ignore all leading CR and LF characters (well, up to 128 of them, see above). */ i = 0; while (i < n && (buffer[i] == LF || buffer[i] == CR)) ++i; /* Find the end of the first line, using the same definition of "end of line" as read_line() does (CR*LF). If no LF is found, J will be set to N. */ j = i; while (j < n && buffer[j] != LF) ++j; if (j < n) { /* Found a LF character. Ignore any preceding CR characters. */ while (j > i && buffer[j-1] == CR) --j; } buffer[j] = 0; /* Note: This works for j==n, see above. */ /* Check for a status line (even for j==n, as there are HTTP servers which send the CRLF pair in a separate packet). */ if (handle_status_line (buffer + i)) { /* full-response. Read the complete header, that is, all lines up to an empty line. The empty line is not put into the linked list. */ list = read_lines (s_in, msg); } else if (req_url.scheme == URL_CACHE_OBJECT && buffer[i] == '{') { /* The response to a cache_object request is a simple-response. Just copy the body and return. */ http_copy (c_out, s_in, MAX_CONTENT_LENGTH); write_flush (c_out); return; } else { /* simple-response or just some weird stuff. Pretend we received a full-response. */ if (cf_http.log_simple_response) syslog (LLEV, "simple-response: %.512s", buffer + i); list = NULL; fake_header_line (&list, "HTTP/1.0 200 OK"); fake_header_line (&list, "Content-Type: text/html"); } /* Call handle_status_line() again. 
*/ i = handle_status_line (list->line); DEBUG_ASSERT (i); handle_header_lines (&list->next, &response_descr, response_flags); /* Copy the message body if there is one. See RFC 2068 4.3. */ if ((req_st.method_flags & METHOD_RESPONSE_BODY) && !(res_st.code >= 100 && res_st.code <= 199) && !(res_st.code == 204) && !(res_st.code == 304)) handle_response_body (&list, 1); else handle_response_body (&list, 0); write_flush (c_out); } /* Just call http_connect2(), passing some private stuff of this module. */ void http_connect (void) { http_connect2 (req_st.ipaddr, req_st.port, req_st.http_version, cf_http.server_timeout); } /* ============================= CONFIGURATION ============================= */ void http_default_config (void) { cf_http.server = NULL; cf_http.client_timeout = PROXY_TIMEOUT; cf_http.server_timeout = PROXY_TIMEOUT; cf_http.log_content_type = 0; cf_http.log_content_type_conflict = 0; cf_http.log_missing_content_type = 0; cf_http.log_redirected = 0; cf_http.log_request = 0; cf_http.log_request_header = 0; cf_http.log_response_header = 0; cf_http.log_simple_response = 0; cf_http.log_unknown_content_type = 0; cf_http.log_user_agent = 0; cf_http.log_stats = 0; cf_http.parse_cookies = PC_NETSCAPE; cf_http.referer = REF_DROP; cf_http.referer_mask = NULL; cf_http.auto_html_limit = 256; cf_http.filter_html.v = 1; cf_http.filter_html.force = 0; cf_http.block_cookies.v = 1; cf_http.block_cookies.force = 0; cf_http.block_javascript.v = 1; cf_http.block_javascript.force = 0; cf_http.block_style.v = 1; cf_http.block_style.force = 0; cf_http.user_agent = NULL; cf_http.field_dangerous.output = REJ_PREFIX; cf_http.field_dangerous.log = 1; cf_http.field_unknown.output = REJ_PREFIX; cf_http.field_unknown.log = 1; cf_http.field_silent.output = REJ_DROP; cf_http.field_silent.log = 0; cf_http.field_privacy.output = REJ_DROP; cf_http.field_privacy.log = 0; cf_http.field_invalid.output = REJ_DROP; cf_http.field_invalid.log = 1; cf_http.test = 0; } /* Parse the argument of 
the "cookies" attribute. Return 0 on success, -1 on error. */ int parse_cookies_mode (const char *s) { if (strcmp (s, "nocheck") == 0) cf_http.parse_cookies = PC_NOCHECK; else if (strcmp (s, "netscape") == 0) cf_http.parse_cookies = PC_NETSCAPE; else if (strcmp (s, "netscape-quote") == 0) cf_http.parse_cookies = PC_NETSCAPE_QUOTE; else if (strcmp (s, "rfc2109") == 0) cf_http.parse_cookies = PC_RFC2109; else return -1; return 0; } /* Parse the argument of the "referer" attribute. Return 0 on success, -1 on error. */ int parse_referer_mode (const char *s) { if (strcmp (s, "drop") == 0) cf_http.referer = REF_DROP; else if (strcmp (s, "keep-all") == 0) cf_http.referer = REF_KEEP_ALL; else if (strcmp (s, "keep-same-site") == 0) cf_http.referer = REF_KEEP_SAME_SITE; else if (strcmp (s, "keep-match") == 0) cf_http.referer = REF_KEEP_MATCH; else return -1; return 0; }