/*
* zsync - client side rsync over http
* Copyright (C) 2004,2005 Colin Phipps <cph@moria.org.uk>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the Artistic License v2 (see the accompanying
* file COPYING for the full license terms), or, at your option, any later
* version of the same license.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* COPYING file for details.
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <unistd.h>
#include "config.h"
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/socket.h>
#include <netdb.h>
#include <time.h>
#include "http.h"
#include "url.h"
#include "progress.h"
/* Open a TCP connection to node:service.
 *
 * node    - host name or numeric address to resolve
 * service - service name or port number, as accepted by getaddrinfo
 *
 * Each address returned by the resolver is tried in turn until one connects.
 * Returns the connected socket descriptor, or -1 if resolution failed or no
 * address could be connected to (a diagnostic is printed to stderr).
 */
int connect_to(const char* node, const char* service)
{
    struct addrinfo hint;
    struct addrinfo *ai = NULL;
    struct addrinfo *p;
    int sd = -1;
    int rc;

    memset(&hint, 0, sizeof hint);
    hint.ai_family = AF_UNSPEC;
    hint.ai_socktype = SOCK_STREAM;

    rc = getaddrinfo(node, service, &hint, &ai);
    if (rc != 0) {
        /* getaddrinfo reports failure through its return value, not errno,
         * so perror() would print an unrelated message here */
        fprintf(stderr, "%s: %s\n", node ? node : "(null)", gai_strerror(rc));
        return -1;
    }

    /* Stop at the first address that yields a connected socket */
    for (p = ai; sd == -1 && p != NULL; p = p->ai_next) {
        sd = socket(p->ai_family, p->ai_socktype, p->ai_protocol);
        if (sd == -1) {
            perror("socket");
            continue;
        }
        if (connect(sd, p->ai_addr, p->ai_addrlen) < 0) {
            perror(node);
            close(sd);
            sd = -1;
        }
    }

    freeaddrinfo(ai);
    return sd;
}
/* Wrap a connected socket in a stdio stream and read the HTTP status line.
 *
 * fd   - descriptor on which a request has already been sent
 * code - receives the numeric status code, or 0 on failure
 *
 * Returns the stream positioned just after the status line. On failure
 * returns NULL; in that case fd has been closed, so the caller must not
 * close it again.
 */
FILE* http_get_stream(int fd, int* code)
{
    FILE* f = fdopen(fd, "r");
    char buf[256];
    char *p;

    if (f == NULL) {
        /* No stream owns the descriptor yet, so release it ourselves */
        perror("fdopen");
        close(fd);
        *code = 0;
        return NULL;
    }

    /* strncmp stops at the terminator, so a short line is never compared
     * past the bytes fgets actually wrote */
    if (fgets(buf, sizeof(buf), f) == NULL
        || strncmp(buf, "HTTP/1", 6) != 0
        || (p = strchr(buf, ' ')) == NULL) {
        *code = 0;
        fclose(f);
        return NULL;
    }

    /* The status code follows the first space of the status line */
    *code = atoi(p + 1);
    return f;
}
/* Scan response headers on f for a Location header.
 *
 * f       - stream positioned at the start of the header lines
 * cur_url - URL that was requested; passed to make_url_absolute together
 *           with the Location value
 *
 * Returns the result of make_url_absolute() for the first Location header
 * found. Returns NULL if the headers end first, a line has no ':', the
 * Location value is empty, or the stream runs out.
 */
char* get_location_url(FILE* f, const char* cur_url) {
    char line[1024];

    while (fgets(line, sizeof(line), f) != NULL) {
        char *value;
        char *end;

        /* A blank line terminates the headers: no Location was present */
        if (line[0] == '\r' || line[0] == '\n')
            return NULL;

        /* Split "Name: value" at the colon */
        value = strchr(line, ':');
        if (value == NULL)
            return NULL;
        *value = 0;
        value++;

        if (strcasecmp(line, "Location") != 0)
            continue;

        /* Skip leading spaces of the value */
        while (*value == ' ')
            value++;

        /* Cut the value at the first space or line terminator */
        for (end = value; *end && *end != '\r' && *end != '\n' && *end != ' '; end++)
            ;
        *end = 0;

        if (*value == 0)
            return NULL;
        return make_url_absolute(cur_url, value);
    }
    return NULL; // TODO
}
/* Host name of the HTTP proxy to connect through; NULL (the initial value)
 * means connect directly to the target host. Set by set_proxy_from_string. */
char *proxy;
/* Service name or port number used when connecting to the proxy.
 * Set by set_proxy_from_string. */
char *pport;
/* When non-NULL, sent as the Referer header of range requests
 * (see range_fetch_getmore). Not assigned anywhere in this file. */
char *referer;
/* Configure the global proxy host/port from a user-supplied string.
 *
 * s - either an "http://host[:port]/..." URL, or a bare "host[:port]"
 *
 * When no port is given, the service name "webcache" is used.
 * Returns 1 on success. Returns 0 on failure (unparsable URL or out of
 * memory); in that case proxy is reset to NULL so that a half-initialised
 * proxy is never used for later connections.
 */
int set_proxy_from_string(const char* s)
{
    /* strncmp stops at the terminator, so strings shorter than the prefix
     * are not read past their end */
    if (strncmp(s, "http://", 7) == 0) {
        proxy = malloc(256);
        if (!proxy) return 0;
        if (!get_host_port(s, proxy, 256, &pport)) {
            free(proxy);
            proxy = NULL;
            return 0;
        }
        if (!pport) {
            pport = strdup("webcache");
            if (!pport) {
                free(proxy);
                proxy = NULL;
                return 0;
            }
        }
        return 1;
    } else {
        char *p;

        proxy = strdup(s);
        if (!proxy) return 0;

        /* Split "host:port" in place; the host part stays in proxy */
        p = strchr(proxy, ':');
        if (p) *p++ = 0;

        pport = strdup(p ? p : "webcache");
        if (!pport) {
            free(proxy);
            proxy = NULL;
            return 0;
        }
        return 1;
    }
}
/* Format t as an HTTP date ("Thu, 01 Jan 1970 00:00:00 GMT") into buf.
 *
 * t    - time to format, converted to UTC
 * buf  - destination buffer
 * blen - size of buf in bytes
 *
 * Returns buf on success, or NULL if the time could not be converted or
 * strftime produced no output (e.g. the result did not fit in blen bytes).
 */
static char* http_date_string(time_t t, char* const buf, const int blen)
{
    struct tm utc;

    if (gmtime_r(&t, &utc) == NULL)
        return NULL;

    if (strftime(buf, blen, "%a, %d %h %Y %T GMT", &utc) == 0)
        return NULL;

    return buf;
}
/* Fetch orig_url over HTTP/1.0 and return the body as a rewound stdio stream.
 *
 * orig_url      - URL to fetch; 301/302 redirects are followed, with at most
 *                 5 requests in total
 * track_referer - if non-NULL, receives the final URL (malloc'd, may be NULL
 *                 after a bad redirect); ownership passes to the caller.
 *                 It is not written when the function fails before issuing
 *                 any request because of memory exhaustion.
 * tfname        - if non-NULL, the body is stored in this file. The download
 *                 goes to "<tfname>.part" and is renamed on completion. An
 *                 existing .part file is resumed with a conditional Range
 *                 request; an existing tfname is revalidated with
 *                 If-Modified-Since and reused on a 304 response.
 *                 If NULL, a temporary file is used.
 *
 * Returns a stream open on the downloaded (or reused) file, or NULL on
 * failure. When the transfer itself fails (read error or short write) the
 * .part file is left in place, not renamed, so a later call can resume it.
 */
FILE* http_get(const char* orig_url, char** track_referer, const char* tfname)
{
    int allow_redirects = 5;
    char* url;
    FILE* f = NULL;
    FILE* g = NULL;
    char* fname = NULL;
    char ifrange[200] = { "" };
    int code = 0;
    int failed = 0;

    if (tfname) {
        struct stat st;
        char date[50];

        fname = malloc(strlen(tfname) + sizeof(".part"));
        if (!fname) return NULL;
        strcpy(fname, tfname);
        strcat(fname, ".part");

        if (stat(fname, &st) == 0) {
            /* Partial download present: ask for the remainder, provided the
             * remote file has not changed since we fetched the first part */
            if (http_date_string(st.st_mtime, date, sizeof(date)) != NULL)
                snprintf(ifrange, sizeof(ifrange),
                         "If-Unmodified-Since: %s\r\nRange: bytes=%lld-\r\n",
                         date, (long long)st.st_size);
        } else if (errno == ENOENT && stat(tfname, &st) == 0) {
            /* Complete copy present: only download if the remote is newer */
            if (http_date_string(st.st_mtime, date, sizeof(date)) != NULL)
                snprintf(ifrange, sizeof(ifrange), "If-Modified-Since: %s\r\n", date);
        }
    }

    url = strdup(orig_url);
    if (!url) { free(fname); return NULL; }

    while (allow_redirects-- && url && !f) {
        char hostn[256];
        char request[1024];
        char *path;
        char *port = NULL;
        const char *portname;
        int default_port;
        int reqlen;
        int sfd;

        path = get_host_port(url, hostn, sizeof(hostn), &port);
        if (path == NULL) break;

        /* NOTE(review): get_host_port appears to always supply a port string
         * (range_fetch_start frees it, so it is heap-allocated); fall back to
         * "http" defensively in case it leaves it NULL */
        portname = port ? port : "http";
        default_port = (strcmp(portname, "http") == 0);

        /* Via a proxy the request line carries the absolute URL */
        reqlen = snprintf(request, sizeof(request),
                          "GET %s HTTP/1.0\r\nHost: %s%s%s\r\nUser-Agent: zsync/%s\r\n%s\r\n",
                          proxy ? url : path,
                          hostn, default_port ? "" : ":", default_port ? "" : portname,
                          VERSION,
                          ifrange);
        if (reqlen < 0 || (size_t)reqlen >= sizeof(request)) {
            /* A truncated request would be malformed; do not send it */
            fprintf(stderr, "request too long for url %s\n", url);
            free(port);
            break;
        }

        sfd = connect_to(proxy ? proxy : hostn, proxy ? pport : portname);
        free(port);
        if (sfd == -1) break;

        {
            /* send() may accept only part of the buffer; loop until done */
            const char *out = request;
            size_t left = (size_t)reqlen;

            while (left > 0) {
                ssize_t sent = send(sfd, out, left, 0);
                if (sent == -1) {
                    if (errno == EINTR) continue;
                    break;
                }
                out += sent;
                left -= (size_t)sent;
            }
            if (left > 0) {
                perror("send");
                close(sfd);
                break;
            }
        }

        /* On failure http_get_stream has already disposed of sfd */
        f = http_get_stream(sfd, &code);
        if (!f) break;

        if (code == 301 || code == 302) {
            char *oldurl = url;
            url = get_location_url(f, oldurl);
            free(oldurl);
            fclose(f); f = NULL;
        } else if (code == 412) { /* Precondition (i.e. if-unmodified-since) failed */
            ifrange[0] = 0;
            fclose(f); f = NULL; /* and go round again without the conditional Range: */
        } else if (code == 200) { /* Downloading whole file */
            g = fname ? fopen(fname, "w+") : tmpfile();
        } else if (code == 206 && fname) { /* Had partial content and server confirms not modified */
            g = fopen(fname, "a+");
        } else if (code == 304 && tfname) { /* Unchanged (if-modified-since was false) */
            g = fopen(tfname, "r");
        } else {
            fclose(f); f = NULL; break;
        }
    }

    /* Report failure while url is still ours to read: it is handed to the
     * caller or freed immediately below */
    if (!f)
        fprintf(stderr, "failed on url %s\n", url ? url : "(missing redirect)");

    if (track_referer)
        *track_referer = url;
    else
        free(url);

    if (!f) {
        free(fname);
        return NULL;
    }

    if (code == 304) {
        /* Local copy is current; hand it back as is (NULL if it would not open) */
        fclose(f);
        free(fname);
        return g;
    }

    if (!g) {
        perror("fopen");
        fclose(f);
        free(fname);
        return NULL;
    }

    {
        unsigned long long len = 0;
        size_t got = 0;
        struct progress p = {0,0,0,0};
        int at_line_start = 1;
        char buf[1024];

        /* Skip headers. TODO support content-encodings, Content-Location etc.
         * at_line_start guards against treating the tail of an over-long
         * header line as a header of its own. */
        while (fgets(buf, sizeof(buf), f) != NULL) {
            if (at_line_start) {
                if (buf[0] == '\r' || buf[0] == '\n') break;
                sscanf(buf, "Content-Length: %llu", &len);
            }
            at_line_start = (strchr(buf, '\n') != NULL);
        }
        if (ferror(f)) {
            perror("read");
            failed = 1;
        }

        if (!failed) {
            if (!no_progress)
                do_progress(&p, 0, got);

            while (!feof(f)) {
                size_t r = fread(buf, 1, sizeof(buf), f);

                if (r > 0) {
                    if (fwrite(buf, 1, r, g) < r) {
                        fprintf(stderr, "short write on %s\n", fname ? fname : "(temporary file)");
                        failed = 1;
                        break;
                    }
                    got += r;
                    if (!no_progress)
                        do_progress(&p, len ? (100.0*got / len) : 0, got);
                }
                /* fread signals errors via ferror(), not a negative return;
                 * without this check a read error would loop forever */
                if (ferror(f)) {
                    perror("read");
                    failed = 1;
                    break;
                }
            }
            if (!no_progress) end_progress(&p, feof(f) ? 2 : 0);
        }
        fclose(f);
    }

    if (failed) {
        /* Keep the .part file so the next attempt can resume it */
        fclose(g);
        free(fname);
        return NULL;
    }

    rewind(g);
    if (fname) {
        if (rename(fname, tfname) != 0)
            perror("rename");
        free(fname);
    }
    return g;
}
/* HTTP Range: / 206 response interface
 *
 * State of one ranged download: the connection, a read buffer over it, the
 * queue of byte ranges to request, and the position within the response
 * currently being read.
 *
 * If we are reading a multipart/byteranges, boundary is set.
 * If we are in the middle of an actual block, block_left is non-zero and offset gives the remembered offset.
 */
struct range_fetch {
/* Multipart boundary string taken from the Content-Type header (heap-allocated), or NULL */
char* boundary;
/* Request target sent on the GET line: the full URL when using a proxy, otherwise the part returned by get_host_port */
char* url;
/* Value of the Host header: "host" or "host:port" */
char hosth[256];
/* Host and service actually connected to (the proxy's, if one is configured) */
char* chost;
char* cport;
/* Bytes of the current block not yet handed to the caller */
size_t block_left;
/* Offset reported to the caller for the next byte of the current block */
off64_t offset;
/* Socket descriptor, or -1 when not connected */
int sd;
/* Read buffer; the unread data is buf[buf_start .. buf_end) */
char buf[4096];
int buf_start, buf_end;
/* Running total of bytes read from the socket */
off64_t bytes_down;
/* 0: can send more, 1: cannot send more (but one set of headers still to read), 2: cannot send more and all existing headers read */
int server_close;
/* Queue of ranges as consecutive pairs of values (2*nranges entries); each pair is sent as "first-second" in the Range header */
off64_t* ranges_todo;
/* Number of ranges (pairs) in ranges_todo */
int nranges;
/* Number of queued ranges for which a Content-Range has been received */
int rangesdone;
/* Index of the next queued range to include in a request */
int rangessent;
};
/* Read more bytes from the socket into rf->buf.
 *
 * Unread data is first moved to the start of the buffer so that all free
 * space lies after it. The read is retried if interrupted by a signal.
 *
 * Returns the result of read(): the number of bytes added (also accounted
 * in rf->bytes_down), 0 if nothing was read, or a negative value on error
 * (after printing a diagnostic).
 */
static int get_more_data(struct range_fetch* rf)
{
    int got;

    if (rf->buf_start != 0) {
        int unread = rf->buf_end - rf->buf_start;

        memmove(rf->buf, rf->buf + rf->buf_start, unread);
        rf->buf_end = unread;
        rf->buf_start = 0;
    }

    for (;;) {
        got = read(rf->sd, rf->buf + rf->buf_end, sizeof(rf->buf) - rf->buf_end);
        if (got != -1 || errno != EINTR)
            break;
    }

    if (got < 0) {
        perror("read");
        return got;
    }

    rf->bytes_down += got;
    rf->buf_end += got;
    return got;
}
/* fgets-like line reader over the range_fetch buffer.
 *
 * Copies up to len-1 bytes of the next line (including its '\n') from rf
 * into buf and NUL-terminates it. More data is pulled from the socket until
 * a newline is buffered; if no more data arrives, whatever is buffered is
 * returned instead (possibly the empty string).
 *
 * Always returns buf.
 */
static char* rfgets(char* buf, size_t len, struct range_fetch* rf)
{
    for (;;) {
        char *line_end = memchr(rf->buf + rf->buf_start, '\n', rf->buf_end - rf->buf_start);

        if (line_end != NULL) {
            line_end++; /* Step past \n */
        } else if (get_more_data(rf) > 0) {
            /* Got more bytes; look for the newline again */
            continue;
        } else {
            /* If cut off, return the rest of the buffer */
            line_end = rf->buf + rf->buf_end;
        }

        {
            char *line = rf->buf + rf->buf_start;
            size_t avail = line_end - line;
            size_t ncopy = len - 1; /* allow for trailing \0 */

            if (ncopy > avail)
                ncopy = avail;
            memcpy(buf, line, ncopy);
            buf[ncopy] = 0;
            rf->buf_start += ncopy;
            return buf;
        }
    }
}
/* Create the state for fetching byte ranges of orig_url.
 *
 * No connection is made here; the returned object starts with an empty
 * range queue and sd == -1. When a proxy is configured the request target
 * is the absolute URL and the connection goes to the proxy; otherwise the
 * target is the part returned by get_host_port and the connection goes to
 * the URL's own host and port.
 *
 * Returns a new object to be released with range_fetch_end(), or NULL if
 * the URL cannot be parsed or memory runs out.
 */
struct range_fetch* range_fetch_start(const char* orig_url)
{
    struct range_fetch* rf = calloc(1, sizeof *rf);
    char hostn[sizeof(rf->hosth)];
    char *p;

    if (!rf) return NULL;

    /* Give every field a defined value up front: range_fetch_addranges and
     * get_range_block read several of them before anything else sets them */
    rf->boundary = NULL;
    rf->url = NULL;
    rf->chost = NULL;
    rf->cport = NULL;
    rf->block_left = 0;
    rf->offset = 0;
    rf->sd = -1;
    rf->buf_start = rf->buf_end = 0;
    rf->bytes_down = 0;
    rf->server_close = 0;
    rf->ranges_todo = NULL;
    rf->nranges = rf->rangesdone = rf->rangessent = 0;

    p = get_host_port(orig_url, hostn, sizeof(hostn), &(rf->cport));
    if (!p) { free(rf); return NULL; }

    /* NOTE(review): get_host_port appears to always supply a port string;
     * default to "http" defensively in case it leaves it NULL */
    if (!rf->cport && (rf->cport = strdup("http")) == NULL) {
        free(rf);
        return NULL;
    }

    /* The Host header omits the port when it is the default one */
    if (strcmp(rf->cport, "http") != 0)
        snprintf(rf->hosth, sizeof(rf->hosth), "%s:%s", hostn, rf->cport);
    else
        snprintf(rf->hosth, sizeof(rf->hosth), "%s", hostn);

    if (proxy) {
        /* URL must be absolute; throw away cport and get port for proxy */
        rf->url = strdup(orig_url);
        free(rf->cport);
        rf->cport = strdup(pport);
        rf->chost = strdup(proxy);
    } else {
        /* cport already set; set url to relative part and chost to the target */
        rf->url = strdup(p);
        rf->chost = strdup(hostn);
    }

    if (!rf->url || !rf->cport || !rf->chost) {
        free(rf->url);
        free(rf->cport);
        free(rf->chost);
        free(rf);
        return NULL;
    }
    return rf;
}
/* Append ranges to the queue of ranges still to be fetched.
 *
 * rf      - fetch state
 * ranges  - 2*nranges values, as consecutive pairs; copied, so the caller
 *           keeps ownership of the array
 * nranges - number of pairs in ranges; values <= 0 add nothing
 *
 * Ranges already completed are dropped from the front of the queue and the
 * rangesdone/rangessent indices are rebased to match. If memory cannot be
 * allocated the queue is left unchanged and the new ranges are not added.
 */
void range_fetch_addranges(struct range_fetch* rf, off64_t* ranges, int nranges)
{
    int existing_ranges = rf->nranges - rf->rangesdone;
    off64_t* nr;

    if (nranges <= 0) return;

    nr = malloc(2 * sizeof(*ranges) * ((size_t)nranges + (size_t)existing_ranges));
    if (!nr) return;

    /* Copy existing queue over. Skipped when empty: ranges_todo is NULL
     * before the first call, and memcpy needs valid pointers even for a
     * zero-length copy. */
    if (existing_ranges > 0)
        memcpy(nr, &(rf->ranges_todo[2*rf->rangesdone]), 2*sizeof(*ranges)*existing_ranges);

    /* And append the new stuff */
    memcpy(&nr[2*existing_ranges], ranges, 2*sizeof(*ranges)*nranges);

    /* Move back rangessent and rangesdone to the new locations, and update
     * the count. While disconnected rangessent carries no information (it
     * is reset on the next connect), so give it a defined value instead of
     * reading it. */
    if (rf->sd == -1)
        rf->rangessent = 0;
    else
        rf->rangessent -= rf->rangesdone;
    rf->rangesdone = 0;
    rf->nranges = existing_ranges + nranges;

    free(rf->ranges_todo);
    rf->ranges_todo = nr;
}
/* Open a new connection for rf and reset the per-connection state.
 *
 * rf->sd receives the result of connect_to() (-1 on failure). Nothing has
 * been requested on the new connection yet, so rangessent is moved back to
 * rangesdone and server_close is cleared.
 */
static void range_fetch_connect(struct range_fetch* rf)
{
    int sock = connect_to(rf->chost, rf->cport);

    rf->rangessent = rf->rangesdone;
    rf->server_close = 0;
    rf->sd = sock;
}
/* Send one HTTP/1.1 GET request for the next batch of queued ranges.
 *
 * Does nothing when every queued range has already been requested. Ranges
 * are added to the Range header until the request passes 1200 bytes, 20
 * ranges have been added, or the queue is exhausted. If this request covers
 * the end of the queue, "Connection: close" is sent and rf->server_close is
 * set to 1 so no further request is issued on this connection.
 *
 * rf->rangessent is advanced past the ranges included, whether or not the
 * send succeeds; a send failure is only reported on stderr.
 *
 * NOTE(review): a very long URL or Referer can truncate the request in the
 * fixed-size buffer; that case is not detected here.
 */
static void range_fetch_getmore(struct range_fetch* rf)
{
    char request[2048];
    size_t l;
    int max_range_per_request = 20;
    int final_request;

    /* Only if there's stuff queued to get */
    if (rf->rangessent == rf->nranges) return;

    snprintf(request,sizeof(request),
             "GET %s HTTP/1.1\r\n"
             "User-Agent: zsync/" VERSION "\r\n"
             "Host: %s"
             "%s%s\r\n"
             "Range: bytes=",
             rf->url, rf->hosth,
             referer ? "\r\nReferer: " : "", referer ? referer : ""
    );

    /* The loop condition here is just a sanity check, lastrange is the real loop control */
    while (rf->rangessent < rf->nranges) {
        int i = rf->rangessent;
        int lastrange = 0;

        l = strlen(request);
        if (l > 1200 || !(--max_range_per_request) || i == rf->nranges-1) lastrange = 1;

        /* off64_t is not necessarily long long; cast to match the format */
        snprintf(request + l, sizeof(request)-l, "%lld-%lld%s",
                 (long long)rf->ranges_todo[2*i], (long long)rf->ranges_todo[2*i+1],
                 lastrange ? "" : ",");

        rf->rangessent++;
        if (lastrange) break;
    }

    l = strlen(request);

    /* Possibly close the connection (and record the fact, so we definitely don't send more stuff) if this is the last */
    final_request = (rf->rangessent == rf->nranges);
    if (final_request) rf->server_close = 1;
    snprintf(request + l, sizeof(request)-l, "\r\n%s\r\n", final_request ? "Connection: close\r\n" : "");

    {
        size_t len = strlen(request);
        const char *p = request;

        /* send() may take only part of the buffer or be interrupted */
        while (len > 0) {
            ssize_t r = send(rf->sd, p, len, 0);
            if (r == -1) {
                if (errno == EINTR) continue;
                perror("send");
                break;
            }
            p += r;
            len -= (size_t)r;
        }
    }
}
/* Lower-case the ASCII letters 'A'..'Z' of the NUL-terminated string s in
 * place; every other byte is left untouched. */
static void buflwr(char* s)
{
    for (; *s != 0; s++) {
        if (*s >= 'A' && *s <= 'Z')
            *s = *s - 'A' + 'a';
    }
}
/* Read the status line and headers of the next response on rf.
 *
 * This has 3 cases - EOF returns 0, good returns >0, error returns <0.
 *
 * A response is good only if its status is 206 and the headers give either
 * a Content-Range (rf->block_left / rf->offset are set) or a
 * multipart/byteranges boundary (rf->boundary is set), but not both.
 * rf->server_close becomes 2 if the server speaks HTTP/1.0 or sends
 * "Connection: close".
 */
int range_fetch_read_http_headers(struct range_fetch* rf)
{
    char buf[512];

    { /* read status line */
        char *p;
        int c;

        if (rfgets(buf,sizeof(buf),rf) == NULL)
            return -1;
        if (buf[0] == 0) return 0; /* EOF, caller decides if that's an error */

        /* strncmp stops at the terminator, so a short line is not compared
         * past the bytes rfgets wrote */
        if (strncmp(buf, "HTTP/1", 6) != 0 || (p = strchr(buf, ' ')) == NULL) {
            return -1;
        }
        if ((c = atoi(p+1)) != 206) {
            fprintf(stderr,"bad status code %d\n",c);
            return -1;
        }
        if (*(p-1) == '0') { /* HTTP/1.0 server? */
            rf->server_close = 2;
        }
    }

    while (1) {
        char *p;

        if (rfgets(buf,sizeof(buf),rf) == NULL) return -1;
        if (buf[0] == '\r' || buf[0] == '\n' || buf[0] == '\0') {
            /* End of headers. We are happy provided we got the block boundary */
            if ((rf->boundary || rf->block_left) && !(rf->boundary && rf->block_left)) return 1;
            break;
        }

        /* Split "Name: value"; the whitespace after the colon is optional */
        p = strchr(buf, ':');
        if (!p) break;
        *p++ = 0;
        while (*p == ' ' || *p == '\t') p++;
        buflwr(buf);
        /* buf is the header name (lower-cased), p the value */

        if (!strcmp(buf,"content-range")) {
            unsigned long long from, to;

            /* Only trust the values if both were actually parsed */
            if (sscanf(p, "bytes %llu-%llu/", &from, &to) == 2 && from <= to) {
                rf->block_left = to + 1 - from;
                rf->offset = from;
            }
            /* Can only have got one range. */
            rf->rangesdone++;
            rf->rangessent = rf->rangesdone;
        }

        /* The value still carries its line terminator, so compare only the
         * token itself and check what follows it */
        if (!strcmp(buf,"connection") && !strncasecmp(p,"close",5)
            && (p[5] == '\0' || p[5] == '\r' || p[5] == '\n' || p[5] == ' ' || p[5] == ',')) {
            rf->server_close = 2;
        }

        if (!strcasecmp(buf,"content-type") && !strncasecmp(p,"multipart/byteranges",20)) {
            char *q = strstr(p,"boundary=");
            char *b;

            if (!q) break;
            q += 9;

            if (*q == '"') {
                /* Quoted boundary: take everything up to the closing quote */
                char *e;
                b = strdup(q+1);
                if (!b) return -1;
                e = strchr(b,'"');
                if (e) *e = 0;
            } else {
                /* Unquoted: strip the trailing whitespace / line terminator */
                size_t n;
                b = strdup(q);
                if (!b) return -1;
                n = strlen(b);
                while (n > 0 && (b[n-1] == '\r' || b[n-1] == ' ' || b[n-1] == '\n'))
                    b[--n] = '\0';
            }
            free(rf->boundary);
            rf->boundary = b;
        }
    }
    return -1;
}
/* Fetch the next piece of downloaded range data.
 *
 * rf     - fetch state
 * offset - receives the offset of the first byte stored in data
 * data   - destination buffer
 * dlen   - size of data in bytes
 *
 * Connects and sends requests as needed, reads response headers (and
 * multipart part headers) until positioned inside a block, then copies up
 * to dlen bytes of that block into data.
 *
 * Returns the number of bytes stored, 0 when there is nothing more to read
 * (all ranges done, or the stream ended), or -1 on error.
 */
int get_range_block(struct range_fetch* rf, off64_t* offset, unsigned char* data, size_t dlen)
{
    size_t bytes_to_caller = 0;

    if (!rf->block_left) {
check_boundary:
        if (!rf->boundary) {
            int newconn = 0;
            int header_result;

            /* Server will send nothing more on this connection; drop it */
            if (rf->sd != -1 && rf->server_close == 2) {
                close(rf->sd); rf->sd = -1;
            }
            if (rf->sd == -1) {
                if (rf->rangesdone == rf->nranges) return 0;
                range_fetch_connect(rf);
                if (rf->sd == -1) return -1;
                newconn = 1;
                range_fetch_getmore(rf);
            }
            header_result = range_fetch_read_http_headers(rf);
            /* Might be the last */
            if (rf->server_close == 1) rf->server_close = 2;
            /* EOF on first connect is fatal */
            if (newconn && header_result == 0) {
                fprintf(stderr,"EOF from %s\n",rf->url);
                return -1;
            }
            /* Return EOF or error to caller */
            if (header_result <= 0) return header_result ? -1 : 0;
            /* HTTP Pipelining - send next request before reading current response */
            if (!rf->server_close) range_fetch_getmore(rf);
        }
        if (rf->boundary) {
            char buf[512];
            int gotr = 0;

            /* Skip the line preceding the boundary marker */
            if (!rfgets(buf,sizeof(buf),rf)) return 0;
            /* Get, hopefully, boundary marker */
            if (!rfgets(buf,sizeof(buf),rf)) return 0;
            if (buf[0] != '-' || buf[1] != '-') return 0;
            if (memcmp(&buf[2],rf->boundary,strlen(rf->boundary))) {
                fprintf(stderr,"got bad block boundary: %s != %s",rf->boundary, buf);
                return -1; /* This is an error now */
            }
            /* Look for last record marker */
            if (buf[2+strlen(rf->boundary)] == '-') { free(rf->boundary); rf->boundary = NULL; goto check_boundary; }

            /* Read the part headers up to the blank line */
            for(;buf[0] != '\r' && buf[0] != '\n' && buf[0] != '\0';) {
                long long from, to;

                if (!rfgets(buf,sizeof(buf),rf)) return 0;
                buflwr(buf);
                /* Parse as 64-bit so offsets of 2 GiB and above are not
                 * truncated, and reject inverted ranges, which would wrap
                 * block_left to a huge value */
                if (2 == sscanf(buf,"content-range: bytes %lld-%lld/",&from,&to)
                    && from >= 0 && to >= from) {
                    rf->offset = from; rf->block_left = to - from + 1; gotr = 1;
                }
            }
            if (!gotr) {
                fprintf(stderr,"got multipart/byteranges but no Content-Range?");
                return -1;
            }
            rf->rangesdone++;
        }
    }

    /* Now the easy bit - we are reading a block */
    if (!rf->block_left) return 0;
    *offset = rf->offset;

    for (;;) {
        size_t rl = rf->block_left;

        /* Note that we do not use n to test EOF - that is implicit in setting rl
         * to min(rl,buf_end-buf_start), as buf_end-buf_start == 0 iff EOF */
        /* We want to send rf->block_left to the caller, but we may have less in the buffer, and they may have less buffer space, so reduce appropriately */
        if (rl > dlen) rl = dlen;
        if (rf->buf_end - rf->buf_start < rl) {
            rl = rf->buf_end - rf->buf_start;
            /* If we have exhausted the buffer, get more data.
             * If we don't get data, drop through and return what we have got.
             * If we do, back to top of loop and recalculate how much to return to caller.
             */
            if (!rl && get_more_data(rf) > 0) continue;
        }
        if (!rl)
            return bytes_to_caller;

        /* Copy as much as we can to their buffer, freeing space in rf->buf */
        memcpy(data, &(rf->buf[rf->buf_start]), rl);
        rf->buf_start += rl; /* Track pos in our buffer... */
        data += rl; dlen -= rl; /* ...and caller's */
        bytes_to_caller += rl; /* ...and the return value */

        /* Keep track of our location in the stream */
        rf->block_left -= rl;
        rf->offset += rl;
    }
}
off64_t range_fetch_bytes_down(const struct range_fetch* rf)
{ return rf->bytes_down; }
/* Release a range_fetch object: close its connection, if any, and free
 * everything it owns. Passing NULL is a no-op, so the result of a failed
 * range_fetch_start() can be handed over without a check. */
void range_fetch_end(struct range_fetch* rf) {
    if (rf == NULL) return;

    if (rf->sd != -1) close(rf->sd);
    free(rf->ranges_todo);
    free(rf->boundary);
    free(rf->url);
    free(rf->cport);
    free(rf->chost);
    free(rf);
}
/* syntax highlighted by Code2HTML, v. 0.9.1 */