/*
* zsync - client side rsync over http
* Copyright (C) 2004,2005 Colin Phipps <cph@moria.org.uk>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the Artistic License v2 (see the accompanying
* file COPYING for the full license terms), or, at your option, any later
* version of the same license.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* COPYING file for details.
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <ctype.h>
#include <errno.h>
#include "config.h"
#include <arpa/inet.h>
#ifdef HAVE_STDINT_H
#include <stdint.h>
#endif
#include <sys/types.h>
#include <sys/stat.h>
#include "makegz.h"
#include "librcksum/rcksum.h"
#include "libzsync/zmap.h"
#include "libzsync/sha1.h"
#include "zlib/zlib.h"
SHA1_CTX shactx;
size_t blocksize = 0;
long long len = 0;
void stream_error(const char* func, FILE* stream)
{
fprintf(stderr,"%s: %s\n",func,strerror(ferror(stream)));
exit(2);
}
static void write_block_sums(char* buf, size_t got, FILE* f)
{
struct rsum r;
unsigned char checksum[CHECKSUM_SIZE];
/* Now pad for our checksum */
if (got < blocksize)
memset(buf+got,0,blocksize-got);
r = rcksum_calc_rsum_block(buf, blocksize);
rcksum_calc_checksum(&checksum[0], buf, blocksize);
r.a = htons(r.a); r.b = htons(r.b);
if (fwrite(&r, sizeof r, 1, f) != 1) stream_error("fwrite",f);
if (fwrite(checksum, sizeof checksum, 1, f) != 1) stream_error("fwrite",f);
}
static inline long long in_position(z_stream* pz)
{ return pz->total_in * 8 - ( 63 & pz->data_type); }
static FILE* zmap;
static int zmapentries;
static long long last_delta_in;
static char* zhead;
static void write_zmap_delta(long long *prev_in, long long *prev_out, long long new_in, long long new_out, int blockstart)
{
struct gzblock g;
{
uint16_t inbits = new_in - *prev_in;
if (*prev_in + inbits != new_in) { fprintf(stderr,"too long between blocks (try a smaller block size with -b)\n"); exit(1); }
inbits = htons(inbits);
g.inbitoffset = inbits;
*prev_in = new_in;
}
{
uint16_t outbytes = new_out - *prev_out;
outbytes &= ~GZB_NOTBLOCKSTART;
if ((long long)outbytes + *prev_out != new_out) { fprintf(stderr,"too long output of block blocks?"); exit(1); }
if (!blockstart) outbytes |= GZB_NOTBLOCKSTART;
outbytes = htons(outbytes);
g.outbyteoffset = outbytes;
*prev_out = new_out;
}
fwrite(&g,sizeof(g),1,zmap);
zmapentries++;
last_delta_in = new_in;
}
void do_zstream(FILE *fin, FILE* fout, const char* bufsofar, size_t got)
{
z_stream zs;
Bytef *inbuf = malloc(blocksize);
const size_t inbufsz = blocksize;
Bytef *outbuf = malloc(blocksize);
int eoz = 0;
int header_bits;
long long prev_in = 0;
long long prev_out = 0;
long long midblock_in = 0;
long long midblock_out = 0;
int want_zdelta = 0;
if (!inbuf || !outbuf) {
fprintf(stderr,"memory allocation failure\n"); exit(1);
}
zs.zalloc = Z_NULL;
zs.zfree = Z_NULL;
zs.opaque = NULL;
zs.next_in = inbuf;
zs.avail_in = 0;
zs.total_in = 0;
zs.next_out = outbuf;
zs.avail_out = 0;
if (inflateInit2(&zs,-MAX_WBITS) != Z_OK) exit(-1);
{ /* Skip gzip header and do initial buffer fill */
const char *p = skip_zhead(bufsofar);
{
int header_bytes = p - bufsofar;
int i;
header_bits = 8*header_bytes;
got -= header_bytes;
zhead = malloc(1+2*header_bytes);
for (i = 0; i < header_bytes; i++)
sprintf(zhead + 2*i, "%02x", (unsigned char)bufsofar[i]);
}
if (got > inbufsz) { fprintf(stderr,"internal failure, %d > %d input buffer available\n",got,inbufsz); exit(2); }
memcpy(inbuf,p,got);
/* Fill the buffer up to offset inbufsz of the input file - we want to try and keep the input blocks aligned with block boundaries in the underlying filesystem and physical storage */
zs.avail_in = got;
if (inbufsz > got +(header_bits/8))
zs.avail_in += fread(inbuf+got,1,inbufsz-got-(header_bits/8),fin);
}
/* Start the zmap. We write into a temp file, which the caller then copies into the zsync file later. */
zmap = tmpfile();
if (!zmap) { perror("tmpfile"); exit(2); }
/* We are past the header, so we are now at the start of the first block */
write_zmap_delta(&prev_in,&prev_out,header_bits, zs.total_out, 1);
zs.avail_out = blocksize;
while (!eoz) {
if (zs.avail_in == 0) {
int rc = fread(inbuf,1,inbufsz,fin);
zs.next_in = inbuf;
if (rc < 0) { perror("read"); exit(2); }
zs.avail_in = rc;
}
{
int rc;
rc = inflate(&zs,Z_BLOCK);
switch (rc) {
case Z_STREAM_END:
eoz = 1;
case Z_BUF_ERROR: /* Not really an error, just means we provided stingy buffers */
case Z_OK:
break;
default:
fprintf(stderr,"zlib error %s\n",zs.msg);
exit(1);
}
if (zs.avail_out == 0 || rc == Z_STREAM_END) {
SHA1Update(&shactx, outbuf, blocksize-zs.avail_out);
/* Completed a block */
write_block_sums(outbuf,blocksize-zs.avail_out,fout);
zs.next_out = outbuf; zs.avail_out = blocksize;
want_zdelta = 1;
}
if (zs.data_type & 128 || rc == Z_STREAM_END) {
write_zmap_delta(&prev_in,&prev_out,header_bits + in_position(&zs),zs.total_out,1);
midblock_in = midblock_out = 0;
want_zdelta = 0;
}
if (want_zdelta && inflateSafePoint(&zs)) {
long long cur_in = header_bits + in_position(&zs);
// fprintf(stderr,"here %lld %lld %lld!\n",cur_in,midblock_in,last_delta_in);
if (midblock_in) {
write_zmap_delta(&prev_in,&prev_out,midblock_in,midblock_out,0);
}
midblock_in = cur_in; midblock_out = zs.total_out;
want_zdelta = 0;
}
}
}
len += zs.total_out;
inflateEnd(&zs);
fputc('\n',fout);
/* Move back to the start of the zmap constructed, ready for the caller to read it back in */
rewind(zmap);
free(inbuf);
free(outbuf);
}
static int no_look_inside;
void read_stream_write_blocksums(FILE* fin, FILE* fout)
{
unsigned char *buf = malloc(blocksize);
if (!buf) {
fprintf(stderr,"out of memory\n"); exit(1);
}
while (!feof(fin)) {
int got = fread(buf, 1, blocksize, fin);
if (got > 0) {
if (!no_look_inside && len == 0 && buf[0] == 0x1f && buf[1] == 0x8b) {
do_zstream(fin,fout,buf,got);
break;
}
/* The SHA-1 sum, unlike our internal block-based sums, is on the whole file and nothing else - no padding */
SHA1Update(&shactx, buf, got);
write_block_sums(buf,got,fout);
len += got;
} else {
if (ferror(fin))
stream_error("fread",fin);
}
}
}
void fcopy(FILE* fin, FILE* fout)
{
unsigned char buf[4096];
size_t len;
while ((len = fread(buf,1,sizeof(buf),fin)) > 0) {
if (fwrite(buf,1,len,fout) < len)
break;
}
if (ferror(fin)) {
stream_error("fread",fin);
}
if (ferror(fout)) {
stream_error("fwrite",fout);
}
}
void fcopy_hashes(FILE* fin, FILE* fout, int rsum_bytes, int hash_bytes)
{
unsigned char buf[20];
size_t len;
while ((len = fread(buf,1,sizeof(buf),fin)) > 0) {
/* write trailing rsum_bytes of the rsum (trailing because the second part of the rsum is more useful in practice for hashing), and leading checksum_bytes of the checksum */
if (fwrite(buf + 4-rsum_bytes, 1, rsum_bytes, fout) < rsum_bytes)
break;
if (fwrite(buf + 4, 1, hash_bytes, fout) < hash_bytes)
break;
}
if (ferror(fin)) {
stream_error("fread",fin);
}
if (ferror(fout)) {
stream_error("fwrite",fout);
}
}
static int read_sample_and_close(FILE* f, size_t l, void* buf)
{
int rc = 0;
if (fread(buf,1,l,f) == l) rc = 1;
else if (errno != EBADF) perror("read");
fclose(f);
return rc;
}
static char* encode_filename(const char* fname)
{
char* cmd = malloc(2 + strlen(fname)*2);
if (!cmd) return NULL;
{
int i,j;
for (i=j=0; fname[i]; i++) {
if (!isalnum(fname[i])) cmd[j++] = '\\';
cmd[j++] = fname[i];
}
cmd[j] = 0;
}
return cmd;
}
static const char * const try_opts[] = { "--best","","--rsync","--rsync --best", NULL };
const char* guess_gzip_options(const char* f)
{
#define SAMPLE 1024
char orig[SAMPLE];
{
FILE* s = fopen(f,"r");
if (!s) { perror("open"); return NULL; }
if (!read_sample_and_close(s,SAMPLE,orig)) return NULL;
}
{
int i;
const char* o;
char* enc_f = encode_filename(f);
for (i=0; (o = try_opts[i]) != NULL; i++) {
char cmd[1024];
snprintf(cmd,sizeof(cmd),"zcat %s | gzip -n %s 2> /dev/null",enc_f,o);
{
FILE* p = popen(cmd,"r");
char samp[SAMPLE];
fprintf(stderr,"running %s to determine gzip options\n", cmd);
if (!p) {
perror(cmd);
} else if (!read_sample_and_close(p,SAMPLE,samp)) {
;
} else {
char *a = skip_zhead(orig);
char *b = skip_zhead(samp);
if (!memcmp(a,b,900))
break;
}
}
}
free(enc_f);
return o;
}
}
off64_t get_len(FILE* f)
{
struct stat s;
if (fstat(fileno(f),&s) == -1)
return 0;
return s.st_size;
}
#include <libgen.h>
#include <math.h>
int main(int argc, char** argv) {
FILE* tf = tmpfile();
FILE* instream;
char * fname = NULL, * zfname = NULL;
char ** url = NULL;
int nurls = 0;
char ** Uurl = NULL;
int nUurls = 0;
char * outfname = NULL;
FILE* fout;
char *infname = NULL;
int rsum_len, checksum_len, seq_matches;
int do_compress = 0;
int do_recompress = -1; // -1 means we decide for ourselves
int do_exact = 0;
const char* gzopts = NULL;
{
int opt;
while ((opt = getopt(argc,argv,"b:Ceo:f:u:U:zZ")) != -1) {
switch (opt) {
case 'e':
do_exact = 1;
break;
case 'C':
do_recompress = 0;
break;
case 'o':
if (outfname) { fprintf(stderr,"specify -o only once\n"); exit(2); }
outfname = strdup(optarg);
break;
case 'f':
if (fname) { fprintf(stderr,"specify -f only once\n"); exit(2); }
fname = strdup(optarg);
break;
case 'b':
blocksize = atoi(optarg);
if ((blocksize & (blocksize-1)) != 0) { fprintf(stderr,"blocksize must be a power of 2 (512, 1024, 2048, ...)\n"); exit(2); }
break;
case 'u':
url = realloc(url,(nurls+1)*sizeof *url);
url[nurls++] = optarg;
break;
case 'U':
Uurl = realloc(Uurl,(nUurls+1)*sizeof *Uurl);
Uurl[nUurls++] = optarg;
break;
case 'z':
do_compress = 1;
break;
case 'Z':
no_look_inside = 1;
break;
}
}
if (optind == argc-1) {
infname = strdup(argv[optind]);
instream = fopen(infname,"rb");
if (!instream) { perror("open"); exit(2); }
if (!fname) fname = basename(argv[optind]);
}
else {
instream = stdin;
}
}
/* If not user-specified, take a best guess */
if (!blocksize) {
blocksize = (get_len(instream) < 100000000) ? 2048 : 4096;
}
if (do_compress) {
char* newfname;
{
char* tryfname = infname;
if (!tryfname) { tryfname = fname; }
if (tryfname) {
newfname = malloc(strlen(tryfname)+4); strcpy(newfname,tryfname); strcat(newfname,".gz");
}
}
if (!newfname) {
newfname = strdup("zsync-target.gz");
}
instream = optimal_gzip(instream, newfname, blocksize);
if (!instream) { fprintf(stderr,"failed to compress\n"); exit(-1); }
if (infname) { free(infname); infname = newfname; }
else free(newfname);
}
SHA1Init(&shactx);
read_stream_write_blocksums(instream,tf);
{ /* Decide how long a rsum hash and checksum hash we need */
seq_matches = 2;
rsum_len = (7.9 + ((log(len) + log(blocksize))/log(2) - 8.6)/seq_matches)/8;
if (rsum_len > 4) rsum_len = 4;
if (rsum_len < 2) rsum_len = 2;
checksum_len = (7.9 + (20 + (log(len) + log(1+len/blocksize)) / log(2))/seq_matches) / 8;
{
int checksum_len2 = (7.9 + (20 + log(1+len/blocksize)/log(2))) / 8;
if (checksum_len < checksum_len2) checksum_len = checksum_len2;
}
}
/* The only danger of the client not getting the original file is if we have compressed;
* in that case we want to recompress iff the compressed version was supplied
* (i.e. we weren't told to generate it ourselves with -z). */
if (do_exact) {
int old_do_recompress = do_recompress;
do_recompress = (zmapentries && !do_compress) ? 2 : 0;
if (old_do_recompress != -1 && (!old_do_recompress) != (!do_recompress)) {
fprintf(stderr,"conflicting request for compression and exactness\n"); exit(2);
}
}
/* We recompress if we were told to, OR if
* we were left to make our own decision about recompression
* the original was compressed & the zsync is of the uncompressed (i.e. there is a zmap)
* AND this compressed original isn't one we made ourselves just for transmission
*/
if ((do_recompress > 0) || (do_recompress == -1 && zmapentries && !do_compress))
gzopts = guess_gzip_options(infname);
/* We now know whether to recompress - if the above and guess_gzip_options worked */
if (do_recompress == -1)
do_recompress = (gzopts != NULL) ? 1 : 0;
if (do_recompress > 1 && gzopts == NULL) {
fprintf(stderr,"recompression required, but %s\n", zmap ? "could not determine gzip options to reproduce this archive" : "we are not looking into a compressed stream");
exit(2);
}
if (fname && zmapentries) {
/* Remove any trailing .gz, as it is the uncompressed file being transferred */
char *p = strrchr(fname,'.');
if (p) {
zfname = strdup(fname);
if (!strcmp(p,".gz")) *p = 0;
if (!strcmp(p,".tgz")) strcpy(p,".tar");
}
}
if (!outfname && fname) {
outfname = malloc(strlen(fname) + 10);
sprintf(outfname,"%s.zsync",fname);
}
if (outfname) {
fout = fopen(outfname,"wb");
if (!fout) { perror("open"); exit(2); }
free(outfname);
} else {
fout = stdout;
}
/* Okay, start writing the zsync file */
fprintf(fout,"zsync: " VERSION "\n");
/* Lines we might include but which older clients can ignore */
if (do_recompress)
if (zfname)
fprintf(fout,"Safe: Z-Filename Recompress\nZ-Filename: %s\n",zfname);
else
fprintf(fout,"Safe: Recompress\n");
if (fname) fprintf(fout,"Filename: %s\n",fname);
fprintf(fout,"Blocksize: %d\n",blocksize);
fprintf(fout,"Length: %lld\n",len);
fprintf(fout,"Hash-Lengths: %d,%d,%d\n",seq_matches,rsum_len,checksum_len);
{ /* Write URLs */
int i;
for (i = 0; i < nurls; i++)
fprintf(fout,"%s: %s\n",zmapentries ? "Z-URL" : "URL", url[i]);
for (i = 0; i < nUurls; i++)
fprintf(fout,"URL: %s\n", Uurl[i]);
}
if (nurls == 0 && infname) {
/* Assume that we are in the public dir, and use relative paths.
* Look for an uncompressed version and add a URL for that to if appropriate. */
fprintf(fout,"%s: %s\n",zmapentries ? "Z-URL" : "URL", infname);
if (zmapentries && fname && !access(fname,R_OK)) {
fprintf(fout,"URL: %s\n",fname);
}
fprintf(stderr,"Relative URL included in .zsync file - you must keep the file being served and the .zsync in the same public directory\n");
}
fputs("SHA-1: ",fout);
{
unsigned char digest[SHA1_DIGEST_LENGTH];
int i;
SHA1Final(digest, &shactx);
for (i = 0; i < sizeof digest; i++)
fprintf(fout,"%02x",digest[i]);
}
fputc('\n',fout);
if (do_recompress)
fprintf(fout,"Recompress: %s %s\n",zhead,gzopts);
if (zmapentries) {
fprintf(fout,"Z-Map2: %d\n",zmapentries);
fcopy(zmap,fout);
fclose(zmap);
}
fputc('\n',fout);
rewind(tf);
fcopy_hashes(tf,fout,rsum_len,checksum_len);
fclose(tf);
fclose(fout);
return 0;
}
syntax highlighted by Code2HTML, v. 0.9.1