/* * zsync - client side rsync over http * Copyright (C) 2004,2005 Colin Phipps * * This program is free software; you can redistribute it and/or modify * it under the terms of the Artistic License v2 (see the accompanying * file COPYING for the full license terms), or, at your option, any later * version of the same license. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * COPYING file for details. */ #include #include #include #include #include #include #include "config.h" #include #ifdef HAVE_STDINT_H #include #endif #include #include #include "makegz.h" #include "librcksum/rcksum.h" #include "libzsync/zmap.h" #include "libzsync/sha1.h" #include "zlib/zlib.h" SHA1_CTX shactx; size_t blocksize = 0; long long len = 0; void stream_error(const char* func, FILE* stream) { fprintf(stderr,"%s: %s\n",func,strerror(ferror(stream))); exit(2); } static void write_block_sums(char* buf, size_t got, FILE* f) { struct rsum r; unsigned char checksum[CHECKSUM_SIZE]; /* Now pad for our checksum */ if (got < blocksize) memset(buf+got,0,blocksize-got); r = rcksum_calc_rsum_block(buf, blocksize); rcksum_calc_checksum(&checksum[0], buf, blocksize); r.a = htons(r.a); r.b = htons(r.b); if (fwrite(&r, sizeof r, 1, f) != 1) stream_error("fwrite",f); if (fwrite(checksum, sizeof checksum, 1, f) != 1) stream_error("fwrite",f); } static inline long long in_position(z_stream* pz) { return pz->total_in * 8 - ( 63 & pz->data_type); } static FILE* zmap; static int zmapentries; static long long last_delta_in; static char* zhead; static void write_zmap_delta(long long *prev_in, long long *prev_out, long long new_in, long long new_out, int blockstart) { struct gzblock g; { uint16_t inbits = new_in - *prev_in; if (*prev_in + inbits != new_in) { fprintf(stderr,"too long between blocks (try a smaller block size with -b)\n"); exit(1); } inbits = htons(inbits); g.inbitoffset = inbits; *prev_in = new_in; } { uint16_t outbytes = new_out - *prev_out; outbytes &= ~GZB_NOTBLOCKSTART; if ((long long)outbytes + *prev_out != new_out) { fprintf(stderr,"too long output of block blocks?"); exit(1); } if (!blockstart) outbytes |= GZB_NOTBLOCKSTART; outbytes = htons(outbytes); g.outbyteoffset = outbytes; *prev_out = new_out; } fwrite(&g,sizeof(g),1,zmap); zmapentries++; last_delta_in = new_in; } void do_zstream(FILE *fin, FILE* fout, const char* bufsofar, size_t got) { z_stream zs; Bytef *inbuf = malloc(blocksize); const size_t inbufsz = blocksize; Bytef *outbuf = malloc(blocksize); int eoz = 0; int header_bits; long long prev_in = 0; long long prev_out = 0; long long midblock_in = 0; long long midblock_out = 0; int want_zdelta = 0; if (!inbuf || !outbuf) { fprintf(stderr,"memory allocation failure\n"); exit(1); } zs.zalloc = Z_NULL; zs.zfree = Z_NULL; zs.opaque = NULL; zs.next_in = inbuf; zs.avail_in = 0; zs.total_in = 0; zs.next_out = outbuf; zs.avail_out = 0; if (inflateInit2(&zs,-MAX_WBITS) != Z_OK) exit(-1); { /* Skip gzip header and do initial buffer fill */ const char *p = skip_zhead(bufsofar); { int header_bytes = p - bufsofar; int i; header_bits = 8*header_bytes; got -= header_bytes; zhead = malloc(1+2*header_bytes); for (i = 0; i < header_bytes; i++) sprintf(zhead + 2*i, "%02x", (unsigned char)bufsofar[i]); } if (got > inbufsz) { fprintf(stderr,"internal failure, %d > %d input buffer available\n",got,inbufsz); exit(2); } memcpy(inbuf,p,got); /* Fill the buffer up to offset inbufsz of the input file - we want to try and keep the input blocks aligned with block boundaries in the underlying filesystem and physical storage */ zs.avail_in = got; if (inbufsz > got +(header_bits/8)) zs.avail_in += fread(inbuf+got,1,inbufsz-got-(header_bits/8),fin); } /* Start the zmap. We write into a temp file, which the caller then copies into the zsync file later. */ zmap = tmpfile(); if (!zmap) { perror("tmpfile"); exit(2); } /* We are past the header, so we are now at the start of the first block */ write_zmap_delta(&prev_in,&prev_out,header_bits, zs.total_out, 1); zs.avail_out = blocksize; while (!eoz) { if (zs.avail_in == 0) { int rc = fread(inbuf,1,inbufsz,fin); zs.next_in = inbuf; if (rc < 0) { perror("read"); exit(2); } zs.avail_in = rc; } { int rc; rc = inflate(&zs,Z_BLOCK); switch (rc) { case Z_STREAM_END: eoz = 1; case Z_BUF_ERROR: /* Not really an error, just means we provided stingy buffers */ case Z_OK: break; default: fprintf(stderr,"zlib error %s\n",zs.msg); exit(1); } if (zs.avail_out == 0 || rc == Z_STREAM_END) { SHA1Update(&shactx, outbuf, blocksize-zs.avail_out); /* Completed a block */ write_block_sums(outbuf,blocksize-zs.avail_out,fout); zs.next_out = outbuf; zs.avail_out = blocksize; want_zdelta = 1; } if (zs.data_type & 128 || rc == Z_STREAM_END) { write_zmap_delta(&prev_in,&prev_out,header_bits + in_position(&zs),zs.total_out,1); midblock_in = midblock_out = 0; want_zdelta = 0; } if (want_zdelta && inflateSafePoint(&zs)) { long long cur_in = header_bits + in_position(&zs); // fprintf(stderr,"here %lld %lld %lld!\n",cur_in,midblock_in,last_delta_in); if (midblock_in) { write_zmap_delta(&prev_in,&prev_out,midblock_in,midblock_out,0); } midblock_in = cur_in; midblock_out = zs.total_out; want_zdelta = 0; } } } len += zs.total_out; inflateEnd(&zs); fputc('\n',fout); /* Move back to the start of the zmap constructed, ready for the caller to read it back in */ rewind(zmap); free(inbuf); free(outbuf); } static int no_look_inside; void read_stream_write_blocksums(FILE* fin, FILE* fout) { unsigned char *buf = malloc(blocksize); if (!buf) { fprintf(stderr,"out of memory\n"); exit(1); } while (!feof(fin)) { int got = fread(buf, 1, blocksize, fin); if (got > 0) { if (!no_look_inside && len == 0 && buf[0] == 0x1f && buf[1] == 0x8b) { do_zstream(fin,fout,buf,got); break; } /* The SHA-1 sum, unlike our internal block-based sums, is on the whole file and nothing else - no padding */ SHA1Update(&shactx, buf, got); write_block_sums(buf,got,fout); len += got; } else { if (ferror(fin)) stream_error("fread",fin); } } } void fcopy(FILE* fin, FILE* fout) { unsigned char buf[4096]; size_t len; while ((len = fread(buf,1,sizeof(buf),fin)) > 0) { if (fwrite(buf,1,len,fout) < len) break; } if (ferror(fin)) { stream_error("fread",fin); } if (ferror(fout)) { stream_error("fwrite",fout); } } void fcopy_hashes(FILE* fin, FILE* fout, int rsum_bytes, int hash_bytes) { unsigned char buf[20]; size_t len; while ((len = fread(buf,1,sizeof(buf),fin)) > 0) { /* write trailing rsum_bytes of the rsum (trailing because the second part of the rsum is more useful in practice for hashing), and leading checksum_bytes of the checksum */ if (fwrite(buf + 4-rsum_bytes, 1, rsum_bytes, fout) < rsum_bytes) break; if (fwrite(buf + 4, 1, hash_bytes, fout) < hash_bytes) break; } if (ferror(fin)) { stream_error("fread",fin); } if (ferror(fout)) { stream_error("fwrite",fout); } } static int read_sample_and_close(FILE* f, size_t l, void* buf) { int rc = 0; if (fread(buf,1,l,f) == l) rc = 1; else if (errno != EBADF) perror("read"); fclose(f); return rc; } static char* encode_filename(const char* fname) { char* cmd = malloc(2 + strlen(fname)*2); if (!cmd) return NULL; { int i,j; for (i=j=0; fname[i]; i++) { if (!isalnum(fname[i])) cmd[j++] = '\\'; cmd[j++] = fname[i]; } cmd[j] = 0; } return cmd; } static const char * const try_opts[] = { "--best","","--rsync","--rsync --best", NULL }; const char* guess_gzip_options(const char* f) { #define SAMPLE 1024 char orig[SAMPLE]; { FILE* s = fopen(f,"r"); if (!s) { perror("open"); return NULL; } if (!read_sample_and_close(s,SAMPLE,orig)) return NULL; } { int i; const char* o; char* enc_f = encode_filename(f); for (i=0; (o = try_opts[i]) != NULL; i++) { char cmd[1024]; snprintf(cmd,sizeof(cmd),"zcat %s | gzip -n %s 2> /dev/null",enc_f,o); { FILE* p = popen(cmd,"r"); char samp[SAMPLE]; fprintf(stderr,"running %s to determine gzip options\n", cmd); if (!p) { perror(cmd); } else if (!read_sample_and_close(p,SAMPLE,samp)) { ; } else { char *a = skip_zhead(orig); char *b = skip_zhead(samp); if (!memcmp(a,b,900)) break; } } } free(enc_f); return o; } } off64_t get_len(FILE* f) { struct stat s; if (fstat(fileno(f),&s) == -1) return 0; return s.st_size; } #include #include int main(int argc, char** argv) { FILE* tf = tmpfile(); FILE* instream; char * fname = NULL, * zfname = NULL; char ** url = NULL; int nurls = 0; char ** Uurl = NULL; int nUurls = 0; char * outfname = NULL; FILE* fout; char *infname = NULL; int rsum_len, checksum_len, seq_matches; int do_compress = 0; int do_recompress = -1; // -1 means we decide for ourselves int do_exact = 0; const char* gzopts = NULL; { int opt; while ((opt = getopt(argc,argv,"b:Ceo:f:u:U:zZ")) != -1) { switch (opt) { case 'e': do_exact = 1; break; case 'C': do_recompress = 0; break; case 'o': if (outfname) { fprintf(stderr,"specify -o only once\n"); exit(2); } outfname = strdup(optarg); break; case 'f': if (fname) { fprintf(stderr,"specify -f only once\n"); exit(2); } fname = strdup(optarg); break; case 'b': blocksize = atoi(optarg); if ((blocksize & (blocksize-1)) != 0) { fprintf(stderr,"blocksize must be a power of 2 (512, 1024, 2048, ...)\n"); exit(2); } break; case 'u': url = realloc(url,(nurls+1)*sizeof *url); url[nurls++] = optarg; break; case 'U': Uurl = realloc(Uurl,(nUurls+1)*sizeof *Uurl); Uurl[nUurls++] = optarg; break; case 'z': do_compress = 1; break; case 'Z': no_look_inside = 1; break; } } if (optind == argc-1) { infname = strdup(argv[optind]); instream = fopen(infname,"rb"); if (!instream) { perror("open"); exit(2); } if (!fname) fname = basename(argv[optind]); } else { instream = stdin; } } /* If not user-specified, take a best guess */ if (!blocksize) { blocksize = (get_len(instream) < 100000000) ? 2048 : 4096; } if (do_compress) { char* newfname; { char* tryfname = infname; if (!tryfname) { tryfname = fname; } if (tryfname) { newfname = malloc(strlen(tryfname)+4); strcpy(newfname,tryfname); strcat(newfname,".gz"); } } if (!newfname) { newfname = strdup("zsync-target.gz"); } instream = optimal_gzip(instream, newfname, blocksize); if (!instream) { fprintf(stderr,"failed to compress\n"); exit(-1); } if (infname) { free(infname); infname = newfname; } else free(newfname); } SHA1Init(&shactx); read_stream_write_blocksums(instream,tf); { /* Decide how long a rsum hash and checksum hash we need */ seq_matches = 2; rsum_len = (7.9 + ((log(len) + log(blocksize))/log(2) - 8.6)/seq_matches)/8; if (rsum_len > 4) rsum_len = 4; if (rsum_len < 2) rsum_len = 2; checksum_len = (7.9 + (20 + (log(len) + log(1+len/blocksize)) / log(2))/seq_matches) / 8; { int checksum_len2 = (7.9 + (20 + log(1+len/blocksize)/log(2))) / 8; if (checksum_len < checksum_len2) checksum_len = checksum_len2; } } /* The only danger of the client not getting the original file is if we have compressed; * in that case we want to recompress iff the compressed version was supplied * (i.e. we weren't told to generate it ourselves with -z). */ if (do_exact) { int old_do_recompress = do_recompress; do_recompress = (zmapentries && !do_compress) ? 2 : 0; if (old_do_recompress != -1 && (!old_do_recompress) != (!do_recompress)) { fprintf(stderr,"conflicting request for compression and exactness\n"); exit(2); } } /* We recompress if we were told to, OR if * we were left to make our own decision about recompression * the original was compressed & the zsync is of the uncompressed (i.e. there is a zmap) * AND this compressed original isn't one we made ourselves just for transmission */ if ((do_recompress > 0) || (do_recompress == -1 && zmapentries && !do_compress)) gzopts = guess_gzip_options(infname); /* We now know whether to recompress - if the above and guess_gzip_options worked */ if (do_recompress == -1) do_recompress = (gzopts != NULL) ? 1 : 0; if (do_recompress > 1 && gzopts == NULL) { fprintf(stderr,"recompression required, but %s\n", zmap ? "could not determine gzip options to reproduce this archive" : "we are not looking into a compressed stream"); exit(2); } if (fname && zmapentries) { /* Remove any trailing .gz, as it is the uncompressed file being transferred */ char *p = strrchr(fname,'.'); if (p) { zfname = strdup(fname); if (!strcmp(p,".gz")) *p = 0; if (!strcmp(p,".tgz")) strcpy(p,".tar"); } } if (!outfname && fname) { outfname = malloc(strlen(fname) + 10); sprintf(outfname,"%s.zsync",fname); } if (outfname) { fout = fopen(outfname,"wb"); if (!fout) { perror("open"); exit(2); } free(outfname); } else { fout = stdout; } /* Okay, start writing the zsync file */ fprintf(fout,"zsync: " VERSION "\n"); /* Lines we might include but which older clients can ignore */ if (do_recompress) if (zfname) fprintf(fout,"Safe: Z-Filename Recompress\nZ-Filename: %s\n",zfname); else fprintf(fout,"Safe: Recompress\n"); if (fname) fprintf(fout,"Filename: %s\n",fname); fprintf(fout,"Blocksize: %d\n",blocksize); fprintf(fout,"Length: %lld\n",len); fprintf(fout,"Hash-Lengths: %d,%d,%d\n",seq_matches,rsum_len,checksum_len); { /* Write URLs */ int i; for (i = 0; i < nurls; i++) fprintf(fout,"%s: %s\n",zmapentries ? "Z-URL" : "URL", url[i]); for (i = 0; i < nUurls; i++) fprintf(fout,"URL: %s\n", Uurl[i]); } if (nurls == 0 && infname) { /* Assume that we are in the public dir, and use relative paths. * Look for an uncompressed version and add a URL for that to if appropriate. */ fprintf(fout,"%s: %s\n",zmapentries ? "Z-URL" : "URL", infname); if (zmapentries && fname && !access(fname,R_OK)) { fprintf(fout,"URL: %s\n",fname); } fprintf(stderr,"Relative URL included in .zsync file - you must keep the file being served and the .zsync in the same public directory\n"); } fputs("SHA-1: ",fout); { unsigned char digest[SHA1_DIGEST_LENGTH]; int i; SHA1Final(digest, &shactx); for (i = 0; i < sizeof digest; i++) fprintf(fout,"%02x",digest[i]); } fputc('\n',fout); if (do_recompress) fprintf(fout,"Recompress: %s %s\n",zhead,gzopts); if (zmapentries) { fprintf(fout,"Z-Map2: %d\n",zmapentries); fcopy(zmap,fout); fclose(zmap); } fputc('\n',fout); rewind(tf); fcopy_hashes(tf,fout,rsum_len,checksum_len); fclose(tf); fclose(fout); return 0; }