/*
* rcksum/lib - library for using the rsync algorithm to determine
* which parts of a file you have and which you need.
* Copyright (C) 2004,2005 Colin Phipps <cph@moria.org.uk>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the Artistic License v2 (see the accompanying
* file COPYING for the full license terms), or, at your option, any later
* version of the same license.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* COPYING file for details.
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <unistd.h>
#include "config.h"
#include "md4.h"
#include "rcksum.h"
#include "internal.h"
#define UPDATE_RSUM(a, b, oldc, newc, bshift) do { (a) += ((unsigned char)(newc)) - ((unsigned char)(oldc)); (b) += (a) - ((oldc) << (bshift)); } while (0)
struct rsum __attribute__((pure)) rcksum_calc_rsum_block(const unsigned char* data, size_t len)
{
register unsigned short a = 0;
register unsigned short b = 0;
{
while (len) {
unsigned char c = *data++;
a += c; b += len*c;
len--;
}
}
{
struct rsum r = { a, b };
return r;
}
}
void rcksum_calc_checksum(unsigned char *c, const unsigned char* data, size_t len)
{
MD4_CTX ctx;
MD4Init(&ctx);
MD4Update(&ctx,data,len);
MD4Final(c,&ctx);
}
static void unlink_block(struct rcksum_state* z, zs_blockid id)
{
struct hash_entry* t = &(z->blockhashes[id]);
struct hash_entry** p = &(z->rsum_hash[calc_rhash(z,t) & z->hashmask]);
while (*p != NULL) {
if (*p == t) {
if (t == z->rover) { z->rover = t->next; }
*p = (*p)->next;
return;
} else {
p = &((*p)->next);
}
}
}
#ifndef HAVE_PWRITE
size_t pwrite(int d, const void* buf, size_t nbytes, off64_t offset)
{
if (lseek(d, offset, SEEK_SET) == -1) return -1;
return write(d, buf, nbytes);
}
#endif
static void write_blocks(struct rcksum_state* z, const unsigned char* data, zs_blockid bfrom, zs_blockid bto)
{
off64_t len = ((off64_t)(bto - bfrom + 1)) << z->blockshift;
off64_t offset = ((off64_t)bfrom) << z->blockshift;
while (len) {
size_t l = len;
int rc;
if ((off64_t)l < len) l = 0x8000000;
rc = pwrite(z->fd,data,l,offset);
if (rc == -1) {
fprintf(stderr, "IO error: %s\n", strerror(errno));
exit(-1);
}
len -= rc;
if (len) { /* Incomplete, must advance */
data += rc;
offset += rc;
}
}
{
int id;
for (id = bfrom; id <= bto; id++) {
unlink_block(z, id);
add_to_ranges(z, id);
}
}
}
int rcksum_read_known_data(struct rcksum_state* z, unsigned char* buf, off64_t offset, size_t len)
{
int rc = pread(z->fd,buf,len,offset);
return rc;
}
int rcksum_submit_blocks(struct rcksum_state* const z, unsigned char* data, zs_blockid bfrom, zs_blockid bto)
{
zs_blockid x;
unsigned char md4sum[CHECKSUM_SIZE];
if (!z->rsum_hash)
if (!build_hash(z))
return -1;
for (x = bfrom; x <= bto; x++) {
rcksum_calc_checksum(&md4sum[0], data + ((x-bfrom) << z->blockshift), z->blocksize);
if (memcmp(&md4sum, &(z->blockhashes[x].checksum[0]), z->checksum_bytes)) {
if (x > bfrom) /* Write any good blocks we did get */
write_blocks(z,data,bfrom,x-1);
return -1;
}
}
write_blocks(z,data,bfrom,bto);
return 0;
}
static int check_checksums_on_hash_chain(struct rcksum_state* const z, const struct hash_entry* e, const char* data, int onlyone)
{
unsigned char md4sum[2][CHECKSUM_SIZE];
signed int done_md4 = -1;
int got_blocks = 0;
register struct rsum r = z->r[0];
z->rover = e;
/* This is essentially a for (;e;e=e->next), but we want to remove links from
* the list as we find matches, without keeping too many temp variables.
*/
while (z->rover) {
zs_blockid id;
e = z->rover; z->rover = onlyone ? NULL : e->next;
/* Check weak checksum first */
z->stats.hashhit++;
if (e->r.a != (r.a & z->rsum_a_mask) || e->r.b != r.b) {
continue;
}
id = get_HE_blockid(z,e);
if (!onlyone && z->seq_matches > 1 && (z->blockhashes[id+1].r.a != (z->r[1].a & z->rsum_a_mask) || z->blockhashes[id+1].r.b != z->r[1].b))
continue;
z->stats.weakhit++;
{
int ok = 1;
signed int check_md4 = 0;
/* This block at least must match; we must match at least z->seq_matches-1 others, which could either be trailing stuff, or these could be preceding blocks that we have verified already. */
do {
/* We only calculate the MD4 once we need it; but need not do so twice */
if (check_md4 > done_md4) {
rcksum_calc_checksum(&md4sum[check_md4][0], data + z->blocksize*check_md4, z->blocksize);
done_md4 = check_md4;
z->stats.checksummed++;
}
/* Now check the strong checksum for this block */
if (memcmp(&md4sum[check_md4], z->blockhashes[id+check_md4].checksum, z->checksum_bytes))
ok = 0;
check_md4++;
} while (ok && !onlyone && check_md4 < z->seq_matches);
if (ok) {
write_blocks(z, data, id, id+check_md4-1);
got_blocks+=check_md4;
z->stats.stronghit+=check_md4;
z->next_match = z->blockhashes + id+check_md4;
}
}
}
return got_blocks;
}
int rcksum_submit_source_data(struct rcksum_state* const z, unsigned char* data, size_t len, off64_t offset)
{
int x = 0;
register int bs = z->blocksize;
int got_blocks = 0;
if (offset) {
x = z->skip;
} else {
z->next_match = NULL;
}
if (x || !offset) {
z->r[0] = rcksum_calc_rsum_block(data+x, bs);
if (z->seq_matches > 1)
z->r[1] = rcksum_calc_rsum_block(data+x+bs, bs);
}
z->skip = 0;
for (;;) {
if (x+z->context == len) {
return got_blocks;
}
#if 0
{
int k = 0;
struct rsum c = rcksum_calc_rsum_block(data+x+bs*k, bs);
if (c.a != z->r[k].a || c.b != z->r[k].b) {
fprintf(stderr,"rsum miscalc (%d) at %lld\n",k,offset+x);
exit(3);
}
}
#endif
{
int thismatch = 0;
int blocks_matched = 0;
if (z->next_match && z->seq_matches > 1) {
if (0 != (thismatch = check_checksums_on_hash_chain(z, z->next_match, data+x, 1))) {
blocks_matched = 1;
} else
z->next_match = NULL;
}
if (!blocks_matched) {
const struct hash_entry* e;
unsigned hash = z->r[0].b;
hash ^= ((z->seq_matches > 1) ? z->r[1].b : z->r[0].a) << BITHASHBITS;
e = z->rsum_hash[hash & z->hashmask];
if ((z->bithash[(hash & z->bithashmask) >> 3] & (1 << (hash & 7))) != 0
&&(e = z->rsum_hash[hash & z->hashmask]) != NULL) {
thismatch = check_checksums_on_hash_chain(z, e, data+x, 0);
if (thismatch)
blocks_matched = z->seq_matches;
}
}
got_blocks += thismatch;
if (blocks_matched) {
x += bs + (blocks_matched > 1 ? bs : 0);
if (x + z->context > len) {
/* can't calculate rsum for block after this one, because it's not in the buffer. So leave a hint for next time so we know we need to recalculate */
z->skip = x + z->context - len;
return got_blocks;
}
/* If we are moving forward just 1 block, we already have the following block rsum. If we are skipping both, then recalculate both */
if (z->seq_matches > 1 && blocks_matched == 1)
z->r[0] = z->r[1];
else
z->r[0] = rcksum_calc_rsum_block(data+x, bs);
if (z->seq_matches > 1)
z->r[1] = rcksum_calc_rsum_block(data+x+bs, bs);
continue;
}
}
{
unsigned char Nc = data[x+bs*2];
unsigned char nc = data[x+bs];
unsigned char oc = data[x];
UPDATE_RSUM(z->r[0].a,z->r[0].b,oc,nc,z->blockshift);
if (z->seq_matches > 1)
UPDATE_RSUM(z->r[1].a,z->r[1].b,nc,Nc,z->blockshift);
}
x++;
}
}
int rcksum_submit_source_file(struct rcksum_state* z, FILE* f, int progress)
{
register int bufsize = z->blocksize*16;
char *buf = malloc(bufsize + z->context);
int got_blocks = 0;
off64_t in = 0;
int in_mb = 0;
if (!buf) return 0;
if (!z->rsum_hash)
if (!build_hash(z))
return 0;
while (!feof(f)) {
size_t len;
off64_t start_in = in;
if (!in) {
len = fread(buf,1,bufsize,f);
if (len < z->context) return 0;
in += len;
} else {
memcpy(buf, buf + (bufsize - z->context), z->context);
in += bufsize - z->context;
len = z->context + fread(buf + z->context,1,bufsize - z->context,f);
}
if (ferror(f)) {
perror("fread"); free(buf);
return got_blocks;
}
if (feof(f)) { /* 0 pad to complete a block */
memset(buf+len,0,z->context); len += z->context;
}
got_blocks += rcksum_submit_source_data(z,buf,len,start_in);
if (progress && in_mb != in / 1000000) { in_mb = in / 1000000; fputc('*',stderr); }
}
free(buf);
return got_blocks;
}
syntax highlighted by Code2HTML, v. 0.9.1