// distribution boxbackup-0.10 (svn version: 494)
//  
// Copyright (c) 2003 - 2006
//      Ben Summers and contributors.  All rights reserved.
//  
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// 1. Redistributions of source code must retain the above copyright
//    notice, this list of conditions and the following disclaimer.
// 2. Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimer in the
//    documentation and/or other materials provided with the distribution.
// 3. All use of this software and associated advertising materials must 
//    display the following acknowledgment:
//        This product includes software developed by Ben Summers.
// 4. The names of the Authors may not be used to endorse or promote
//    products derived from this software without specific prior written
//    permission.
// 
// [Where legally impermissible the Authors do not disclaim liability for 
// direct physical injury or death caused solely by defects in the software 
// unless it is modified by a third party.]
// 
// THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS OR
// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
// DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY DIRECT,
// INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
// (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
// HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
// STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
// POSSIBILITY OF SUCH DAMAGE.
//  
//  
//  
// --------------------------------------------------------------------------
//
// File
//		Name:    testbackupdiff.cpp
//		Purpose: Test diffing routines for backup store files
//		Created: 12/1/04
//
// --------------------------------------------------------------------------

#include "Box.h"

#include <stdio.h>
#include <string.h>

#include "Test.h"
#include "BackupClientCryptoKeys.h"
#include "BackupStoreFile.h"
#include "BackupStoreFilenameClear.h"
#include "FileStream.h"
#include "BackupStoreFileWire.h"
#include "BackupStoreObjectMagic.h"
#include "BackupStoreFileCryptVar.h"
#include "BackupStoreException.h"
#include "CollectInBufferStream.h"

#include "MemLeakFindOn.h"

using namespace BackupStoreFileCryptVar;


// from another file
void create_test_files();

bool files_identical(const char *file1, const char *file2)
{
	FileStream f1(file1);
	FileStream f2(file2);
	
	if(f1.BytesLeftToRead() != f2.BytesLeftToRead())
	{
		return false;
	}
	
	while(f1.StreamDataLeft())
	{
		char buffer1[2048];
		char buffer2[2048];
		int s = f1.Read(buffer1, sizeof(buffer1));
		if(f2.Read(buffer2, s) != s)
		{
			return false;
		}
		if(::memcmp(buffer1, buffer2, s) != 0)
		{
			return false;
		}
	}
	
	if(f2.StreamDataLeft())
	{
		return false;
	}
	
	return true;
}

// Write a file called 'filename' containing exactly 'size' zero bytes, then check
// with TestGetFileSize() that the file really has that size.
//
// A failure to allocate the block buffer, to open the file, or to write/close it is
// reported through TEST_THAT. In the first two cases the function returns early
// rather than handing a NULL pointer to the C library.
void make_file_of_zeros(const char *filename, size_t size)
{
	static const size_t bs = 0x10000;
	size_t remSize = size;

	void *b = malloc(bs);
	TEST_THAT(b != NULL);
	if(b == NULL)
	{
		return;
	}
	memset(b, 0, bs);

	FILE *f = fopen(filename, "wb");
	TEST_THAT(f != NULL);
	if(f == NULL)
	{
		free(b);
		return;
	}

	// Using largish blocks like this is much faster, while not consuming too much RAM
	bool writeOK = true;
	while(writeOK && remSize > bs)
	{
		writeOK = (fwrite(b, bs, 1, f) == 1);
		remSize -= bs;
	}
	// Final partial (or exactly full) block; nothing to write if nothing is left
	if(writeOK && remSize > 0)
	{
		writeOK = (fwrite(b, remSize, 1, f) == 1);
	}
	TEST_THAT(writeOK);

	// fclose flushes buffered data, so a failure here also means a bad file
	TEST_THAT(fclose(f) == 0);
	free(b);

	TEST_THAT((size_t)TestGetFileSize(filename) == size);
}


// Check an encoded store file: first run the library's own format verification on it,
// then walk its block index by hand, tracing every entry and counting the two kinds
// of entry found.
//
//		filename             encoded file to examine
//		OtherFileID          ID expected from the verify routine and in the index header
//		new_blocks_expected  expected number of entries with a positive encoded size
//		old_blocks_expected  expected number of all remaining entries (traced as "other")
//
// All mismatches are reported through TEST_THAT; nothing is returned.
void check_encoded_file(const char *filename, int64_t OtherFileID, int new_blocks_expected, int old_blocks_expected)
{
	FileStream enc(filename);
	
	// Use the interface verify routine
	int64_t otherIDFromFile = 0;
	TEST_THAT(BackupStoreFile::VerifyEncodedFileFormat(enc, &otherIDFromFile));
	TEST_THAT(otherIDFromFile == OtherFileID);
	
	// Now do our own reading
	enc.Seek(0, IOStream::SeekType_Absolute);
	BackupStoreFile::MoveStreamPositionToBlockIndex(enc);
	// Read in header to check magic value is as expected
	file_BlockIndexHeader hdr;
	TEST_THAT(enc.ReadFullBuffer(&hdr, sizeof(hdr), 0));
	TEST_THAT(hdr.mMagicValue == (int32_t)htonl(OBJECTMAGIC_FILE_BLOCKS_MAGIC_VALUE_V1));
	TEST_THAT((uint64_t)box_ntoh64(hdr.mOtherFileID) == (uint64_t)OtherFileID);
	// number of blocks
	int64_t nblocks = box_ntoh64(hdr.mNumBlocks);
	// int64_t is not necessarily 'long long', so cast to match the %lld specifiers
	TRACE2("Reading index from '%s', has %lld blocks\n", filename, (long long)nblocks);
	TRACE0("======== ===== ========== ======== ========\n   Index Where  EncSz/Idx     Size  WChcksm\n");
	// Read them all in
	int64_t nnew = 0, nold = 0;
	for(int64_t b = 0; b < nblocks; ++b)
	{
		file_BlockIndexEntry en;
		TEST_THAT(enc.ReadFullBuffer(&en, sizeof(en), 0));
		int64_t s = box_ntoh64(en.mEncodedSize);
		if(s > 0)
		{
			// Positive value: counted as a new block, value traced as a size
			nnew++;
			TRACE2("%8lld this  s=%8lld", (long long)b, (long long)s);
		}
		else
		{
			// Zero or negative: counted as an old block, negated value traced as an index
			nold++;
			TRACE2("%8lld other i=%8lld", (long long)b, (long long)(0 - s));
		}
		// Decode the rest -- the IV for each entry is the header's base value plus the entry number
		uint64_t iv = box_ntoh64(hdr.mEntryIVBase);
		iv += b;
		sBlowfishDecryptBlockEntry.SetIV(&iv);			
		file_BlockIndexEntryEnc entryEnc;
		sBlowfishDecryptBlockEntry.TransformBlock(&entryEnc, sizeof(entryEnc),
				en.mEnEnc, sizeof(en.mEnEnc));
		// Casts make the argument types agree with %d and %x
		TRACE2(" %8d %08x\n", (int)ntohl(entryEnc.mSize), (unsigned int)ntohl(entryEnc.mWeakChecksum));
		
	}
	TRACE0("======== ===== ========== ======== ========\n");
	TEST_THAT(new_blocks_expected == nnew);
	TEST_THAT(old_blocks_expected == nold);
}

void test_diff(int from, int to, int new_blocks_expected, int old_blocks_expected, bool expect_completely_different = false)
{
	// First, get the block index of the thing it's comparing against
	char from_encoded[256];
	sprintf(from_encoded, "testfiles/f%d.encoded", from);
	FileStream blockindex(from_encoded);
	BackupStoreFile::MoveStreamPositionToBlockIndex(blockindex);

	// make filenames
	char from_orig[256];
	sprintf(from_orig, "testfiles/f%d", from);
	char to_encoded[256];
	sprintf(to_encoded, "testfiles/f%d.encoded", to);
	char to_diff[256];
	sprintf(to_diff, "testfiles/f%d.diff", to);
	char to_orig[256];
	sprintf(to_orig, "testfiles/f%d", to);
	char rev_diff[256];
	sprintf(rev_diff, "testfiles/f%d.revdiff", to);
	char from_rebuild[256];
	sprintf(from_rebuild, "testfiles/f%d.rebuilt", to);
	char from_rebuild_dec[256];
	sprintf(from_rebuild_dec, "testfiles/f%d.rebuilt_dec", to);
	
	// Then call the encode varient for diffing files
	bool completelyDifferent = !expect_completely_different;	// oposite of what we want
	{
		BackupStoreFilenameClear f1name("filename");
		FileStream out(to_diff, O_WRONLY | O_CREAT | O_EXCL);
		std::auto_ptr<IOStream> encoded(
			BackupStoreFile::EncodeFileDiff(
				to_orig, 
				1 /* dir ID */, 
				f1name,
				1000 + from /* object ID of the file diffing from */, 
				blockindex, 
				IOStream::TimeOutInfinite,
				NULL, // DiffTimer interface
				0, 
				&completelyDifferent));
		encoded->CopyStreamTo(out);
	}
	TEST_THAT(completelyDifferent == expect_completely_different);
	
	// Test that the number of blocks in the file match what's expected
	check_encoded_file(to_diff, expect_completely_different?(0):(1000 + from), new_blocks_expected, old_blocks_expected);

	// filename
	char to_testdec[256];
	sprintf(to_testdec, "testfiles/f%d.testdec", to);
	
	if(!completelyDifferent)
	{
		// Then produce a combined file
		{
			FileStream diff(to_diff);
			FileStream diff2(to_diff);
			FileStream from(from_encoded);
			FileStream out(to_encoded, O_WRONLY | O_CREAT | O_EXCL);
			BackupStoreFile::CombineFile(diff, diff2, from, out);
		}
		
		// And check it
		check_encoded_file(to_encoded, 0, new_blocks_expected + old_blocks_expected, 0);
	}
	else
	{
		// Emulate the above stage!
		char cmd[256];
		sprintf(cmd, "cp testfiles/f%d.diff testfiles/f%d.encoded", to, to);
		::system(cmd);
	}

	// Decode it
	{
		FileStream enc(to_encoded);
		BackupStoreFile::DecodeFile(enc, to_testdec, IOStream::TimeOutInfinite);
		TEST_THAT(files_identical(to_orig, to_testdec));
	}
	
	// Then do some comparisons against the block index
	{
		FileStream index(to_encoded);
		BackupStoreFile::MoveStreamPositionToBlockIndex(index);
		TEST_THAT(BackupStoreFile::CompareFileContentsAgainstBlockIndex(to_orig, index, IOStream::TimeOutInfinite) == true);
	}
	{
		char from_orig[256];
		sprintf(from_orig, "testfiles/f%d", from);
		FileStream index(to_encoded);
		BackupStoreFile::MoveStreamPositionToBlockIndex(index);
		TEST_THAT(BackupStoreFile::CompareFileContentsAgainstBlockIndex(from_orig, index, IOStream::TimeOutInfinite) == files_identical(from_orig, to_orig));
	}
	
	// Check that combined index creation works as expected
	{
		// Load a combined index into memory
		FileStream diff(to_diff);
		FileStream from(from_encoded);
		std::auto_ptr<IOStream> indexCmbStr(BackupStoreFile::CombineFileIndices(diff, from));
		CollectInBufferStream indexCmb;
		indexCmbStr->CopyStreamTo(indexCmb);
		// Then check that it's as expected!
		FileStream result(to_encoded);
		BackupStoreFile::MoveStreamPositionToBlockIndex(result);
		CollectInBufferStream index;
		result.CopyStreamTo(index);
		TEST_THAT(indexCmb.GetSize() == index.GetSize());
		TEST_THAT(::memcmp(indexCmb.GetBuffer(), index.GetBuffer(), index.GetSize()) == 0);
	}
	
	// Check that reverse delta can be made, and that it decodes OK
	{
		// Create reverse delta
		{
			bool reversedCompletelyDifferent = !completelyDifferent;
			FileStream diff(to_diff);
			FileStream from(from_encoded);
			FileStream from2(from_encoded);
			FileStream reversed(rev_diff, O_WRONLY | O_CREAT);
			BackupStoreFile::ReverseDiffFile(diff, from, from2, reversed, to, &reversedCompletelyDifferent);
			TEST_THAT(reversedCompletelyDifferent == completelyDifferent);
		}
		// Use it to combine a file
		{
			FileStream diff(rev_diff);
			FileStream diff2(rev_diff);
			FileStream from(to_encoded);
			FileStream out(from_rebuild, O_WRONLY | O_CREAT | O_EXCL);
			BackupStoreFile::CombineFile(diff, diff2, from, out);
		}
		// And then confirm that this file is actually the one we want
		{
			FileStream enc(from_rebuild);
			BackupStoreFile::DecodeFile(enc, from_rebuild_dec, IOStream::TimeOutInfinite);
			TEST_THAT(files_identical(from_orig, from_rebuild_dec));
		}
		// Do some extra checking
		{
			TEST_THAT(files_identical(from_rebuild, from_encoded));
		}
	}
}

// Fold the diffs leading from version1 to version2 into a single combined diff, apply
// that to testfiles/f<version1>.encoded, decode the result and check that it is
// identical to testfiles/f<version2>.
//
//		version1  number of the starting version (its .encoded file is the base)
//		version2  number of the final version
//		serial    only used to build the name of the decoded output file
//
// The per-step diffs testfiles/f<N>.diff must already exist.
void test_combined_diff(int version1, int version2, int serial)
{
	char combined_file[256];
	char last_diff[256];
	sprintf(last_diff, "testfiles/f%d.diff", version1 + 1);	// ie from version1 to version1 + 1

	// If the loop below does not run (version2 < version1 + 2) there is nothing to combine,
	// so start with the single diff; otherwise this buffer would be read uninitialised.
	strcpy(combined_file, last_diff);

	for(int v = version1 + 2; v <= version2; ++v)
	{
		// Merge the result so far with the diff for the next version
		FileStream diff1(last_diff);
		char next_diff[256];
		sprintf(next_diff, "testfiles/f%d.diff", v);
		FileStream diff2(next_diff);
		FileStream diff2b(next_diff);
		sprintf(combined_file, "testfiles/comb%d_%d.cmbdiff", version1, v);
		FileStream out(combined_file, O_WRONLY | O_CREAT);
		BackupStoreFile::CombineDiffs(diff1, diff2, diff2b, out);
		// The output of this round is the input of the next
		strcpy(last_diff, combined_file);
	}

	// Then do a combine on it, and check that it decodes to the right thing
	char orig_enc[256];
	sprintf(orig_enc, "testfiles/f%d.encoded", version1);	
	char combined_out[256];
	sprintf(combined_out, "testfiles/comb%d_%d.out", version1, version2);

	{
		FileStream diff(combined_file);
		FileStream diff2(combined_file);
		FileStream from(orig_enc);
		FileStream out(combined_out, O_WRONLY | O_CREAT);
		BackupStoreFile::CombineFile(diff, diff2, from, out);
	}

	char combined_out_dec[256];
	sprintf(combined_out_dec, "testfiles/comb%d_%d_s%d.dec", version1, version2, serial);
	char to_orig[256];
	sprintf(to_orig, "testfiles/f%d", version2);

	{
		FileStream enc(combined_out);
		BackupStoreFile::DecodeFile(enc, combined_out_dec, IOStream::TimeOutInfinite);
		TEST_THAT(files_identical(to_orig, combined_out_dec));
	}

}

#define MAX_DIFF 9
// Call test_combined_diff() for every run of 2, 3 and 4 consecutive versions whose
// final version number does not exceed MAX_DIFF. Each call gets a fresh serial
// number, counting up from 1.
void test_combined_diffs()
{
	int serial = 0;

	// How many versions are spanned by one combined diff
	for(int span = 2; span <= 4; ++span)
	{
		// Shift of the starting version, to get complete coverage
		for(int shift = 0; shift < span; ++shift)
		{
			// Largest step for which the end version stays within MAX_DIFF
			const int lastStep = MAX_DIFF - span - shift;
			for(int step = 0; step <= lastStep; ++step)
			{
				const int firstVersion = shift + step;
				++serial;
				test_combined_diff(firstVersion, firstVersion + span, serial);
			}
		}
	}
}

// Entry point of the diff test.
//
// Creates the test files, sets up the crypto keys, encodes the first file, then runs
// the chain of test_diff() cases, the combined-diff tests, and a handful of special
// cases (zero length file, symlink, diffing against an incomplete file, and large
// files of zeros). Individual failures are reported by the TEST_ macros.
//
//		argc, argv  not used by this function
//		returns     0 when the function runs to completion
int test(int argc, const char *argv[])
{
	// Want to trace out all the details
	#ifndef NDEBUG
	BackupStoreFile::TraceDetailsOfDiffProcess = true;
	#endif

	// Create all the test files
	create_test_files();

	// Setup the crypto
	BackupClientCryptoKeys_Setup("testfiles/backup.keys");	

	// Encode the first file
	{
		BackupStoreFilenameClear f0name("f0");
		FileStream out("testfiles/f0.encoded", O_WRONLY | O_CREAT | O_EXCL);
		std::auto_ptr<IOStream> encoded(BackupStoreFile::EncodeFile("testfiles/f0", 1 /* dir ID */, f0name));
		encoded->CopyStreamTo(out);
		check_encoded_file("testfiles/f0.encoded", 0, 33, 0);
	}
	
	// Check the "seek to index" code
	{
		FileStream enc("testfiles/f0.encoded");
		BackupStoreFile::MoveStreamPositionToBlockIndex(enc);
		// Read in header to check magic value is as expected
		file_BlockIndexHeader hdr;
		TEST_THAT(enc.ReadFullBuffer(&hdr, sizeof(hdr), 0));
		TEST_THAT(hdr.mMagicValue == (int32_t)htonl(OBJECTMAGIC_FILE_BLOCKS_MAGIC_VALUE_V1));
	}

	// Diff some files -- parameters are from number, to number,
	// then the number of new blocks expected, and the number of old blocks expected.
	
	// Diff the original file to a copy of itself, and check that there is no data in the file
	// This checks that the hash table is constructed properly, because two of the blocks share
	// the same weak checksum.
	test_diff(0, 1, 0, 33);

	// Insert some new data
	// Blocks from old file moved whole, but put in different order
	test_diff(1, 2, 7, 32);

	// Delete some data, but not on block boundaries
	test_diff(2, 3, 1, 29);

	// Add a very small amount of data, not on block boundary
	// delete a little data
	test_diff(3, 4, 3, 25);

	// 1 byte insertion between two blocks
	test_diff(4, 5, 1, 28);

	// a file with some new content at the very beginning
	// NOTE: You might expect the last numbers to be 2, 29, but the small 1 byte block isn't searched for
	test_diff(5, 6, 3, 28);
	
	// some new content at the very end
	// NOTE: 1 byte block deleted, so the numbers aren't what you'd initially expect.
	test_diff(6, 7, 2, 30);
	
	// a completely different file, with no blocks matching.
	test_diff(7, 8, 14, 0, true /* completely different expected */);
	
	// diff to zero sized file
	test_diff(8, 9, 0, 0, true /* completely different expected */);
	
	// Test that combining diffs works
	test_combined_diffs();
	
	// Check zero sized file works OK to encode on its own, using normal encoding
	{
		{
			// Encode
			BackupStoreFilenameClear fn("filename");
			FileStream out("testfiles/f9.zerotest", O_WRONLY | O_CREAT | O_EXCL);
			std::auto_ptr<IOStream> encoded(BackupStoreFile::EncodeFile("testfiles/f9", 1 /* dir ID */, fn));
			encoded->CopyStreamTo(out);
			check_encoded_file("testfiles/f9.zerotest", 0, 0, 0);		
		}
		{
			// Decode
			FileStream enc("testfiles/f9.zerotest");
			BackupStoreFile::DecodeFile(enc, "testfiles/f9.testdec.zero", IOStream::TimeOutInfinite);
			TEST_THAT(files_identical("testfiles/f9", "testfiles/f9.testdec.zero"));
		}
	}
	
	// Check that symlinks aren't diffed
	TEST_THAT(::symlink("f2", "testfiles/f2.symlink") == 0);
	// And go and diff it against the previous encoded file
	{
		bool completelyDifferent = false;
		{
			FileStream blockindex("testfiles/f1.encoded");
			BackupStoreFile::MoveStreamPositionToBlockIndex(blockindex);
			
			BackupStoreFilenameClear f1name("filename");
			FileStream out("testfiles/f2.symlink.diff", O_WRONLY | O_CREAT | O_EXCL);
			std::auto_ptr<IOStream> encoded(
				BackupStoreFile::EncodeFileDiff(
					"testfiles/f2.symlink", 
					1 /* dir ID */, 
					f1name,
					1001 /* object ID of the file diffing from */, 
					blockindex, 
					IOStream::TimeOutInfinite,
					NULL, // DiffTimer interface
					0, 
					&completelyDifferent));
			encoded->CopyStreamTo(out);
		}
		TEST_THAT(completelyDifferent == true);
		check_encoded_file("testfiles/f2.symlink.diff", 0, 0, 0);		
	}

	// Check that diffing against a file which isn't "complete" and refers to another isn't allowed
	{
		FileStream blockindex("testfiles/f1.diff");
		BackupStoreFile::MoveStreamPositionToBlockIndex(blockindex);
		
		BackupStoreFilenameClear f1name("filename");
		FileStream out("testfiles/f2.testincomplete", O_WRONLY | O_CREAT | O_EXCL);
		TEST_CHECK_THROWS(BackupStoreFile::EncodeFileDiff("testfiles/f2", 1 /* dir ID */, f1name,
			1001 /* object ID of the file diffing from */, blockindex, IOStream::TimeOutInfinite,
			0, 0), BackupStoreException, CannotDiffAnIncompleteStoreFile);
	}

	// Found a nasty case where files of lots of the same thing soak up lots of processor
	// time -- because of lots of matches found. Check this out!
	make_file_of_zeros("testfiles/zero.0", 20*1024*1024);
	make_file_of_zeros("testfiles/zero.1", 200*1024*1024);
	// Generate a first encoded file
	{
		BackupStoreFilenameClear f0name("zero.0");
		FileStream out("testfiles/zero.0.enc", O_WRONLY | O_CREAT | O_EXCL);
		std::auto_ptr<IOStream> encoded(BackupStoreFile::EncodeFile("testfiles/zero.0", 1 /* dir ID */, f0name));
		encoded->CopyStreamTo(out);
	}
	// Then diff from it -- time how long it takes...
	{
		// Keep the start time as time_t so the value from time() is not narrowed
		time_t beginTime = time(0);
		FileStream blockindex("testfiles/zero.0.enc");
		BackupStoreFile::MoveStreamPositionToBlockIndex(blockindex);

		BackupStoreFilenameClear f1name("zero.1");
		FileStream out("testfiles/zero.1.enc", O_WRONLY | O_CREAT | O_EXCL);
		std::auto_ptr<IOStream> encoded(BackupStoreFile::EncodeFileDiff("testfiles/zero.1", 1 /* dir ID */, f1name,
			2000 /* object ID of the file diffing from */, blockindex, IOStream::TimeOutInfinite,
			0, 0));
		encoded->CopyStreamTo(out);
		// The whole diff must finish in under 40 seconds
		TEST_THAT(time(0) < (beginTime + 40));
	}
	// Remove zero-files to save disk space
	remove("testfiles/zero.0");
	remove("testfiles/zero.1");

#if 0
	// Code for a nasty real world example! (16Mb files, won't include them in the distribution
	// for obvious reasons...)
	// Generate a first encoded file
	{
		BackupStoreFilenameClear f0name("0000000000000000.old");
		FileStream out("testfiles/0000000000000000.enc.0", O_WRONLY | O_CREAT | O_EXCL);
		std::auto_ptr<IOStream> encoded(BackupStoreFile::EncodeFile("/Users/ben/Desktop/0000000000000000.old", 1 /* dir ID */, f0name));
		encoded->CopyStreamTo(out);
	}
	// Then diff from it -- time how long it takes...
	{
		int beginTime = time(0);
		FileStream blockindex("testfiles/0000000000000000.enc.0");
		BackupStoreFile::MoveStreamPositionToBlockIndex(blockindex);

		BackupStoreFilenameClear f1name("0000000000000000.new");
		FileStream out("testfiles/0000000000000000.enc.1", O_WRONLY | O_CREAT | O_EXCL);
		std::auto_ptr<IOStream> encoded(BackupStoreFile::EncodeFileDiff("/Users/ben/Desktop/0000000000000000.new", 1 /* dir ID */, f1name,
			2000 /* object ID of the file diffing from */, blockindex, IOStream::TimeOutInfinite,
			0, 0));
		encoded->CopyStreamTo(out);
		TEST_THAT(time(0) < (beginTime + 20));
	}
#endif // 0

	return 0;
}




// syntax highlighted by Code2HTML, v. 0.9.1