// distribution boxbackup-0.10 (svn version: 494)
//
// Copyright (c) 2003 - 2006
// Ben Summers and contributors. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// 3. All use of this software and associated advertising materials must
// display the following acknowledgment:
// This product includes software developed by Ben Summers.
// 4. The names of the Authors may not be used to endorse or promote
// products derived from this software without specific prior written
// permission.
//
// [Where legally impermissible the Authors do not disclaim liability for
// direct physical injury or death caused solely by defects in the software
// unless it is modified by a third party.]
//
// THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS OR
// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
// DISCLAIMED. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY DIRECT,
// INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
// (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
// HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
// STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
// POSSIBILITY OF SUCH DAMAGE.
//
//
//
// --------------------------------------------------------------------------
//
// File
// Name: BackupStoreFileEncodeStream.cpp
// Purpose: Implement stream-based file encoding for the backup store
// Created: 12/1/04
//
// --------------------------------------------------------------------------
#include "Box.h"
#include "BackupStoreFileEncodeStream.h"
#include "BackupStoreFile.h"
#include "BackupStoreFileWire.h"
#include "BackupStoreFileCryptVar.h"
#include "BackupStoreObjectMagic.h"
#include "BackupStoreException.h"
#include "BackupStoreConstants.h"
#include "BoxTime.h"
#include "BackupClientFileAttributes.h"
#include "FileStream.h"
#include "RollingChecksum.h"
#include "Random.h"
#include "MemLeakFindOn.h"
using namespace BackupStoreFileCryptVar;
// --------------------------------------------------------------------------
//
// Function
// Name: BackupStoreFileEncodeStream::BackupStoreFileEncodeStream
// Purpose: Constructor (opens file)
// Created: 8/12/03
//
// --------------------------------------------------------------------------
BackupStoreFileEncodeStream::BackupStoreFileEncodeStream()
: mpRecipe(0),
mpFile(0),
mStatus(Status_Header),
mSendData(true),
mTotalBlocks(0),
mAbsoluteBlockNumber(-1),
mInstructionNumber(-1),
mNumBlocks(0),
mCurrentBlock(-1),
mCurrentBlockEncodedSize(0),
mPositionInCurrentBlock(0),
mBlockSize(BACKUP_FILE_MIN_BLOCK_SIZE),
mLastBlockSize(0),
mpRawBuffer(0),
mAllocatedBufferSize(0),
mEntryIVBase(0)
{
}
// --------------------------------------------------------------------------
//
// Function
// Name: BackupStoreFileEncodeStream::~BackupStoreFileEncodeStream()
// Purpose: Destructor
// Created: 8/12/03
//
// --------------------------------------------------------------------------
BackupStoreFileEncodeStream::~BackupStoreFileEncodeStream()
{
// Free buffers
if(mpRawBuffer)
{
::free(mpRawBuffer);
mpRawBuffer = 0;
}
// Close the file, which we might have open
if(mpFile)
{
delete mpFile;
mpFile = 0;
}
// Free the recipe
if(mpRecipe != 0)
{
delete mpRecipe;
mpRecipe = 0;
}
}
// --------------------------------------------------------------------------
//
// Function
// Name: BackupStoreFileEncodeStream::Setup(const char *, Recipe *, int64_t, const BackupStoreFilename &, int64_t *)
// Purpose: Reads file information, and builds file header reading for sending.
// Takes ownership of the Recipe.
// Created: 8/12/03
//
// --------------------------------------------------------------------------
void BackupStoreFileEncodeStream::Setup(const char *Filename, BackupStoreFileEncodeStream::Recipe *pRecipe,
int64_t ContainerID, const BackupStoreFilename &rStoreFilename, int64_t *pModificationTime)
{
// Pointer to a blank recipe which we might create
BackupStoreFileEncodeStream::Recipe *pblankRecipe = 0;
try
{
// Get file attributes
box_time_t modTime = 0;
int64_t fileSize = 0;
BackupClientFileAttributes attr;
attr.ReadAttributes(Filename, false /* no zeroing of modification times */, &modTime,
0 /* not interested in attr mod time */, &fileSize);
// Might need to create a blank recipe...
if(pRecipe == 0)
{
pblankRecipe = new BackupStoreFileEncodeStream::Recipe(0, 0);
BackupStoreFileEncodeStream::RecipeInstruction instruction;
instruction.mSpaceBefore = fileSize; // whole file
instruction.mBlocks = 0; // no blocks
instruction.mpStartBlock = 0; // no block
pblankRecipe->push_back(instruction);
pRecipe = pblankRecipe;
}
// Tell caller?
if(pModificationTime != 0)
{
*pModificationTime = modTime;
}
// Go through each instruction in the recipe and work out how many blocks
// it will add, and the max clear size of these blocks
int maxBlockClearSize = 0;
for(uint64_t inst = 0; inst < pRecipe->size(); ++inst)
{
if((*pRecipe)[inst].mSpaceBefore > 0)
{
// Calculate the number of blocks the space before requires
int64_t numBlocks;
int32_t blockSize, lastBlockSize;
CalculateBlockSizes((*pRecipe)[inst].mSpaceBefore, numBlocks, blockSize, lastBlockSize);
// Add to accumlated total
mTotalBlocks += numBlocks;
// Update maximum clear size
if(blockSize > maxBlockClearSize) maxBlockClearSize = blockSize;
if(lastBlockSize > maxBlockClearSize) maxBlockClearSize = lastBlockSize;
}
// Add number of blocks copied from the previous file
mTotalBlocks += (*pRecipe)[inst].mBlocks;
// Check for bad things
if((*pRecipe)[inst].mBlocks < 0 || ((*pRecipe)[inst].mBlocks != 0 && (*pRecipe)[inst].mpStartBlock == 0))
{
THROW_EXCEPTION(BackupStoreException, Internal)
}
// Run through blocks to get the max clear size
for(int32_t b = 0; b < (*pRecipe)[inst].mBlocks; ++b)
{
if((*pRecipe)[inst].mpStartBlock[b].mSize > maxBlockClearSize) maxBlockClearSize = (*pRecipe)[inst].mpStartBlock[b].mSize;
}
}
// Send data? (symlinks don't have any data in them)
mSendData = !attr.IsSymLink();
// If not data is being sent, then the max clear block size is zero
if(!mSendData)
{
maxBlockClearSize = 0;
}
// Header
file_StreamFormat hdr;
hdr.mMagicValue = htonl(OBJECTMAGIC_FILE_MAGIC_VALUE_V1);
hdr.mNumBlocks = (mSendData)?(box_hton64(mTotalBlocks)):(0);
hdr.mContainerID = box_hton64(ContainerID);
hdr.mModificationTime = box_hton64(modTime);
// add a bit to make it harder to tell what's going on -- try not to give away too much info about file size
hdr.mMaxBlockClearSize = htonl(maxBlockClearSize + 128);
hdr.mOptions = 0; // no options defined yet
// Write header to stream
mData.Write(&hdr, sizeof(hdr));
// Write filename to stream
rStoreFilename.WriteToStream(mData);
// Write attributes to stream
attr.WriteToStream(mData);
// Allocate some buffers for writing data
if(mSendData)
{
// Open the file
mpFile = new FileStream(Filename);
// Work out the largest possible block required for the encoded data
mAllocatedBufferSize = BackupStoreFile::MaxBlockSizeForChunkSize(maxBlockClearSize);
// Then allocate two blocks of this size
mpRawBuffer = (uint8_t*)::malloc(mAllocatedBufferSize);
if(mpRawBuffer == 0)
{
throw std::bad_alloc();
}
#ifndef NDEBUG
// In debug builds, make sure that the reallocation code is exercised.
mEncodedBuffer.Allocate(mAllocatedBufferSize / 4);
#else
mEncodedBuffer.Allocate(mAllocatedBufferSize);
#endif
}
else
{
// Write an empty block index for the symlink
file_BlockIndexHeader blkhdr;
blkhdr.mMagicValue = htonl(OBJECTMAGIC_FILE_BLOCKS_MAGIC_VALUE_V1);
blkhdr.mOtherFileID = box_hton64(0); // not other file ID
blkhdr.mEntryIVBase = box_hton64(0);
blkhdr.mNumBlocks = box_hton64(0);
mData.Write(&blkhdr, sizeof(blkhdr));
}
// Ready for reading
mData.SetForReading();
// Update stats
BackupStoreFile::msStats.mBytesInEncodedFiles += fileSize;
// Finally, store the pointer to the recipe, when we know exceptions won't occur
mpRecipe = pRecipe;
}
catch(...)
{
// Clean up any blank recipe
if(pblankRecipe != 0)
{
delete pblankRecipe;
pblankRecipe = 0;
}
throw;
}
}
// --------------------------------------------------------------------------
//
// Function
// Name: BackupStoreFileEncodeStream::CalculateBlockSizes(int64_t &, int32_t &, int32_t &)
// Purpose: Calculates the sizes of blocks in a section of the file
// Created: 16/1/04
//
// --------------------------------------------------------------------------
void BackupStoreFileEncodeStream::CalculateBlockSizes(int64_t DataSize, int64_t &rNumBlocksOut, int32_t &rBlockSizeOut, int32_t &rLastBlockSizeOut)
{
// How many blocks, and how big?
rBlockSizeOut = BACKUP_FILE_MIN_BLOCK_SIZE / 2;
do
{
rBlockSizeOut *= 2;
rNumBlocksOut = (DataSize + rBlockSizeOut - 1) / rBlockSizeOut;
} while(rBlockSizeOut <= BACKUP_FILE_MAX_BLOCK_SIZE && rNumBlocksOut > BACKUP_FILE_INCREASE_BLOCK_SIZE_AFTER);
// Last block size
rLastBlockSizeOut = DataSize - ((rNumBlocksOut - 1) * rBlockSizeOut);
// Avoid small blocks?
if(rLastBlockSizeOut < BACKUP_FILE_AVOID_BLOCKS_LESS_THAN
&& rNumBlocksOut > 1)
{
// Add the small bit of data to the last block
--rNumBlocksOut;
rLastBlockSizeOut += rBlockSizeOut;
}
// checks!
ASSERT((((rNumBlocksOut-1) * rBlockSizeOut) + rLastBlockSizeOut) == DataSize);
//TRACE4("CalcBlockSize, sz %lld, num %lld, blocksize %d, last %d\n", DataSize, rNumBlocksOut, (int32_t)rBlockSizeOut, (int32_t)rLastBlockSizeOut);
}
// --------------------------------------------------------------------------
//
// Function
// Name: BackupStoreFileEncodeStream::Read(void *, int, int)
// Purpose: As interface -- generates encoded file data on the fly from the raw file
// Created: 8/12/03
//
// --------------------------------------------------------------------------
int BackupStoreFileEncodeStream::Read(void *pBuffer, int NBytes, int Timeout)
{
// Check there's something to do.
if(mStatus == Status_Finished)
{
return 0;
}
int bytesToRead = NBytes;
uint8_t *buffer = (uint8_t*)pBuffer;
while(bytesToRead > 0 && mStatus != Status_Finished)
{
if(mStatus == Status_Header || mStatus == Status_BlockListing)
{
// Header or block listing phase -- send from the buffered stream
// Send bytes from the data buffer
int b = mData.Read(buffer, bytesToRead, Timeout);
bytesToRead -= b;
buffer += b;
// Check to see if all the data has been used from this stream
if(!mData.StreamDataLeft())
{
// Yes, move on to next phase (or finish, if there's no file data)
if(!mSendData)
{
mStatus = Status_Finished;
}
else
{
// Reset the buffer so it can be used for the next phase
mData.Reset();
// Get buffer ready for index?
if(mStatus == Status_Header)
{
// Just finished doing the stream header, create the block index header
file_BlockIndexHeader blkhdr;
blkhdr.mMagicValue = htonl(OBJECTMAGIC_FILE_BLOCKS_MAGIC_VALUE_V1);
ASSERT(mpRecipe != 0);
blkhdr.mOtherFileID = box_hton64(mpRecipe->GetOtherFileID());
blkhdr.mNumBlocks = box_hton64(mTotalBlocks);
// Generate the IV base
Random::Generate(&mEntryIVBase, sizeof(mEntryIVBase));
blkhdr.mEntryIVBase = box_hton64(mEntryIVBase);
mData.Write(&blkhdr, sizeof(blkhdr));
}
++mStatus;
}
}
}
else if(mStatus == Status_Blocks)
{
// Block sending phase
if(mPositionInCurrentBlock >= mCurrentBlockEncodedSize)
{
// Next block!
++mCurrentBlock;
++mAbsoluteBlockNumber;
if(mCurrentBlock >= mNumBlocks)
{
// Output extra blocks for this instruction and move forward in file
if(mInstructionNumber >= 0)
{
SkipPreviousBlocksInInstruction();
}
// Is there another instruction to go?
++mInstructionNumber;
// Skip instructions which don't contain any data
while(mInstructionNumber < static_cast<int64_t>(mpRecipe->size())
&& (*mpRecipe)[mInstructionNumber].mSpaceBefore == 0)
{
SkipPreviousBlocksInInstruction();
++mInstructionNumber;
}
if(mInstructionNumber >= static_cast<int64_t>(mpRecipe->size()))
{
// End of blocks, go to next phase
++mStatus;
// Set the data to reading so the index can be written
mData.SetForReading();
}
else
{
// Get ready for this instruction
SetForInstruction();
}
}
// Can't use 'else' here as SetForInstruction() will change this
if(mCurrentBlock < mNumBlocks)
{
EncodeCurrentBlock();
}
}
// Send data from the current block (if there's data to send)
if(mPositionInCurrentBlock < mCurrentBlockEncodedSize)
{
// How much data to put in the buffer?
int s = mCurrentBlockEncodedSize - mPositionInCurrentBlock;
if(s > bytesToRead) s = bytesToRead;
// Copy it in
::memcpy(buffer, mEncodedBuffer.mpBuffer + mPositionInCurrentBlock, s);
// Update variables
bytesToRead -= s;
buffer += s;
mPositionInCurrentBlock += s;
}
}
else
{
// Should never get here, as it'd be an invalid status
ASSERT(false);
}
}
// Add encoded size to stats
BackupStoreFile::msStats.mTotalFileStreamSize += (NBytes - bytesToRead);
// Return size of data to caller
return NBytes - bytesToRead;
}
// --------------------------------------------------------------------------
//
// Function
// Name: BackupStoreFileEncodeStream::StorePreviousBlocksInInstruction()
// Purpose: Private. Stores the blocks of the old file referenced in the current
// instruction into the index and skips over the data in the file
// Created: 16/1/04
//
// --------------------------------------------------------------------------
void BackupStoreFileEncodeStream::SkipPreviousBlocksInInstruction()
{
// Check something is necessary
if((*mpRecipe)[mInstructionNumber].mpStartBlock == 0 || (*mpRecipe)[mInstructionNumber].mBlocks == 0)
{
return;
}
// Index of the first block in old file (being diffed from)
int firstIndex = mpRecipe->BlockPtrToIndex((*mpRecipe)[mInstructionNumber].mpStartBlock);
int64_t sizeToSkip = 0;
for(int32_t b = 0; b < (*mpRecipe)[mInstructionNumber].mBlocks; ++b)
{
// Update stats
BackupStoreFile::msStats.mBytesAlreadyOnServer += (*mpRecipe)[mInstructionNumber].mpStartBlock[b].mSize;
// Store the entry
StoreBlockIndexEntry(0 - (firstIndex + b),
(*mpRecipe)[mInstructionNumber].mpStartBlock[b].mSize,
(*mpRecipe)[mInstructionNumber].mpStartBlock[b].mWeakChecksum,
(*mpRecipe)[mInstructionNumber].mpStartBlock[b].mStrongChecksum);
// Increment the absolute block number -- kept encryption IV in sync
++mAbsoluteBlockNumber;
// Add the size of this block to the size to skip
sizeToSkip += (*mpRecipe)[mInstructionNumber].mpStartBlock[b].mSize;
}
// Move forward in the stream
mpFile->Seek(sizeToSkip, IOStream::SeekType_Relative);
}
// --------------------------------------------------------------------------
//
// Function
// Name: BackupStoreFileEncodeStream::SetForInstruction()
// Purpose: Private. Sets the state of the internal variables for the current instruction in the recipe
// Created: 16/1/04
//
// --------------------------------------------------------------------------
void BackupStoreFileEncodeStream::SetForInstruction()
{
// Calculate block sizes
CalculateBlockSizes((*mpRecipe)[mInstructionNumber].mSpaceBefore, mNumBlocks, mBlockSize, mLastBlockSize);
// Set variables
mCurrentBlock = 0;
mCurrentBlockEncodedSize = 0;
mPositionInCurrentBlock = 0;
}
// --------------------------------------------------------------------------
//
// Function
// Name: BackupStoreFileEncodeStream::EncodeCurrentBlock()
// Purpose: Private. Encodes the current block, and writes the block data to the index
// Created: 8/12/03
//
// --------------------------------------------------------------------------
void BackupStoreFileEncodeStream::EncodeCurrentBlock()
{
// How big is the block, raw?
int blockRawSize = mBlockSize;
if(mCurrentBlock == (mNumBlocks - 1))
{
blockRawSize = mLastBlockSize;
}
ASSERT(blockRawSize < mAllocatedBufferSize);
// Check file open
if(mpFile == 0)
{
// File should be open, but isn't. So logical error.
THROW_EXCEPTION(BackupStoreException, Internal)
}
// Read the data in
if(!mpFile->ReadFullBuffer(mpRawBuffer, blockRawSize, 0 /* not interested in size if failure */))
{
// TODO: Do something more intelligent, and abort this upload because the file
// has changed
THROW_EXCEPTION(BackupStoreException, Temp_FileEncodeStreamDidntReadBuffer)
}
// Encode it
mCurrentBlockEncodedSize = BackupStoreFile::EncodeChunk(mpRawBuffer, blockRawSize, mEncodedBuffer);
//TRACE2("Encode: Encoded size of block %d is %d\n", (int32_t)mCurrentBlock, (int32_t)mCurrentBlockEncodedSize);
// Create block listing data -- generate checksums
RollingChecksum weakChecksum(mpRawBuffer, blockRawSize);
MD5Digest strongChecksum;
strongChecksum.Add(mpRawBuffer, blockRawSize);
strongChecksum.Finish();
// Add entry to the index
StoreBlockIndexEntry(mCurrentBlockEncodedSize, blockRawSize, weakChecksum.GetChecksum(), strongChecksum.DigestAsData());
// Set vars to reading this block
mPositionInCurrentBlock = 0;
}
// --------------------------------------------------------------------------
//
// Function
// Name: BackupStoreFileEncodeStream::StoreBlockIndexEntry(int64_t, int32_t, uint32_t, uint8_t *)
// Purpose: Private. Adds an entry to the index currently being stored for sending at end of the stream.
// Created: 16/1/04
//
// --------------------------------------------------------------------------
void BackupStoreFileEncodeStream::StoreBlockIndexEntry(int64_t EncSizeOrBlkIndex, int32_t ClearSize, uint32_t WeakChecksum, uint8_t *pStrongChecksum)
{
// First, the encrypted section
file_BlockIndexEntryEnc entryEnc;
entryEnc.mSize = htonl(ClearSize);
entryEnc.mWeakChecksum = htonl(WeakChecksum);
::memcpy(entryEnc.mStrongChecksum, pStrongChecksum, sizeof(entryEnc.mStrongChecksum));
// Then the clear section
file_BlockIndexEntry entry;
entry.mEncodedSize = box_hton64(((uint64_t)EncSizeOrBlkIndex));
// Then encrypt the encryted section
// Generate the IV from the block number
if(sBlowfishEncryptBlockEntry.GetIVLength() != sizeof(mEntryIVBase))
{
THROW_EXCEPTION(BackupStoreException, IVLengthForEncodedBlockSizeDoesntMeetLengthRequirements)
}
uint64_t iv = mEntryIVBase;
iv += mAbsoluteBlockNumber;
// Convert to network byte order before encrypting with it, so that restores work on
// platforms with different endiannesses.
iv = box_hton64(iv);
sBlowfishEncryptBlockEntry.SetIV(&iv);
// Encode the data
int encodedSize = sBlowfishEncryptBlockEntry.TransformBlock(entry.mEnEnc, sizeof(entry.mEnEnc), &entryEnc, sizeof(entryEnc));
if(encodedSize != sizeof(entry.mEnEnc))
{
THROW_EXCEPTION(BackupStoreException, BlockEntryEncodingDidntGiveExpectedLength)
}
// Save to data block for sending at the end of the stream
mData.Write(&entry, sizeof(entry));
}
// --------------------------------------------------------------------------
//
// Function
// Name: BackupStoreFileEncodeStream::Write(const void *, int)
// Purpose: As interface. Exceptions.
// Created: 8/12/03
//
// --------------------------------------------------------------------------
void BackupStoreFileEncodeStream::Write(const void *pBuffer, int NBytes)
{
THROW_EXCEPTION(BackupStoreException, CantWriteToEncodedFileStream)
}
// --------------------------------------------------------------------------
//
// Function
// Name: BackupStoreFileEncodeStream::StreamDataLeft()
// Purpose: As interface -- end of stream reached?
// Created: 8/12/03
//
// --------------------------------------------------------------------------
bool BackupStoreFileEncodeStream::StreamDataLeft()
{
return (mStatus != Status_Finished);
}
// --------------------------------------------------------------------------
//
// Function
// Name: BackupStoreFileEncodeStream::StreamClosed()
// Purpose: As interface
// Created: 8/12/03
//
// --------------------------------------------------------------------------
bool BackupStoreFileEncodeStream::StreamClosed()
{
return true;
}
// --------------------------------------------------------------------------
//
// Function
// Name: BackupStoreFileEncodeStream::Recipe::Recipe(BackupStoreFileCreation::BlocksAvailableEntry *, int64_t)
// Purpose: Constructor. Takes ownership of the block index, and will delete it when it's deleted
// Created: 15/1/04
//
// --------------------------------------------------------------------------
BackupStoreFileEncodeStream::Recipe::Recipe(BackupStoreFileCreation::BlocksAvailableEntry *pBlockIndex,
int64_t NumBlocksInIndex, int64_t OtherFileID)
: mpBlockIndex(pBlockIndex),
mNumBlocksInIndex(NumBlocksInIndex),
mOtherFileID(OtherFileID)
{
ASSERT((mpBlockIndex == 0) || (NumBlocksInIndex != 0))
}
// --------------------------------------------------------------------------
//
// Function
// Name: BackupStoreFileEncodeStream::Recipe::~Recipe()
// Purpose: Destructor
// Created: 15/1/04
//
// --------------------------------------------------------------------------
BackupStoreFileEncodeStream::Recipe::~Recipe()
{
// Free the block index, if there is one
if(mpBlockIndex != 0)
{
::free(mpBlockIndex);
}
}
syntax highlighted by Code2HTML, v. 0.9.1