/* ====================================================================
* Copyright (c) 2006, Martin Hauner
* http://subcommander.tigris.org
*
* Subcommander is licensed as described in the file doc/COPYING, which
* you should have received as part of this distribution.
* ====================================================================
*/
// sc
#include "FileData.h"
#include "utf.h"
// apr
#include <apr_file_info.h>
#include <apr_file_io.h>
#include <apr_xlate.h>
#include <apr_strings.h>
// Placeholder encoding name. FileData::xlate() compares the requested encoding
// against this value; while it matches, unicode detection is attempted and,
// failing that, APR_LOCALE_CHARSET is used as the source charset.
const sc::String LocaleEncoding("*");
// Stores the file name and the requested encoding. Nothing is read here:
// the raw buffer and the translated buffer both start out as null/zero.
FileData::FileData( const sc::String& name, const sc::String& encoding )
: _name(name),
  _encoding(encoding),
  _buf(0),
  _len(0),
  _bufXlate(0),
  _lenXlate(0)
{
}
// Nothing to release explicitly here; the body is intentionally empty.
FileData::~FileData()
{
}
// Reads the whole file named by _name into a buffer allocated from _pool.
//
// On success _buf points to the data and _len is the number of bytes that
// were actually read. _buf and _len are only updated once the read has
// succeeded, so a failed read leaves the previous values untouched.
//
// The file handle is closed on every path after a successful open. If the
// file turns out to be shorter than the size reported by
// apr_file_info_get() (APR_EOF from the read), the bytes that could be read
// are kept and this is not treated as an error.
//
// Returns sc::Success, or whatever APR_ERR produces for the failing status.
const sc::Error* FileData::read()
{
  apr_file_t* file = 0;
  apr_status_t status = apr_file_open( &file, _name, APR_READ, APR_OS_DEFAULT, _pool );
  APR_ERR(status);

  apr_finfo_t finfo = {};
  status = apr_file_info_get( &finfo, APR_FINFO_SIZE, file );

  if( status == APR_SUCCESS )
  {
    const apr_size_t size = (apr_size_t)finfo.size;
    unsigned char* buf = (unsigned char*)apr_palloc( _pool, size );
    apr_size_t rLen = 0;

    // a plain apr_file_read() may return fewer bytes than requested, so
    // keep reading until the buffer is full or the file ends.
    status = apr_file_read_full( file, buf, size, &rLen );

    if( status == APR_SUCCESS || APR_STATUS_IS_EOF(status) )
    {
      // publish only what was really read, never the stat size.
      _buf = buf;
      _len = (sc::Size)rLen;
      status = APR_SUCCESS;
    }
  }

  // close exactly once, whether or not the steps above succeeded.
  apr_status_t closeStatus = apr_file_close(file);

  // an earlier failure is more interesting than a failing close.
  APR_ERR(status);
  APR_ERR(closeStatus);
  return sc::Success;
}
// Translates the raw buffer to utf-8.
//
// While the encoding is still the LocaleEncoding placeholder the raw bytes
// are inspected by the utf helper; if it reports an encoding, that encoding
// and its bom replace the current ones. If the encoding is still the
// placeholder afterwards, APR_LOCALE_CHARSET is used as source charset.
//
// Returns sc::Success, or the error from the translation (via SC_ERR).
const sc::Error* FileData::xlate()
{
  // no explicit encoding: look for unicode in the raw bytes
  if( _encoding == LocaleEncoding )
  {
    utf detector( _buf, _len );

    if( detector.hasEncoding() )
    {
      _encoding = detector.getEncoding();
      _bom = detector.getBom();
    }
  }

  // pick the source charset, the target is always utf-8
  const char* source;
  if( _encoding == LocaleEncoding )
  {
    source = APR_LOCALE_CHARSET;
  }
  else
  {
    source = _encoding;
  }

  const sc::Error* err = xlate( source, "utf-8" );
  SC_ERR(err);

  return sc::Success;
}
// Returns the character to show for a byte in the text column of the binary
// dump: values below 16 are replaced by '.', everything else is returned
// unchanged. Note that where plain char is signed, bytes >= 0x80 are
// negative and are therefore replaced by '.' as well.
char getChar( char c )
{
  return ( c < 16 ) ? '.' : c;
}
const sc::Error* FileData::xlateBinary()
{
_encoding = "binary";
_bom = Bom();
// create binary strings
// 0x00 0x01 0x02 0x03 0x04 0x05 0x06 0x07 .... ....
{
/* we require ~7 byte for a each input byte */
sc::Size newLen = _len*7;
unsigned char* newBuf = (unsigned char*)apr_palloc( _pool, newLen );
unsigned char* src = (unsigned char*)_buf;
unsigned char* dst = newBuf;
sc::Size in = _len;
sc::Size out = 0;
while( in >= 8 )
{
int len = sprintf( (char*)dst, "0x%02x 0x%02x 0x%02x 0x%02x 0x%02x 0x%02x 0x%02x 0x%02x %c%c%c%c %c%c%c%c\n",
src[0], src[1], src[2], src[3], src[4], src[5], src[6], src[7],
getChar(src[0]), getChar(src[1]), getChar(src[2]), getChar(src[3]),
getChar(src[4]), getChar(src[5]), getChar(src[6]), getChar(src[7]) );
dst += len;
src += 8;
out += len;
in -= 8;
}
_buf = newBuf;
_len = out;
}
// this should never fails because latin is an 8 bit encoding
const sc::Error* err = xlate( "iso-8859-1", "utf-8" );
SC_ERR(err);
return sc::Success;
}
// Returns the file name passed to the constructor.
const sc::String& FileData::getName() const { return _name; }
// Returns the current encoding name. This is the constructor argument until
// xlate() detects an encoding or xlateBinary() sets it to "binary".
const sc::String& FileData::getEncoding() const { return _encoding; }
// Returns the translated buffer (not the raw file bytes). It is null until a
// translation has succeeded.
const unsigned char* FileData::getBuffer() const { return _bufXlate; }
// Returns the size in bytes of the translated buffer returned by getBuffer().
sc::Size FileData::getBufferSize() const { return _lenXlate; }
// Returns the bom stored by xlate() when it detected a unicode encoding, or
// the default constructed Bom set by xlateBinary().
const Bom& FileData::getBom() const { return _bom; }
// Translates _buf/_len from charset `from` to charset `to` and stores a copy
// of the result, allocated from _pool, in _bufXlate/_lenXlate.
//
// The output buffer starts at twice the input size. A conversion that
// reports success but leaves input unconsumed is taken as "output buffer too
// small" and is retried from the beginning with a buffer twice as large.
// Each attempt uses its own temporary pool, so failed attempts do not pile
// up in _pool.
//
// The translation handle is closed on every path, including a failing
// conversion; the conversion error is reported in preference to a close
// error. _bufXlate/_lenXlate are only modified on success.
//
// Returns sc::Success, or whatever APR_ERR produces for the failing status.
const sc::Error* FileData::xlate( const char* from, const char* to )
{
  apr_xlate_t* xlate = 0;
  apr_status_t status = apr_xlate_open( &xlate, to, from, _pool );
  APR_ERR(status);

  apr_status_t convStatus = APR_SUCCESS;
  apr_size_t size = _len * 2;

  while(true)
  {
    // scratch pool for this attempt only
    apr::Pool pool;

    const char* xSrcBuf = (const char*)_buf;
    apr_size_t xSrcLen = _len;
    apr_size_t xDstLen = size;
    char* xDstBuf = (char*)apr_palloc( pool, xDstLen );

    convStatus = apr_xlate_conv_buffer( xlate, xSrcBuf, &xSrcLen, xDstBuf, &xDstLen );

    // conversion failed, report it after the handle has been closed
    if( convStatus != APR_SUCCESS )
    {
      break;
    }

    // buffer too small? try again with more room
    if( xSrcLen > 0 )
    {
      size *= 2;
      continue;
    }

    // everything translated, xDstLen is the unused rest of the buffer
    apr_size_t xLen = size - xDstLen;
    _bufXlate = (const unsigned char*)apr_pmemdup( _pool, xDstBuf, xLen );
    _lenXlate = xLen;
    break;
  }

  status = apr_xlate_close(xlate);

  APR_ERR(convStatus);
  APR_ERR(status);
  return sc::Success;
}
// syntax highlighted by Code2HTML, v. 0.9.1