/* ==================================================================== * Copyright (c) 2006, Martin Hauner * http://subcommander.tigris.org * * Subcommander is licensed as described in the file doc/COPYING, which * you should have received as part of this distribution. * ==================================================================== */ // sc #include "FileData.h" #include "utf.h" // apr #include #include #include #include const sc::String LocaleEncoding("*"); FileData::FileData( const sc::String& name, const sc::String& encoding ) : _name(name), _encoding(encoding), _buf(0), _len(0), _bufXlate(0), _lenXlate(0) { } FileData::~FileData() { } const sc::Error* FileData::read() { apr_status_t status; apr_file_t* file; apr_finfo_t finfo = {}; status = apr_file_open( &file, _name, APR_READ, APR_OS_DEFAULT, _pool ); APR_ERR(status); status = apr_file_info_get( &finfo, APR_FINFO_SIZE, file ); APR_ERR(status); _buf = (const unsigned char*)apr_palloc( _pool, (apr_size_t)finfo.size ); _len = (sc::Size)finfo.size; apr_size_t rLen = _len; status = apr_file_read( file, (void*)_buf, &rLen ); APR_ERR(status); status = apr_file_close(file); APR_ERR(status); return sc::Success; } const sc::Error* FileData::xlate() { // try to detect unicode when no encoding is given if( _encoding == LocaleEncoding ) { utf utf( _buf, _len ); if( utf.hasEncoding() ) { _encoding = utf.getEncoding(); _bom = utf.getBom(); } } const char* cpFrom = _encoding; const char* cpTo = "utf-8"; if( _encoding == LocaleEncoding ) cpFrom = APR_LOCALE_CHARSET; const sc::Error* err = xlate( cpFrom, cpTo ); SC_ERR(err); return sc::Success; } char getChar( char c ) { if( c < 16 ) { return '.'; } else { return c; } } const sc::Error* FileData::xlateBinary() { _encoding = "binary"; _bom = Bom(); // create binary strings // 0x00 0x01 0x02 0x03 0x04 0x05 0x06 0x07 .... .... { /* we require ~7 byte for a each input byte */ sc::Size newLen = _len*7; unsigned char* newBuf = (unsigned char*)apr_palloc( _pool, newLen ); unsigned char* src = (unsigned char*)_buf; unsigned char* dst = newBuf; sc::Size in = _len; sc::Size out = 0; while( in >= 8 ) { int len = sprintf( (char*)dst, "0x%02x 0x%02x 0x%02x 0x%02x 0x%02x 0x%02x 0x%02x 0x%02x %c%c%c%c %c%c%c%c\n", src[0], src[1], src[2], src[3], src[4], src[5], src[6], src[7], getChar(src[0]), getChar(src[1]), getChar(src[2]), getChar(src[3]), getChar(src[4]), getChar(src[5]), getChar(src[6]), getChar(src[7]) ); dst += len; src += 8; out += len; in -= 8; } _buf = newBuf; _len = out; } // this should never fails because latin is an 8 bit encoding const sc::Error* err = xlate( "iso-8859-1", "utf-8" ); SC_ERR(err); return sc::Success; } const sc::String& FileData::getName() const { return _name; } const sc::String& FileData::getEncoding() const { return _encoding; } const unsigned char* FileData::getBuffer() const { return _bufXlate; } sc::Size FileData::getBufferSize() const { return _lenXlate; } const Bom& FileData::getBom() const { return _bom; } const sc::Error* FileData::xlate( const char* from, const char* to ) { apr_status_t status; apr_xlate_t* xlate; status = apr_xlate_open( &xlate, to, from, _pool ); APR_ERR(status); apr_size_t size = _len * 2; while(true) { apr::Pool pool; const char* xSrcBuf = (const char*)_buf; apr_size_t xSrcLen = _len; apr_size_t xDstLen = size; char* xDstBuf = (char*)apr_palloc( pool, xDstLen ); status = apr_xlate_conv_buffer( xlate, xSrcBuf, &xSrcLen, xDstBuf, &xDstLen ); // buffer to small? if( status == APR_SUCCESS && xSrcLen > 0 ) { size *= 2; continue; } // everything translated? else if( status == APR_SUCCESS && xSrcLen == 0 ) { apr_size_t xLen = size - xDstLen; _bufXlate = (const unsigned char*)apr_pmemdup( _pool, xDstBuf, xLen ); _lenXlate = xLen; // we are done break; } else { APR_ERR(status); } } status = apr_xlate_close(xlate); APR_ERR(status); return sc::Success; }