// ports//devel/subcommander/work/subcommander-1.2.2/util/FileData.cpp

/* ====================================================================
 * Copyright (c) 2006,      Martin Hauner
 *                          http://subcommander.tigris.org
 *
 * Subcommander is licensed as described in the file doc/COPYING, which
 * you should have received as part of this distribution.
 * ====================================================================
 */
 
// sc
#include "FileData.h"
#include "utf.h"

// apr
#include <apr_file_info.h>
#include <apr_file_io.h>
#include <apr_xlate.h>
#include <apr_strings.h>


// Placeholder encoding name meaning "no explicit encoding given".
// FileData::xlate() compares _encoding against this value: when they are
// equal it tries to detect a unicode encoding from the data and otherwise
// falls back to APR_LOCALE_CHARSET as the source charset.
const sc::String LocaleEncoding("*");

/**
 * Remember the file name and the encoding to translate from.
 *
 * Nothing is read here; the raw buffer (_buf/_len) and the translated
 * buffer (_bufXlate/_lenXlate) both start out as null/zero.
 *
 * @param name      stored in _name, later passed to apr_file_open() by read().
 * @param encoding  stored in _encoding, later used by xlate() as the source
 *                  charset.
 */
FileData::FileData( const sc::String& name, const sc::String& encoding )
  : _name(name)
  , _encoding(encoding)
  , _buf(0)
  , _len(0)
  , _bufXlate(0)
  , _lenXlate(0)
{
}

/**
 * Nothing is released explicitly here.
 * NOTE(review): the buffers are allocated from _pool, so they are presumably
 * released together with that member - confirm against the pool class.
 */
FileData::~FileData() {}

/**
 * Read the complete content of the file _name into a buffer allocated from
 * _pool and publish it as the raw data (_buf/_len).
 *
 * _len is set to the number of bytes that were actually read, which can be
 * less than the size reported by apr_file_info_get() if the file shrinks
 * between the two calls. _buf and _len are only modified once the read has
 * succeeded.
 *
 * @return sc::Success, or the error produced by APR_ERR for the first APR
 *         call that fails (APR_ERR is expected to return from this function
 *         on a non-success status - it is defined outside this file).
 */
const sc::Error* FileData::read()
{
  apr_status_t status;
  apr_file_t*  file;
  apr_finfo_t  finfo = {};

  status = apr_file_open( &file, _name, APR_READ, APR_OS_DEFAULT, _pool );
  APR_ERR(status);

  status = apr_file_info_get( &finfo, APR_FINFO_SIZE, file );
  if( status != APR_SUCCESS )
  {
    // do not keep the file open when we bail out, the result of the close
    // is ignored on purpose so the original error gets reported
    apr_file_close(file);
    APR_ERR(status);
  }

  const apr_size_t size = (apr_size_t)finfo.size;
  unsigned char*   buf  = (unsigned char*)apr_palloc( _pool, size );
  apr_size_t       rLen = 0;

  if( size > 0 )
  {
    // a single apr_file_read may return less than requested, so loop until
    // the whole file is in or the end of the file is reached
    status = apr_file_read_full( file, buf, size, &rLen );

    // the file got shorter after we asked for its size: keep what we got
    if( status == APR_EOF )
    {
      status = APR_SUCCESS;
    }

    if( status != APR_SUCCESS )
    {
      apr_file_close(file);
      APR_ERR(status);
    }
  }

  _buf = buf;
  _len = (sc::Size)rLen;

  status = apr_file_close(file);
  APR_ERR(status);

  return sc::Success;
}

/**
 * Translate the raw data (_buf/_len) to utf-8.
 *
 * If no explicit encoding was given (_encoding equals LocaleEncoding) the raw
 * data is first checked for a unicode encoding. When one is found it replaces
 * _encoding and its byte order mark is stored in _bom. If _encoding is still
 * LocaleEncoding after that, APR_LOCALE_CHARSET is used as source charset.
 *
 * @return sc::Success, or whatever SC_ERR produces for a failed translation
 *         (SC_ERR is defined outside this file).
 */
const sc::Error* FileData::xlate()
{
  if( _encoding == LocaleEncoding )
  {
    // no encoding requested, let the data tell us if it is unicode
    utf detected( _buf, _len );

    if( detected.hasEncoding() )
    {
      _encoding = detected.getEncoding();
      _bom      = detected.getBom();
    }
  }

  const char* const target = "utf-8";
  const char*       source = _encoding;

  // nothing detected, fall back to the charset of the locale
  if( _encoding == LocaleEncoding )
  {
    source = APR_LOCALE_CHARSET;
  }

  const sc::Error* result = xlate( source, target );
  SC_ERR(result);

  return sc::Success;
}

/**
 * Map a byte to the character that represents it in the text column of the
 * binary dump.
 *
 * Printable 7-bit ASCII (0x20 - 0x7e) is returned unchanged. Everything else
 * (control characters, DEL and all bytes >= 0x80) is replaced by '.'.
 *
 * @param c  the byte to display.
 * @return   @a c if it is printable ASCII, '.' otherwise.
 */
char getChar( char c )
{
  // compare as unsigned so the result does not depend on whether plain char
  // is signed on the target platform
  const unsigned char byte = static_cast<unsigned char>(c);

  if( byte < 0x20 || byte > 0x7e )
  {
    return '.';
  }

  return c;
}

// number of input bytes shown on one line of the binary dump
static const sc::Size BinaryBytesPerLine = 8;

// number of characters written for one complete dump line:
// 8 hex cells of 4 characters, 10 separating blanks, 8 text characters,
// 1 blank in the middle of the text column and the trailing newline
static const sc::Size BinaryLineWidth = 52;

/**
 * Write one line of the binary dump for the @a count (1 to 8) bytes at
 * @a src to @a dst. No terminating zero is written.
 *
 * A complete line looks like this:
 *   0x00 0x01 0x02 0x03  0x04 0x05 0x06 0x07  .... ....
 * On a short line the missing hex cells are filled with blanks so the text
 * column stays aligned; the text column only contains the bytes present.
 *
 * @return the number of characters written, at most BinaryLineWidth.
 */
static sc::Size writeBinaryLine( const unsigned char* src, sc::Size count, unsigned char* dst )
{
  static const char digits[] = "0123456789abcdef";

  unsigned char* out = dst;

  // hex column
  for( sc::Size i = 0; i < BinaryBytesPerLine; i++ )
  {
    if( i < count )
    {
      *out++ = '0';
      *out++ = 'x';
      *out++ = digits[src[i] >> 4];
      *out++ = digits[src[i] & 0x0f];
    }
    else
    {
      for( int pad = 0; pad < 4; pad++ )
      {
        *out++ = ' ';
      }
    }

    // one blank after every cell, a second one after each group of four
    *out++ = ' ';
    if( i % 4 == 3 )
    {
      *out++ = ' ';
    }
  }

  // text column, split into two groups of four like the hex column
  for( sc::Size i = 0; i < count; i++ )
  {
    if( i == 4 )
    {
      *out++ = ' ';
    }
    *out++ = getChar( (char)src[i] );
  }

  *out++ = '\n';

  return (sc::Size)(out - dst);
}

/**
 * Replace the raw data with a textual hex dump of itself and translate that
 * dump to utf-8.
 *
 * Sets _encoding to "binary" and resets _bom. _buf/_len are replaced by the
 * dump text (allocated from _pool), so the raw bytes are no longer reachable
 * through this object afterwards. Every input byte is shown, including a
 * final line with fewer than 8 bytes.
 *
 * @return sc::Success, or whatever SC_ERR produces for a failed translation
 *         (SC_ERR is defined outside this file).
 */
const sc::Error* FileData::xlateBinary()
{
  _encoding = "binary";
  _bom      = Bom();

  // create binary strings
  // 0x00 0x01 0x02 0x03  0x04 0x05 0x06 0x07  .... ....
  {
    // every started group of 8 input bytes needs one output line
    const sc::Size lines  = (_len + BinaryBytesPerLine - 1) / BinaryBytesPerLine;
    const sc::Size newLen = lines * BinaryLineWidth;
    unsigned char* newBuf = (unsigned char*)apr_palloc( _pool, newLen );

    const unsigned char* src = _buf;
    unsigned char*       dst = newBuf;
    sc::Size             in  = _len;

    while( in > 0 )
    {
      const sc::Size count = (in < BinaryBytesPerLine) ? in : BinaryBytesPerLine;

      dst += writeBinaryLine( src, count, dst );
      src += count;
      in  -= count;
    }

    _buf = newBuf;
    _len = (sc::Size)(dst - newBuf);
  }

  // this should never fail because latin1 is an 8 bit encoding
  const sc::Error* err = xlate( "iso-8859-1", "utf-8" );
  SC_ERR(err);

  return sc::Success;
}

/** @return the file name given to the constructor. */
const sc::String& FileData::getName() const { return _name; }

/**
 * @return the current encoding name. This is the value given to the
 *         constructor unless xlate() detected a unicode encoding or
 *         xlateBinary() set it to "binary".
 */
const sc::String& FileData::getEncoding() const { return _encoding; }

/**
 * @return the translated data (_bufXlate), not the raw file content. It is
 *         null until a translation has stored a result.
 */
const unsigned char* FileData::getBuffer() const { return _bufXlate; }

/**
 * @return the size of the translated data (_lenXlate), zero until a
 *         translation has stored a result.
 */
sc::Size FileData::getBufferSize() const { return _lenXlate; }

/**
 * @return the byte order mark stored in _bom. xlate() sets it when it
 *         detects a unicode encoding, xlateBinary() resets it to Bom().
 */
const Bom& FileData::getBom() const { return _bom; }

/**
 * Translate the raw data (_buf/_len) from charset @a from to charset @a to
 * and store a copy of the result, allocated from _pool, in
 * _bufXlate/_lenXlate.
 *
 * The output buffer starts at twice the input size and is doubled until the
 * complete input fits. Each attempt uses its own temporary pool, so failed
 * attempts do not accumulate in _pool. The translation handle is closed on
 * success and also when the conversion fails.
 *
 * @param from  name of the source charset, passed to apr_xlate_open().
 * @param to    name of the target charset, passed to apr_xlate_open().
 * @return sc::Success, or the error produced by APR_ERR for the first APR
 *         call that fails (APR_ERR is expected to return from this function
 *         on a non-success status - it is defined outside this file).
 */
const sc::Error* FileData::xlate( const char* from, const char* to )
{
  apr_status_t status;
  apr_xlate_t* handle;

  status = apr_xlate_open( &handle, to, from, _pool );
  APR_ERR(status);

  // first guess for the output size, doubled on every retry
  apr_size_t size = _len * 2;

  while(true)
  {
    // scratch memory for this attempt only
    apr::Pool pool;

    const char* xSrcBuf = (const char*)_buf;
    apr_size_t  xSrcLen = _len;
    apr_size_t  xDstLen = size;
    char*       xDstBuf = (char*)apr_palloc( pool, xDstLen );

    // on return xSrcLen is the input left over and xDstLen the unused
    // space in the output buffer
    // NOTE(review): a retry starts again at the beginning of the input with
    // the same handle - confirm this is fine for stateful source charsets.
    status = apr_xlate_conv_buffer( handle, xSrcBuf, &xSrcLen, xDstBuf, &xDstLen );

    // buffer too small?
    if( status == APR_SUCCESS && xSrcLen > 0 )
    {
      size *= 2;
      continue;
    }
    // everything translated?
    else if( status == APR_SUCCESS && xSrcLen == 0 )
    {
      apr_size_t xLen = size - xDstLen;

      // the scratch pool goes away with this iteration, so keep a copy
      _bufXlate = (const unsigned char*)apr_pmemdup( _pool, xDstBuf, xLen );
      _lenXlate = xLen;

      // we are done
      break;
    }
    else
    {
      // do not leave the handle open when the conversion fails, the result
      // of the close is ignored so the conversion error gets reported
      apr_xlate_close(handle);
      APR_ERR(status);
    }
  }

  status = apr_xlate_close(handle);
  APR_ERR(status);

  return sc::Success;
}
// syntax highlighted by Code2HTML, v. 0.9.1