/* ====================================================================
* Copyright (c) 2006, Martin Hauner
* http://subcommander.tigris.org
*
* Subcommander is licensed as described in the file doc/COPYING, which
* you should have received as part of this distribution.
* ====================================================================
*/
#ifndef _SC_UTF_H
#define _SC_UTF_H
// sc
#include "util/types.h"
#include "util/String.h"
/**
 * describes a byte order mark (bom) by its encoding type.
 *
 * NOTE(review): only the declarations are visible in this header; remarks
 * marked "presumably" are inferred from the names and should be confirmed
 * against the implementation.
 */
class Bom
{
public:
  /** the bom kinds this class can represent, \c none has the value 0. */
  enum Type
  {
    none = 0,
    utf8,
    utf16be,
    utf16le,
    utf32be,
    utf32le
  };

  /** default constructor, presumably results in a \c none bom - confirm. */
  Bom();

  /** construct a bom from \a type. not explicit, so a Type converts implicitly. */
  Bom( Type type );

  /** copy constructor. */
  Bom( const Bom& src );

  /** presumably true if this bom has the type \c none - confirm. */
  bool isNull() const;

  /** presumably the raw bytes of the bom for the stored type - confirm. */
  const unsigned char* getBom() const;

  /** presumably the number of bytes returned by getBom() - confirm. */
  sc::Size getSize() const;

private:
  /** the kind of bom this object stands for. */
  Type _type;
};
/**
 * helper class to test/detect various utf encoded files.
 *
 * an instance is built from a byte buffer and its length; the static
 * helpers can be used without an instance to run a single size, bom or
 * data check.
 *
 * NOTE(review): only the declarations are visible in this header; remarks
 * marked "presumably" are inferred from the names and should be confirmed
 * against the implementation.
 */
class utf
{
public:
  /**
   * create a detector for the \a len bytes at \a buf.
   * NOTE(review): the class keeps a raw pointer member, so \a buf presumably
   * has to outlive this object - confirm.
   */
  utf( const unsigned char* buf, sc::Size len );
  ~utf();

  // --- detection result -------------------------------------------------

  /** detected an encoding? */
  bool hasEncoding() const;

  /** presumably true if a bom was found in the buffer - confirm. */
  bool hasBom() const;

  /** the bom object held by this detector. */
  const Bom& getBom() const;

  /** the encoding held by this detector. */
  const sc::String& getEncoding() const;

  /** presumably the buffer pointer passed to the constructor - confirm. */
  const unsigned char* getBuffer() const;

  /** presumably the buffer length passed to the constructor - confirm. */
  sc::Size getLength() const;

  bool isBigEndian() const;
  bool isLittleEndian() const;

  // --- size checks ------------------------------------------------------

  /** check if \a len fits a utf-32 encoded file */
  static bool isUtf32Size( sc::Size len );

  /** check if \a len fits a utf-16 encoded file */
  static bool isUtf16Size( sc::Size len );

  /** check if \a len fits a utf-8 encoded file */
  static bool isUtf8Size( sc::Size len );

  // --- bom checks -------------------------------------------------------

  /** check if \a buf starts with a utf-32be bom */
  static bool isUtf32BeBom( const unsigned char* buf, sc::Size len );

  /** check if \a buf starts with a utf-32le bom */
  static bool isUtf32LeBom( const unsigned char* buf, sc::Size len );

  /** check if \a buf starts with a utf-16be bom */
  static bool isUtf16BeBom( const unsigned char* buf, sc::Size len );

  /** check if \a buf starts with a utf-16le bom */
  static bool isUtf16LeBom( const unsigned char* buf, sc::Size len );

  /** check if \a buf starts with a utf-8 bom */
  static bool isUtf8Bom( const unsigned char* buf, sc::Size len );

  // --- data checks ------------------------------------------------------
  // NOTE(review): \a bom presumably tells the check whether \a buf begins
  // with a bom - confirm against the implementation.

  /** check first kbyte of \a buf if it contains valid utf-32be data */
  static bool isUtf32BeData( const unsigned char* buf, sc::Size len, bool bom );

  /** check first kbyte of \a buf if it contains valid utf-32le data */
  static bool isUtf32LeData( const unsigned char* buf, sc::Size len, bool bom );

  /** check first kbyte of \a buf if it contains valid utf-16be data */
  static bool isUtf16BeData( const unsigned char* buf, sc::Size len, bool bom );

  /** check first kbyte of \a buf if it contains valid utf-16le data */
  static bool isUtf16LeData( const unsigned char* buf, sc::Size len, bool bom );

  /** check first kbyte of \a buf if it contains valid utf-8 data */
  static bool isUtf8Data( const unsigned char* buf, sc::Size len, bool bom );

private:
  /** presumably runs the detection and fills the members below - confirm. */
  void check();

  // the member order is kept as is, it defines the initialization order.
  bool                 _bigEndian;
  bool                 _littleEndian;
  Bom                  _bom;
  sc::String           _encoding;
  const unsigned char* _buf;
  sc::Size             _len;
};
#endif // _SC_UTF_H
// syntax highlighted by Code2HTML, v. 0.9.1