/* ====================================================================
 * Copyright (c) 2006, Martin Hauner
 *                     http://subcommander.tigris.org
 *
 * Subcommander is licensed as described in the file doc/COPYING, which
 * you should have received as part of this distribution.
 * ====================================================================
 */

#ifndef _SC_UTF_H
#define _SC_UTF_H

// sc
#include "util/types.h"
#include "util/String.h"


/**
 * describes a byte order mark (bom) by its \c Type.
 *
 * NOTE(review): only declarations are visible in this header. Remarks marked
 * "presumably" are inferred from the names and should be confirmed against
 * the implementation.
 */
class Bom
{
public:
  /** the bom kinds this class distinguishes, \c none has the value 0. */
  enum Type
  {
    none = 0,
    utf8,
    utf16be,
    utf16le,
    utf32be,
    utf32le
  };

  /** default constructor, presumably creates a \c none bom - TODO confirm. */
  Bom();

  /**
   * construct a bom of the given \a type.
   * This constructor is not declared explicit, so a \c Type value converts
   * implicitly to a \c Bom.
   */
  Bom( Type type );

  /** copy constructor. */
  Bom( const Bom& src );

  /** presumably true if no bom is set (type \c none) - TODO confirm. */
  bool isNull() const;

  /** presumably the raw bytes of this bom - TODO confirm. */
  const unsigned char* getBom() const;

  /** presumably the number of bytes getBom() points to - TODO confirm. */
  sc::Size getSize() const;

private:
  /** the kind of bom this object stands for. */
  Type _type;
};

/**
 * helper class to test/detect various utf encoded files.
 *
 * An instance is created for a byte buffer and offers accessors for a
 * detection result (encoding, bom, endianness). The static is...() helpers
 * take the buffer as parameters and can be called without an instance.
 */
class utf
{
public:
  /**
   * create the helper for the \a len bytes at \a buf.
   *
   * NOTE(review): the members keep a raw pointer and a length, so \a buf
   * presumably has to outlive this object - confirm whether the
   * implementation copies the data.
   */
  utf( const unsigned char* buf, sc::Size len );
  ~utf();

  /** detected an encoding? */
  bool hasEncoding() const;

  /** presumably true if a bom was found in the buffer - TODO confirm. */
  bool hasBom() const;

  /** the bom object of this helper, see hasBom(). */
  const Bom& getBom() const;

  /** the encoding as a string, see hasEncoding(). */
  const sc::String& getEncoding() const;

  /** presumably the buffer passed to the constructor - TODO confirm. */
  const unsigned char* getBuffer() const;

  /** presumably the length passed to the constructor - TODO confirm. */
  sc::Size getLength() const;

  /** presumably true if big endian data was detected - TODO confirm. */
  bool isBigEndian() const;

  /** presumably true if little endian data was detected - TODO confirm. */
  bool isLittleEndian() const;

  /** check if \a len fits a utf-32 encoded file */
  static bool isUtf32Size( sc::Size len );
  /** check if \a len fits a utf-16 encoded file */
  static bool isUtf16Size( sc::Size len );
  /** check if \a len fits a utf-8 encoded file */
  static bool isUtf8Size( sc::Size len );

  /** check if \a buf starts with a utf-32be bom */
  static bool isUtf32BeBom( const unsigned char* buf, sc::Size len );
  /** check if \a buf starts with a utf-32le bom */
  static bool isUtf32LeBom( const unsigned char* buf, sc::Size len );
  /** check if \a buf starts with a utf-16be bom */
  static bool isUtf16BeBom( const unsigned char* buf, sc::Size len );
  /** check if \a buf starts with a utf-16le bom */
  static bool isUtf16LeBom( const unsigned char* buf, sc::Size len );
  /** check if \a buf starts with a utf-8 bom */
  static bool isUtf8Bom( const unsigned char* buf, sc::Size len );

  // NOTE(review): the meaning of the \a bom parameter of the is...Data()
  // helpers is not visible in this header. Presumably it tells whether
  // \a buf starts with the matching bom - TODO confirm.

  /** check whether the first kbyte of \a buf contains valid utf-32be data */
  static bool isUtf32BeData( const unsigned char* buf, sc::Size len, bool bom );
  /** check whether the first kbyte of \a buf contains valid utf-32le data */
  static bool isUtf32LeData( const unsigned char* buf, sc::Size len, bool bom );
  /** check whether the first kbyte of \a buf contains valid utf-16be data */
  static bool isUtf16BeData( const unsigned char* buf, sc::Size len, bool bom );
  /** check whether the first kbyte of \a buf contains valid utf-16le data */
  static bool isUtf16LeData( const unsigned char* buf, sc::Size len, bool bom );
  /** check whether the first kbyte of \a buf contains valid utf-8 data */
  static bool isUtf8Data( const unsigned char* buf, sc::Size len, bool bom );

private:
  /** presumably runs the detection and fills the members - TODO confirm. */
  void check();

  // NOTE(review): the declaration order of the members is also their
  // initialization order, keep it in sync with the constructor.
  bool                 _bigEndian;     ///< see isBigEndian()
  bool                 _littleEndian;  ///< see isLittleEndian()
  Bom                  _bom;           ///< see getBom()
  sc::String           _encoding;      ///< see getEncoding()
  const unsigned char* _buf;           ///< see getBuffer()
  sc::Size             _len;           ///< see getLength()
};


#endif // _SC_UTF_H