// Copyright (c) 1995  David Engberg  All rights reserved
// $Id: unicode_string.C,v 1.4 1998/05/23 18:19:52 geppetto Exp $
#ifdef __GNUG__
#pragma implementation
#endif
#include "unicode_string.h"

// Explicit instantiation of the nilRep member of unicode_string, compiled in
// only when the build defines NEED_EXPLICIT_UNISTRING_NILREP.
// NOTE(review): presumably needed for toolchains that do not instantiate this
// member implicitly -- confirm against the build configuration.
#ifdef NEED_EXPLICIT_UNISTRING_NILREP
template unicode_string::Rep unicode_string::nilRep;
#endif

//
//  Function name : UTFToUnicode
//  Description : This global function is used to facilitate the translation
//    from Java bytecode encoded UTF strings to 16-bit Unicode strings.
//    It takes in a sequence of 8-bit characters and returns the decoded
//    unicode_string.
//
//    Decoding is lenient.  Any byte with the high bit set is treated as the
//    lead byte of a two-byte sequence, or of a three-byte sequence when bit
//    0x20 is also set; the following bytes are not validated.  A lead byte
//    that is not followed by enough bytes is copied through as a single
//    character in the range 0x80..0xff.
//
unicode_string
UTFToUnicode(const string& inString)
{
  unicode_string outString;
  string::const_iterator i = inString.begin();
  const string::const_iterator end = inString.end();
  while (i != end) {
    const string::value_type firstChar = *i++;
    // Bit 0x20 of the lead byte selects the three-byte form, which needs two
    // more bytes; the two-byte form needs only one.
    const bool threeBytes = (firstChar & 0x20) != 0;
    if ((firstChar & 0x80) != 0 && (end - i) >= (threeBytes ? 2 : 1)) {
      const string::value_type secondChar = *i++;
      unicode_char outChar;
      if (threeBytes) {
        // 1110xxxx 10yyyyyy 10zzzzzz -> xxxxyyyyyyzzzzzz
        const string::value_type thirdChar = *i++;
        outChar = ((0x0f & firstChar) << 12) |
          ((0x3f & secondChar) << 6) | (0x3f & thirdChar);
      } else {
        // 110xxxxx 10yyyyyy -> 00000xxxxxyyyyyy
        outChar = ((0x1f & firstChar) << 6) | (0x3f & secondChar);
      }
      outString += outChar;
    } else {
      // Plain 7-bit character, or a lead byte cut off by the end of input.
      // Mask to 8 bits so a high-bit byte is not sign-extended into
      // 0xff80..0xffff on platforms where plain char is signed.
      outString += static_cast<unicode_char>(0xff & firstChar);
    }
  }
  return outString;
}

//
//  Function name : UnicodeToUTF
//  Description : This global function is the opposite of UTFToUnicode.  It
//    goes from a 16-bit unicode string to an 8-bit UTF string as documented
//    in the Java reference manual:
//      0x0001..0x007f  ->  one byte    0xxxxxxx
//      0x0000, 0x0080..0x07ff  ->  two bytes   110xxxxx 10xxxxxx
//      0x0800..0xffff  ->  three bytes 1110xxxx 10xxxxxx 10xxxxxx
//    The zero character is deliberately written in the two-byte form so
//    that the output never contains an embedded zero byte.
//
string
UnicodeToUTF(const unicode_string& inString)
{
  string outString;
  const unicode_string::const_iterator end = inString.end();
  for (unicode_string::const_iterator i = inString.begin(); i != end; ++i) {
    const unicode_string::value_type c = *i;
    if (c > 0x7ff) {
      // More than 11 significant bits: three-byte form.  The boundary is
      // 0x7ff because the two-byte form below carries 5 + 6 = 11 bits; a
      // lower boundary would emit overlong three-byte sequences.
      outString += static_cast<string::value_type>(0xe0 | ((0xf000 & c) >> 12));
      outString += static_cast<string::value_type>(0x80 | ((0xfc0 & c) >> 6));
      outString += static_cast<string::value_type>(0x80 | (0x3f & c));
    } else if (c == 0 || c > 0x7f) {
      // Two-byte form, also used for the zero character.
      outString += static_cast<string::value_type>(0xc0 | ((0x7c0 & c) >> 6));
      outString += static_cast<string::value_type>(0x80 | (0x3f & c));
    } else {
      // 7-bit character: copied through unchanged.
      outString += static_cast<string::value_type>(c);
    }
  }
  return outString;
}

//
//  Function name : StringToUnicode
//  Description : This global function takes an 8-bit string and returns the
//    equivalent unicode string.  Each input byte becomes one output
//    character with the same ordinal value (0..255); no UTF decoding is
//    performed.
//
unicode_string
StringToUnicode(const string& inString) {
  unicode_string outString;
  const string::const_iterator end = inString.end();
  for (string::const_iterator i = inString.begin(); i != end; ++i) {
    // Convert through unsigned char so that bytes 0x80..0xff map to
    // 0x0080..0x00ff rather than being sign-extended to 0xff80..0xffff on
    // platforms where plain char is signed.
    outString += static_cast<unicode_string::value_type>(
        static_cast<unsigned char>(*i));
  }
  return outString;
}

//
//  Function name : StringToUnicode
//  Description : This global function takes an 8-bit C-style string and
//    returns the equivalent unicode string.  Each byte up to (but not
//    including) the terminating zero becomes one output character with the
//    same ordinal value (0..255); no UTF decoding is performed.
//    inString must point to a zero-terminated sequence; it is dereferenced
//    without a null check.
//
unicode_string
StringToUnicode(const char* inString) {
  unicode_string outString;
  for (const char* p = inString; *p != 0; ++p) {
    // Convert through unsigned char so that bytes 0x80..0xff map to
    // 0x0080..0x00ff rather than being sign-extended to 0xff80..0xffff on
    // platforms where plain char is signed.
    outString += static_cast<unicode_string::value_type>(
        static_cast<unsigned char>(*p));
  }
  return outString;
}

//
//  Function name : StringToUnicode
//  Description : This global function takes a sequence of 8-bit characters
//    and their length and returns the equivalent unicode string.  Exactly
//    length bytes are read, so embedded zero bytes are converted like any
//    other byte.  Each byte becomes one output character with the same
//    ordinal value (0..255); no UTF decoding is performed.
//
unicode_string
StringToUnicode(const char* inString, size_t length) {
  unicode_string outString;
  // The index has the same type as length, which avoids a signed/unsigned
  // comparison and an int overflow for very long inputs.
  for (size_t i = 0; i < length; ++i) {
    // Convert through unsigned char so that bytes 0x80..0xff map to
    // 0x0080..0x00ff rather than being sign-extended to 0xff80..0xffff on
    // platforms where plain char is signed.
    outString += static_cast<unicode_string::value_type>(
        static_cast<unsigned char>(inString[i]));
  }
  return outString;
}

//
//  Function name : UnicodeToString
//  Description : This global function takes a unicode string and hacks it
//    down to an 8-bit character string.  This is a destructive transformation,
//    so any characters with ordinal value above 255 will be corrupted.
//    If this is a problem, you should use the non-destructive UnicodeToUTF
//    to encode in 8-bit strings.
//
string
UnicodeToString(const unicode_string& inString) {
  string outString;
  const unicode_string::const_iterator end = inString.end();
  for (unicode_string::const_iterator i = inString.begin(); i != end; ++i) {
    // Narrow explicitly to the 8-bit character type of the output string;
    // this is the lossy step described above.
    outString += static_cast<string::value_type>(*i);
  }
  return outString;
}


//
//  Function name : Hash
//  Description : This global function allows a hash value to be generated
//    from a range of 16-bit characters.  This is done by multiplying each
//    character in the sequence by a constant and then summing them together,
//    tossing away overflow.  An empty range hashes to 0.
//
unsigned long
Hash(unicode_string::const_iterator start,
     unicode_string::const_iterator end) {
  // The multiplier is unsigned long so that each product is computed in
  // unsigned arithmetic.  With a plain int constant the product of 0xfedc
  // and a large character value can exceed the range of a 32-bit int, which
  // is undefined behavior rather than the intended wrap-around.
  const unsigned long multiplier = 0xfedcUL;
  unsigned long hash = 0;
  for (; start < end; ++start) {
    hash += multiplier * (*start);
  }
  return hash;
}

//
//  Function name : Hash
//  Description : This global function takes a unicode string and tries to
//    provide a general-purpose 32-bit hash value from it using a relatively
//    basic hash function described above.
//
unsigned long
Hash(const unicode_string& s) {
  return ::Hash(s.begin(), s.end());
}