/*
 * Handles character (xchar) types and related lookups
 *
 * Copyright (C) 2001-2006 TABATA Yusuke
 */
#include <string.h>
#include "config.h"

#include <anthy/xstr.h>
#include <anthy/xchar.h>

#include "diclib_inner.h"

/*
 * Geometry of the paged conversion tables used by search():
 * a code is split into page = code / PAGE_SIZE and
 * offset = code % PAGE_SIZE, and any page >= NR_PAGES is treated
 * as unmapped.
 * NOTE(review): the macros are defined before e2u.h / u2e.h are
 * included; those headers presumably rely on them and provide
 * e2u_index / u2e_index -- confirm before reordering.
 */
#define PAGE_SIZE 128
#define NR_PAGES 512
#include "e2u.h"
#include "u2e.h"

/*
 * Table of characters that carry an explicit XCT_* type.
 * Code points are UCS-4 values.  The table ends with an all-zero
 * entry; find_xchar_type() stops at the first entry whose xc is 0.
 */
static struct xchar_ent {
  const xchar xc;
  const int type;
  struct xchar_ent *next;/* hash chain; every entry below sets it to 0 */
} xchar_tab[] =
{
  /*
   * NOTE(review): U+309B is the voiced sound mark, while the
   * original comment shows a closing double quote -- confirm which
   * character was intended.
   */
  {0x309b, XCT_CLOSE, 0}, /* ” */
  {0xff08, XCT_OPEN, 0}, /* fullwidth ( */
  {0xff09, XCT_CLOSE, 0}, /* fullwidth ) */
  {0x3014, XCT_OPEN, 0},  /* 〔 */
  {0x3015, XCT_CLOSE, 0}, /* 〕 */
  {0xff3b, XCT_OPEN, 0}, /* fullwidth [ */
  {0xff3d, XCT_CLOSE, 0}, /* fullwidth ] */
  {0xff5b, XCT_OPEN, 0},  /* fullwidth { */
  {0xff5d, XCT_CLOSE, 0},  /* fullwidth } */
  {0x3008, XCT_OPEN, 0},  /* 〈 */
  {0x3009, XCT_CLOSE, 0},  /* 〉 */
  {0x300a, XCT_OPEN, 0},  /* 《 */
  {0x300b, XCT_CLOSE, 0},  /* 》 */
  {0x300c, XCT_OPEN, 0},  /* 「 */
  {0x300d, XCT_CLOSE, 0},  /* 」 */
  {0x300e, XCT_OPEN, 0},  /* 『 */
  {0x300f, XCT_CLOSE, 0},  /* 』 */
  {0x3010, XCT_OPEN, 0},  /* 【 */
  {0x3011, XCT_CLOSE, 0},  /* 】 */
  {0x3001, XCT_PUNCTUATION, 0},  /* 、 */
  {0x3002, XCT_PUNCTUATION, 0},  /* 。 */
  {0xff0c, XCT_PUNCTUATION, 0},  /* fullwidth , */
  {0xff0e, XCT_PUNCTUATION, 0},  /* fullwidth . */
  {0xff1f, XCT_PUNCTUATION, 0},  /* fullwidth ? */
  {0xff01, XCT_PUNCTUATION, 0},  /* fullwidth ! */

  /*
   * ASCII brackets.  These entries used to be 28, 133, 29 and 135:
   * the hex (0x28, 0x29) and octal (0133, 0135) codes of the
   * brackets written as decimal literals, so they matched control
   * codes instead.  Character literals remove the ambiguity.
   */
  {'(', XCT_OPEN, 0}, /* ( */
  {'[', XCT_OPEN, 0}, /* [ */
  {')', XCT_CLOSE, 0}, /* ) */
  {']', XCT_CLOSE, 0}, /* ] */
  {HK_TO, XCT_DEP, 0},/* と */
  {HK_HA, XCT_DEP, 0},/* は */
  {HK_NO, XCT_DEP, 0},/* の */
  {HK_NI, XCT_DEP, 0},/* に */
  {HK_GA, XCT_DEP, 0},/* が */
  {HK_WO, XCT_DEP, 0},/* を */
  {WIDE_0, XCT_WIDENUM, 0},
  {WIDE_1, XCT_WIDENUM, 0},
  {WIDE_2, XCT_WIDENUM, 0},
  {WIDE_3, XCT_WIDENUM, 0},
  {WIDE_4, XCT_WIDENUM, 0},
  {WIDE_5, XCT_WIDENUM, 0},
  {WIDE_6, XCT_WIDENUM, 0},
  {WIDE_7, XCT_WIDENUM, 0},
  {WIDE_8, XCT_WIDENUM, 0},
  {WIDE_9, XCT_WIDENUM, 0},
  {HK_DDOT, XCT_PART, 0},
  {HK_XA, XCT_PART, 0},
  {HK_XI, XCT_PART, 0},
  {HK_XU, XCT_PART, 0},
  {HK_XE, XCT_PART, 0},
  {HK_XO, XCT_PART, 0},
  {HK_XYA, XCT_PART, 0},
  {HK_XYU, XCT_PART, 0},
  {HK_XYO, XCT_PART, 0},
  {HK_TT, XCT_PART, 0},
  {0, 0, 0}, /* terminator */
};

#define DDOT 0x8ede
#define CIRCLE 0x8edf

static const struct half_kana_table half_kana_tab[] = {
  {HK_A,0x8eb1,0},
  {HK_I,0x8eb2,0},
  {HK_U,0x8eb3,0},
  {HK_E,0x8eb4,0},
  {HK_O,0x8eb5,0},
  {HK_KA,0x8eb6,0},
  {HK_KI,0x8eb7,0},
  {HK_KU,0x8eb8,0},
  {HK_KE,0x8eb9,0},
  {HK_KO,0x8eba,0},
  {HK_SA,0x8ebb,0},
  {HK_SI,0x8ebc,0},
  {HK_SU,0x8ebd,0},
  {HK_SE,0x8ebe,0},
  {HK_SO,0x8ebf,0},
  {HK_TA,0x8ec0,0},
  {HK_TI,0x8ec1,0},
  {HK_TU,0x8ec2,0},
  {HK_TE,0x8ec3,0},
  {HK_TO,0x8ec4,0},
  {HK_NA,0x8ec5,0},
  {HK_NI,0x8ec6,0},
  {HK_NU,0x8ec7,0},
  {HK_NE,0x8ec8,0},
  {HK_NO,0x8ec9,0},
  {HK_HA,0x8eca,0},
  {HK_HI,0x8ecb,0},
  {HK_HU,0x8ecc,0},
  {HK_HE,0x8ecd,0},
  {HK_HO,0x8ece,0},
  {HK_MA,0x8ecf,0},
  {HK_MI,0x8ed0,0},
  {HK_MU,0x8ed1,0},
  {HK_ME,0x8ed2,0},
  {HK_MO,0x8ed3,0},
  {HK_YA,0x8ed4,0},
  {HK_YU,0x8ed5,0},
  {HK_YO,0x8ed6,0},
  {HK_RA,0x8ed7,0},
  {HK_RI,0x8ed8,0},
  {HK_RU,0x8ed9,0},
  {HK_RE,0x8eda,0},
  {HK_RO,0x8edb,0},
  {HK_WA,0x8edc,0},
  {HK_WI,0,0},
  {HK_WE,0,0},
  {HK_WO,0x8ea6,0},
  {HK_N,0x8edd,0},
  {HK_TT,0x8eaf,0},
  {HK_XA,0x8ea7,0},
  {HK_XI,0x8ea8,0},
  {HK_XU,0x8ea9,0},
  {HK_XE,0x8eaa,0},
  {HK_XO,0x8eab,0},
  {HK_GA,0x8eb6,DDOT},
  {HK_GI,0x8eb7,DDOT},
  {HK_GU,0x8eb8,DDOT},
  {HK_GE,0x8eb9,DDOT},
  {HK_GO,0x8eba,DDOT},
  {HK_ZA,0x8ebb,DDOT},
  {HK_ZI,0x8ebc,DDOT},
  {HK_ZU,0x8ebd,DDOT},
  {HK_ZE,0x8ebe,DDOT},
  {HK_ZO,0x8ebf,DDOT},
  {HK_DA,0x8ec0,DDOT},
  {HK_DI,0x8ec1,DDOT},
  {HK_DU,0x8ec2,DDOT},
  {HK_DE,0x8ec3,DDOT},
  {HK_DO,0x8ec4,DDOT},
  {HK_BA,0x8eca,DDOT},
  {HK_BI,0x8ecb,DDOT},
  {HK_BU,0x8ecc,DDOT},
  {HK_BE,0x8ecd,DDOT},
  {HK_BO,0x8ece,DDOT},
  {HK_PA,0x8eca,CIRCLE},
  {HK_PI,0x8ecb,CIRCLE},
  {HK_PU,0x8ecc,CIRCLE},
  {HK_PE,0x8ecd,CIRCLE},
  {HK_PO,0x8ece,CIRCLE},
  {HK_XYA,0x8eac,0},
  {HK_XYU,0x8ead,0},
  {HK_XYO,0x8eae,0},
  {HK_XWA,0,0},
  {HK_DDOT,DDOT,0},
  {HK_BAR,0x8eb0,0},
  {0,0,0}
};

/*
 * Pairs of a half-width ASCII symbol and its wide counterpart, used
 * in both directions by anthy_lookup_half_wide().  The entry whose
 * half member is 0 ends the table.
 */
static const struct half_wide_ent {
  const xchar half;
  const xchar wide;
} half_wide_tab[] = {
  { '!',  0xff01 },
  { '"',  0x201d },
  { '#',  0xff03 },
  { '$',  0xff04 },
  { '%',  0xff05 },
  { '&',  0xff06 },
  { '\'', 0x2019 },
  { '(',  0xff08 },
  { ')',  0xff09 },
  { '*',  0xff0a },
  { '+',  0xff0b },
  { ',',  0xff0c },
  { '-',  0xff0d },
  { '.',  0xff0e },
  { '/',  0xff0f },
  { ':',  0xff1a },
  { ';',  0xff1b },
  { '<',  0xff1c },
  { '=',  0xff1d },
  { '>',  0xff1e },
  { '?',  0xff1f },
  { '@',  0xff20 },
  { '[',  0xff3b },
  { '\\', 0xff3c },
  { ']',  0xff3d },
  { '^',  0xff3e },
  { '_',  0xff3f },
  { '`',  0xff40 },
  { '{',  0xff5b },
  { '|',  0xff5c },
  { '}',  0xff5d },
  { '~',  0xff5e },
  { 0, 0 }                    /* terminator */
};

/*
 * Map a symbol to its counterpart in half_wide_tab.
 *
 * The table is scanned in order.  When xc equals an entry's half
 * form the wide form is returned; when it equals the wide form the
 * half form is returned.  Returns 0 when no entry matches.
 */
xchar
anthy_lookup_half_wide(xchar xc)
{
  const struct half_wide_ent *ent = half_wide_tab;

  while (ent->half) {
    if (xc == ent->half) {
      return ent->wide;
    }
    if (xc == ent->wide) {
      return ent->half;
    }
    ent++;
  }
  return 0;
}

/*
 * Find the half_kana_tab entry whose src equals xc.
 *
 * Entries with a zero dst are not returned.  The scan stops at the
 * first entry whose src is 0.  Returns NULL when nothing usable is
 * found.
 */
const struct half_kana_table *
anthy_find_half_kana(xchar xc)
{
  const struct half_kana_table *ent = half_kana_tab;

  while (ent->src) {
    if (ent->dst && ent->src == xc) {
      return ent;
    }
    ent++;
  }
  return NULL;
}

/*
 * Return the type recorded for xc in xchar_tab, or XCT_NONE when the
 * character is not listed.  The scan ends at the entry whose xc is 0.
 */
static int
find_xchar_type(xchar xc)
{
  int i;

  for (i = 0; xchar_tab[i].xc != 0; i++) {
    if (xchar_tab[i].xc == xc) {
      return xchar_tab[i].type;
    }
  }
  return XCT_NONE;
}

/*
 * Return 1 when xc counts as hiragana, 0 otherwise.
 *
 * HK_DDOT and HK_BAR are accepted directly.  Any other character is
 * converted with anthy_ucs_to_euc() and accepted when the high byte
 * of the result is 0xa4 (presumably the EUC-JP hiragana row).
 */
static int
is_hira(xchar xc)
{
  xchar euc;

  if (xc == HK_DDOT || xc == HK_BAR) {
    return 1;
  }
  euc = anthy_ucs_to_euc(xc);
  return (euc & 0xff00) == 0xa400;
}

/*
 * Return 1 when xc counts as katakana, 0 otherwise.
 *
 * HK_BAR is accepted directly.  Any other character is converted
 * with anthy_ucs_to_euc() and accepted when the high byte of the
 * result is 0xa5 (presumably the EUC-JP katakana row).
 */
static int
is_kata(xchar xc)
{
  xchar euc;

  if (xc == HK_BAR) {
    return 1;
  }
  euc = anthy_ucs_to_euc(xc);
  return (euc & 0xff00) == 0xa500;
}

/*
 * Return 1 when xc counts as a symbol, 0 otherwise.
 *
 * UCS_GETA itself is a symbol.  Any other character is converted
 * with anthy_ucs_to_euc(); a result of EUC_GETA is rejected, and
 * otherwise the character is a symbol when the high byte of the
 * result is 0xa1 or 0xa2.
 */
static int
is_symbol(xchar xc)
{
  xchar euc;
  int row;

  if (xc == UCS_GETA) {
    return 1;
  }
  euc = anthy_ucs_to_euc(xc);
  if (euc == EUC_GETA) {
    /* checked before the row test, so EUC_GETA never counts here */
    return 0;
  }
  row = euc & 0xff00;
  return row == 0xa100 || row == 0xa200;
}

/*
 * Return 1 when xc lies in the CJK Unified Ideographs block
 * (U+4E00 .. U+9FFF), 0 otherwise.
 */
static int
is_kanji(xchar xc)
{
  /*
   * The lower bound is inclusive: U+4E00 is the first ideograph of
   * the block and was wrongly excluded by a strict comparison.
   */
  if (xc >= 0x4e00 && xc <= 0x9fff) {
    return 1;
  }
  return 0;
}

/*
 * Look up code v in a paged conversion table.
 *
 * tab  : array of page pointers, indexed by v / PAGE_SIZE; a NULL
 *        page means nothing on that page is mapped.
 * v    : code to convert.
 * geta : value returned when v has no mapping.
 *
 * Returns the table entry for v, or geta when v is negative, lies on
 * a page >= NR_PAGES, hits a NULL page, or maps to 0 (code 0 itself
 * is allowed to map to 0).
 */
static int
search(const int *tab[], int v, int geta)
{
  const int *t;
  int page;
  int off;

  /*
   * Division and remainder truncate toward zero, so a negative v
   * would give a negative page and/or offset and read outside the
   * table.  Treat it as unmapped.
   */
  if (v < 0) {
    return geta;
  }
  page = v / PAGE_SIZE;
  off = v % PAGE_SIZE;
  if (page >= NR_PAGES) {
    return geta;
  }
  t = tab[page];
  if (!t) {
    return geta;
  }
  if (!t[off] && v) {
    return geta;
  }
  return t[off];
}

/*
 * Convert code ec through the e2u_index table.
 * Returns UCS_GETA when search() finds no mapping.
 */
int
anthy_euc_to_ucs(int ec)
{
  const int ucs = search(e2u_index, ec, UCS_GETA);

  return ucs;
}

/*
 * Convert code uc through the u2e_index table.
 * Returns EUC_GETA when search() finds no mapping, and also when the
 * mapped value is greater than 65536.
 */
int
anthy_ucs_to_euc(int uc)
{
  const int euc = search(u2e_index, uc, EUC_GETA);

  return (euc > 65536) ? EUC_GETA : euc;
}

/*
 * Compute the XCT_* flag set of a single character.
 *
 * Starts from the type listed in xchar_tab (XCT_NONE when absent)
 * and ORs in XCT_NUM, XCT_ASCII, XCT_HIRA, XCT_KATA, XCT_SYMBOL and
 * XCT_KANJI as the individual tests below succeed.
 */
int
anthy_get_xchar_type(const xchar xc)
{
  int flags = find_xchar_type(xc);

  /* ASCII digits '0'..'9' (codes 48..57) */
  if (xc >= '0' && xc <= '9') {
    flags |= XCT_NUM;
  }
  if (xc < 128) {
    flags |= XCT_ASCII;
  }
  if (is_hira(xc)) {
    flags |= XCT_HIRA;
  }
  if (is_kata(xc)) {
    flags |= XCT_KATA;
  }
  /* a character already typed as a bracket is not also marked as a symbol */
  if (is_symbol(xc) && !(flags & (XCT_OPEN | XCT_CLOSE))) {
    flags |= XCT_SYMBOL;
  }
  if (is_kanji(xc)) {
    flags |= XCT_KANJI;
  }
  return flags;
}

/*
 * Return the XCT_* flags shared by every character of xs.
 *
 * The result starts as XCT_ALL and is ANDed with the type of each
 * character, so a string with xs->len <= 0 yields XCT_ALL.
 */
int
anthy_get_xstr_type(const xstr *xs)
{
  int common = XCT_ALL;
  int pos = 0;

  while (pos < xs->len) {
    common &= anthy_get_xchar_type(xs->str[pos]);
    pos++;
  }
  return common;
}

/*
 * Return the numeric value (0..9) of a digit character.
 *
 * Accepts WIDE_0..WIDE_9 and ASCII '0'..'9'.  Returns -1 for any
 * other character.
 */
int
anthy_xchar_to_num(xchar xc)
{
  static const xchar wide_digits[10] = {
    WIDE_0, WIDE_1, WIDE_2, WIDE_3, WIDE_4,
    WIDE_5, WIDE_6, WIDE_7, WIDE_8, WIDE_9
  };
  int digit;

  /* the index in wide_digits is the digit's value */
  for (digit = 0; digit < 10; digit++) {
    if (xc == wide_digits[digit]) {
      return digit;
    }
  }
  if (xc >= '0' && xc <= '9') {
    return xc - (int)'0';
  }
  return -1;
}

/*
 * Convert WIDE_0..WIDE_9 to the ASCII digit '0'..'9'.
 * Any other character is returned unchanged.
 */
xchar
anthy_xchar_wide_num_to_num(xchar c)
{
  static const xchar wide_digits[10] = {
    WIDE_0, WIDE_1, WIDE_2, WIDE_3, WIDE_4,
    WIDE_5, WIDE_6, WIDE_7, WIDE_8, WIDE_9
  };
  int digit;

  for (digit = 0; digit < 10; digit++) {
    if (c == wide_digits[digit]) {
      /* '0'..'9' are contiguous, so the index selects the digit */
      return '0' + digit;
    }
  }
  return c;
}

/*
 * Initialization hook for the character tables.
 * The body is empty: the tables in this file are statically
 * initialized, so there is nothing to set up here.
 */
void
anthy_init_xchar_tab(void)
{
}


/* syntax highlighted by Code2HTML, v. 0.9.1 */