/* ports//japanese/anthy/work/anthy-9100d/src-util/dic-tool.c */

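/*
 * Utility command for manipulating the dictionary
 *
 * Converts between the library's internal dictionary format and an
 * external text format.
 * The external format is
 * *reading frequency word
 * *word type variable 1 = value 1
 * *word type variable 2 = value 2
 * *...
 * *<blank line>
 */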
/*
 * Funded by IPA未踏ソフトウェア創造事業 2001 9/22
 *
 * Copyright (C) 2000-2007 TABATA Yusuke
 */
/*
  This library is free software; you can redistribute it and/or
  modify it under the terms of the GNU Lesser General Public
  License as published by the Free Software Foundation; either
  version 2 of the License, or (at your option) any later version.

  This library is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  Lesser General Public License for more details.

  You should have received a copy of the GNU Lesser General Public
  License along with this library; if not, write to the Free Software
  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307  USA
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include <anthy/anthy.h>
#include <anthy/dicutil.h>
/**/
#include <anthy/xstr.h>
#include "config.h"

/* Values stored in `command` to select the operation performed by main() */
#define UNSPEC 0
#define DUMP_DIC 1
#define LOAD_DIC 2
#define APPEND_DIC 3

/*
 * Data file names. Each is looked up in the current directory first and
 * then in the directory returned by anthy_dic_util_get_anthydir().
 */
#define TYPETAB "typetab"
#define USAGE_TEXT "dic-tool-usage.txt"

/*
 * Option summary printed by print_usage().
 * Note: parse_args() also accepts --eucjp, which is not listed here.
 */
#define USAGE \
 "Anthy-dic-util [options]\n"\
 " --help: Show this usage text\n"\
 " --version: Show version\n"\
 " --dump: Dump dictionary\n"\
 " --load: Load dictionary\n"\
 " --append: Append dictionary\n"\
 " --utf8: Use utf8 encoding\n"\
 " --personality=NAME: use NAME as a name of personality\n"


/* Operation chosen by parse_args(): UNSPEC, DUMP_DIC, LOAD_DIC or APPEND_DIC */
static int command = UNSPEC;
/* Encoding selected on the command line; EUC-JP unless --utf8 is given */
static int encoding = ANTHY_EUC_JP_ENCODING;
/* Input stream read by find_head() and find_wt() while loading */
static FILE *fp_in;
/* Input file name taken from a non-option argument; NULL means stdin */
static char *fn;
/*
 * Value of --personality=NAME.
 * NOTE(review): set by parse_args() but not read anywhere in this file.
 */
static const char *personality = "";

/*
 * A (variable name, value) pair, chained into a singly linked list.
 * A struct var is also used as a list head, in which case only `next`
 * is meaningful.
 */
struct var{
  struct var *next;  /* next pair in the list, or NULL */
  char *var_name;    /* heap-allocated variable name */
  char *val;         /* heap-allocated value */
};

/*
 * Table for obtaining a word type name from word type parameters.
 * trans_tab_list is the list head; only its `next` field is used.
 */
struct trans_tab {
  struct trans_tab *next;
  char *type_name; /* internal type name, e.g. T35; stored with a leading '#' by read_typetab_entry() */
  struct var var_list; /* parameters that determine the type (list head) */
}trans_tab_list;

/* Write the built-in option summary to stdout and exit with status 0. */
static void
print_usage(void)
{
  /* USAGE contains no conversion specifiers, so it is emitted verbatim */
  fputs(USAGE, stdout);
  exit(0);
}

/*
 * Open the word type table for reading.
 *
 * TYPETAB in the current directory is tried first; if that fails, TYPETAB
 * inside the directory reported by anthy_dic_util_get_anthydir() is tried.
 *
 * Returns the opened stream, or NULL when neither file can be opened or
 * the path buffer cannot be allocated. The caller owns the stream.
 */
static FILE *
open_typetab(void)
{
  FILE *fp;
  const char *dir;
  char *path;

  fp = fopen(TYPETAB, "r");
  if (fp) {
    return fp;
  }

  dir = anthy_dic_util_get_anthydir();
  if (!dir) {
    return NULL;
  }
  /* room for dir + "/" + TYPETAB + terminating NUL */
  path = malloc(strlen(dir) + 1 + strlen(TYPETAB) + 1);
  if (!path) {
    return NULL;
  }
  strcpy(path, dir);
  strcat(path, "/");
  strcat(path, TYPETAB);

  fp = fopen(path, "r");
  /* the path is only needed for the fopen() call; release it */
  free(path);
  return fp;
}

/*
 * Open the usage text that is printed at the top of a dictionary dump.
 *
 * USAGE_TEXT in the current directory is preferred; otherwise the copy in
 * the directory reported by anthy_dic_util_get_anthydir() is used.
 *
 * Returns the opened stream, or NULL when neither file can be opened or
 * the path buffer cannot be allocated. The caller owns the stream.
 */
static FILE *
open_usage_file(void)
{
  FILE *fp;
  const char *dir;
  char *path;

  /* use the file in the current directory when there is one */
  fp = fopen(USAGE_TEXT, "r");
  if (fp) {
    return fp;
  }

  /* fall back to the installed copy */
  dir = anthy_dic_util_get_anthydir();
  if (!dir) {
    return NULL;
  }
  /* room for dir + "/" USAGE_TEXT + terminating NUL */
  path = malloc(strlen(dir) + strlen("/" USAGE_TEXT) + 1);
  if (!path) {
    return NULL;
  }
  strcpy(path, dir);
  strcat(path, "/" USAGE_TEXT);

  fp = fopen(path, "r");
  /* the path is only needed for the fopen() call; release it */
  free(path);
  return fp;
}

/*
 * Print the comment header that precedes a dictionary dump.
 *
 * When the usage text file cannot be opened a two-line fallback header is
 * printed. Otherwise a "#PACKAGE VERSION" line is printed followed by the
 * file's contents; when the selected encoding is UTF-8 each chunk read is
 * passed through anthy_conv_euc_to_utf8() before being printed.
 */
static void
print_usage_text(void)
{
  char line[256];
  FILE *usage = open_usage_file();

  if (usage == NULL) {
    printf("# Anthy-dic-tool\n#\n");
    return;
  }

  printf("#" PACKAGE " " VERSION "\n");

  /* copy the file to stdout, converting when UTF-8 output is requested */
  while (fgets(line, sizeof(line), usage) != NULL) {
    if (encoding != ANTHY_UTF8_ENCODING) {
      printf("%s", line);
    } else {
      char *converted = anthy_conv_euc_to_utf8(line);
      printf("%s", converted);
      free(converted);
    }
  }
  fclose(usage);
}

/*
 * Read the next non-comment line from fp into buf.
 *
 * Lines whose first character is '#' are skipped. A trailing "\n" and a
 * trailing "\r" (so both LF and CRLF line endings) are removed from the
 * returned line.
 *
 * buf: destination buffer
 * len: size of buf in bytes
 * fp:  stream to read from
 *
 * Returns buf, or NULL when the end of the stream is reached.
 */
static char *
read_line(char *buf, int len, FILE *fp)
{
  while (fgets(buf, len, fp)) {
    size_t l;

    if (buf[0] == '#') {
      continue;
    }
    l = strlen(buf);
    /* strip the line terminator: first the LF, then a preceding CR */
    if (l > 0 && buf[l - 1] == '\n') {
      buf[--l] = '\0';
    }
    if (l > 0 && buf[l - 1] == '\r') {
      buf[--l] = '\0';
    }
    return buf;
  }
  return NULL;
}

/*
 * Read one "name = value" line from fp and push it onto the list at head.
 *
 * The line must consist of three whitespace separated tokens; the middle
 * token is read but its content is not checked.
 *
 * head:  list head; the new pair is inserted right after it
 * fp:    stream to read from
 * table: non-zero when reading the type table; in that case the tokens are
 *        passed through anthy_conv_euc_to_utf8() if UTF-8 is selected
 *
 * Returns 0 when a pair was added, -1 at end of input, on a line that does
 * not have three tokens (including a blank line), or when memory cannot be
 * allocated.
 */
static int
read_typetab_var(struct var *head, FILE *fp, int table)
{
  char buf[256];
  char var[256], eq[256], val[256];
  struct var *v;

  if (!read_line(buf, 256, fp)) {
    return -1;
  }
  /* field widths keep each token inside its 256 byte buffer */
  if (sscanf(buf, "%255s %255s %255s", var, eq, val) != 3) {
    return -1;
  }

  v = malloc(sizeof *v);
  if (!v) {
    return -1;
  }
  if (encoding == ANTHY_UTF8_ENCODING && table) {
    /* UTF-8 */
    v->var_name = anthy_conv_euc_to_utf8(var);
    v->val = anthy_conv_euc_to_utf8(val);
  } else {
    /* do not change */
    v->var_name = strdup(var);
    v->val = strdup(val);
  }
  if (!v->var_name || !v->val) {
    /* do not link a half-built node into the list */
    free(v->var_name);
    free(v->val);
    free(v);
    return -1;
  }

  /* link into the list */
  v->next = head->next;
  head->next = v;

  return 0;
}

/*
 * Read one entry of the type table from fp and add it to trans_tab_list.
 *
 * An entry is a line holding the word type name, followed by parameter
 * lines that are consumed until read_typetab_var() fails (blank line, end
 * of file, ...). The name is stored with a '#' prepended.
 *
 * Returns 0 when an entry was added, -1 at end of input or when memory
 * cannot be allocated.
 */
static int
read_typetab_entry(FILE *fp)
{
  char buf[256], type_name[257];
  char *res;
  struct trans_tab *t;

  /* read the word type name on the first line, skipping blank lines */
  do {
    res = read_line(buf, 256, fp);
    if (!res) {
      return -1;
    }
  } while (res[0] == '#' || res[0] == 0);

  t = malloc(sizeof *t);
  if (!t) {
    return -1;
  }
  /* buf holds at most 255 characters, so '#' + buf + NUL fits in 257 bytes */
  sprintf(type_name, "#%s", buf);
  t->type_name = strdup(type_name);
  if (!t->type_name) {
    free(t);
    return -1;
  }
  t->var_list.next = NULL;

  /* read the parameters */
  while (!read_typetab_var(&t->var_list, fp, 1)) {
    /* nothing: each call appends one parameter */
  }

  /* link into the list */
  t->next = trans_tab_list.next;
  trans_tab_list.next = t;
  return 0;
}

/*
 * Load the whole type table into trans_tab_list.
 *
 * Prints a message and exits with status 1 when the table file cannot be
 * opened. Entries are read until read_typetab_entry() reports failure.
 */
static void
read_typetab(void)
{
  FILE *fp = open_typetab();
  if (!fp) {
    printf("Failed to open type table.\n");
    exit(1);
  }
  while (!read_typetab_entry(fp)) {
    /* nothing: each call adds one entry */
  }
  /* the table is fully in memory now; release the stream */
  fclose(fp);
}

/*
 * Look up the type table entry whose type_name equals name.
 * Returns the entry, or NULL when no entry matches.
 */
static struct trans_tab *
find_trans_tab_by_name(char *name)
{
  struct trans_tab *cur = trans_tab_list.next;

  /* walk until the list ends or the names compare equal */
  while (cur != NULL && strcmp(cur->type_name, name) != 0) {
    cur = cur->next;
  }
  return cur;
}

/*
 * Print every parameter of the type table entry t to stdout, one
 * "name<TAB>=<TAB>value" line per parameter.
 */
static void
print_word_type(struct trans_tab *t)
{
  struct var *param = t->var_list.next;

  while (param != NULL) {
    printf("%s\t=\t%s\n", param->var_name, param->val);
    param = param->next;
  }
}

/*
 * Dump the private dictionary to stdout in the external text format.
 *
 * The usage header is printed first. Each entry whose index, word type and
 * word can all be fetched is printed as "index freq word", followed by its
 * word type parameters and a blank line. Entries whose word type is not in
 * the type table produce a '#' comment line instead.
 */
static void
dump_dic(void)
{
  enum { FIELD_LEN = 100 };

  print_usage_text();
  if (anthy_priv_dic_select_first_entry() == -1) {
    printf("# Failed to read private dictionary\n"
           "# There are no words or error occured?\n"
           "#\n");
    return;
  }

  do {
    char yomi[FIELD_LEN], wtype[FIELD_LEN], word[FIELD_LEN];
    struct trans_tab *tab;
    int freq;

    /* skip entries for which any of the three fields is unavailable */
    if (!anthy_priv_dic_get_index(yomi, FIELD_LEN) ||
        !anthy_priv_dic_get_wtype(wtype, FIELD_LEN) ||
        !anthy_priv_dic_get_word(word, FIELD_LEN)) {
      continue;
    }

    freq = anthy_priv_dic_get_freq();
    tab = find_trans_tab_by_name(wtype);
    if (tab == NULL) {
      printf("# Failed to determine word type of %s(%s).\n", word, wtype);
      continue;
    }

    printf("%s %d %s\n", yomi, freq, word);
    print_word_type(tab);
    printf("\n");
  } while (anthy_priv_dic_select_next_entry() == 0);
}

/*
 * Set fp_in to the stream that dictionary entries are loaded from.
 *
 * With no file name on the command line (fn is NULL) stdin is used.
 * Otherwise fn is opened for reading; if that fails the reason is reported
 * on stderr and the process exits with status 1.
 */
static void
open_input_file(void)
{
  if (!fn) {
    fp_in = stdin;
    return;
  }

  fp_in = fopen(fn, "r");
  if (!fp_in) {
    /* tell the user which file failed and why before giving up */
    perror(fn);
    exit(1);
  }
}

/*
 * Is v contained in s?
 * Returns 1 when the list headed by s has a pair with the same name and
 * the same value as v, 0 otherwise.
 */
static int
match_var(struct var *v, struct var *s)
{
  struct var *cand = s->next;

  while (cand != NULL) {
    if (strcmp(v->var_name, cand->var_name) == 0 &&
        strcmp(v->val, cand->val) == 0) {
      return 1;
    }
    cand = cand->next;
  }
  return 0;
}

/*
 * Is v1 a subset of v2?
 * Returns 1 when every pair in the list headed by v1 is also found in the
 * list headed by v2 (an empty v1 is a subset of anything), 0 otherwise.
 */
static int
var_list_subset_p(struct var *v1, struct var *v2)
{
  struct var *item = v1->next;

  while (item != NULL) {
    if (match_var(item, v2) == 0) {
      return 0;
    }
    item = item->next;
  }
  return 1;
}

static char *
find_wt(void)
{
  struct var v;
  struct trans_tab *t;
  v.next = 0;
  while(!read_typetab_var(&v, fp_in, 0));
  for (t = trans_tab_list.next; t; t = t->next) {
    if (var_list_subset_p(&t->var_list, &v) &&
	var_list_subset_p(&v, &t->var_list)) {
      return t->type_name;
    }
  }
  return NULL;
}

/*
 * Scan fp_in for the next entry head line, "reading frequency word".
 *
 * Lines that do not yield all three fields are skipped. The word field
 * takes the remainder of the line.
 *
 * yomi, freq, w: output buffers for the three fields
 *
 * Returns 0 when a head line was found, -1 at end of input.
 */
static int
find_head(char *yomi, char *freq, char *w)
{
  char line[256];

  for (;;) {
    if (read_line(line, 256, fp_in) == NULL) {
      return -1;
    }
    if (sscanf(line, "%s %s %[^\n]",yomi, freq, w) == 3) {
      return 0;
    }
  }
}

/*
 * Read entries in the external text format from fp_in and register each
 * one in the private dictionary, reporting the outcome of every entry on
 * stdout.
 */
static void
load_dic(void)
{
  char yomi[256], freq[256], w[256];

  while (find_head(yomi, freq, w) == 0) {
    char *type_name = find_wt();

    if (type_name == NULL) {
      printf("Failed to find the type of %s.\n", yomi);
      continue;
    }

    if (anthy_priv_dic_add_entry(yomi, w, type_name, atoi(freq)) == -1) {
      printf("Failed to register %s\n", yomi);
    } else {
      printf("Word %s is registered as %s\n", yomi, type_name);
    }
  }
}

/* Print the program name and VERSION to stdout and exit with status 0. */
static void
print_version(void)
{
  /* VERSION is pasted into the format string at compile time */
  printf("Anthy-dic-util "VERSION".\n");
  exit(0);
}

/*
 * Interpret the command line and record the result in the file-scope
 * settings (command, encoding, personality, fn).
 *
 * Arguments starting with "--" are options; unrecognized options are
 * ignored. --help and --version print their text and exit immediately.
 * Any other argument is taken as the input file name; the last one wins.
 */
static void
parse_args(int argc, char **argv)
{
  int idx;

  for (idx = 1; idx < argc; idx++) {
    char *arg = argv[idx];
    char *name;

    if (strncmp(arg, "--", 2) != 0) {
      /* not an option: input file name */
      fn = arg;
      continue;
    }

    name = arg + 2;
    if (strcmp(name, "help") == 0) {
      print_usage();
    } else if (strcmp(name, "version") == 0) {
      print_version();
    } else if (strcmp(name, "dump") == 0) {
      command = DUMP_DIC;
    } else if (strcmp(name, "append") == 0) {
      command = APPEND_DIC;
    } else if (strncmp(name, "personality=", 12) == 0) {
      /* keep a pointer to the text after the '=' */
      personality = name + 12;
    } else if (strcmp(name, "utf8") == 0) {
      encoding = ANTHY_UTF8_ENCODING;
    } else if (strcmp(name, "eucjp") == 0) {
      encoding = ANTHY_EUC_JP_ENCODING;
    } else if (strcmp(name, "load") == 0) {
      command = LOAD_DIC;
    }
  }
}

/*
 * Initialize the dictionary utility library, tell it the selected
 * encoding, and load the word type table (read_typetab() exits the
 * process when the table cannot be opened).
 *
 * NOTE(review): the `personality` value parsed from --personality= is not
 * passed to the library here -- confirm whether that is intended.
 */
static void
init_lib(void)
{
  anthy_dic_util_init();
  anthy_dic_util_set_encoding(encoding);
  read_typetab();
}

/*
 * Entry point: parse the command line and run the selected operation.
 *
 *   --dump    write the private dictionary to stdout
 *   --load    replace the private dictionary with the entries read
 *   --append  add the entries read to the private dictionary
 *
 * With no operation selected the usage text is printed.
 * Returns 0; failures inside the helpers exit the process directly.
 */
int
main(int argc,char **argv)
{
  fp_in = stdin;
  parse_args(argc, argv);

  switch (command) {
  case DUMP_DIC:
    init_lib();
    dump_dic();
    break;
  case LOAD_DIC:
    init_lib();
    /*
     * Open the input before deleting anything: open_input_file() exits
     * when the file cannot be opened, and the existing dictionary must
     * survive that failure.
     */
    open_input_file();
    anthy_priv_dic_delete();
    load_dic();
    break;
  case APPEND_DIC:
    init_lib();
    open_input_file();
    load_dic();
    break;
  case UNSPEC:
  default:
    print_usage();
  }
  return 0;
}
/* syntax highlighted by Code2HTML, v. 0.9.1 */