/*
* 変換や文節の伸縮などの操作が進行中の文字列や候補などを
* まとめて変換コンテキストと呼ぶ。
* Anthyのコンテキストに対する操作は全てここから呼ばれる。
* 各操作に対して変換パイプラインの必要なモジュールを順に呼びだす。
*
* personalityの管理もする。
*
* Funded by IPA未踏ソフトウェア創造事業 2001 10/29
* Copyright (C) 2000-2007 TABATA Yusuke
*
* $Id: context.c,v 1.26 2002/11/17 14:45:47 yusuke Exp $
*/
/*
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <anthy/anthy.h>
#include <anthy/alloc.h>
#include <anthy/record.h>
#include <anthy/ordering.h>
#include <anthy/splitter.h>
#include <anthy/xstr.h>
#include "main.h"
/**/
static allocator context_ator;
/** 現在のpersonality
* 未設定時: null
* 未設定のまま変換を開始した場合: "default"
* anonymousの場合: ""
*/
static char *current_personality;
/**/
#define HISTORY_FILE_LIMIT 100000
static void
context_dtor(void *p)
{
anthy_do_reset_context((struct anthy_context *)p);
}
/** 現在のpersonalityを返す */
static char *
get_personality(void)
{
if (!current_personality) {
current_personality = strdup("default");
anthy_dic_set_personality(current_personality);
}
return current_personality;
}
static void
release_segment(struct seg_ent *s)
{
if (s->cands) {
int i;
for (i = 0; i < s->nr_cands; i++) {
anthy_release_cand_ent(s->cands[i]);
}
free (s->cands);
}
if (s->mw_array) {
free(s->mw_array);
}
free(s);
}
/** 文節リストの最後の要素を削除する */
static void
pop_back_seg_ent(struct anthy_context *c)
{
struct seg_ent *s;
s = c->seg_list.list_head.prev;
if (s == &c->seg_list.list_head) {
return ;
}
s->prev->next = s->next;
s->next->prev = s->prev;
release_segment(s);
c->seg_list.nr_segments --;
}
/** n番目の文節の文字のindexを求める */
static int
get_nth_segment_index(struct anthy_context *c, int n)
{
int i,s;
for (i = 0, s = 0; i < c->str.len; i++) {
if (c->split_info.ce[i].seg_border) {
if (s == n) {
return i;
}
s++;
}
}
return -1;
}
/** n番目の文節の長さを求める.
* segment_listが構成されていなくても計算できるようにする.
*/
static int
get_nth_segment_len(struct anthy_context *c, int sindex)
{
int a,i,l;
a = get_nth_segment_index(c, sindex);
if ( a == -1){
return -1;
}
l = 1;
for (i = a+1; !c->split_info.ce[i].seg_border; i++) {
l++;
}
return l;
}
/** metawordの配列を作る */
static void
make_metaword_array(struct anthy_context *ac,
struct seg_ent *se)
{
int i;
se->mw_array = NULL;
for (i = se->len; i > 0; i--) {
int j;
/* 最後に濁点とかがついてたら直前の文字ごと落す */
if (i < se->len &&
anthy_get_xchar_type(se->str.str[i]) & XCT_PART) {
/* FIXME 濁点とかがありえない並びをしてたら */
i--;
continue ;
}
se->nr_metaword = anthy_get_nr_metaword(&ac->split_info, se->from, i);
if (!se->nr_metaword) {
continue ;
}
/* metawordを配列に取り込む */
se->mw_array = malloc(sizeof(struct meta_word*) * se->nr_metaword);
for (j = 0; j < se->nr_metaword; j++) {
se->mw_array[j] = anthy_get_nth_metaword(&ac->split_info, se->from, i, j);
}
return;
}
}
static struct seg_ent*
create_segment(struct anthy_context *ac, int from, int len,
struct meta_word* best_mw)
{
struct seg_ent* s;
s = (struct seg_ent *)malloc(sizeof(struct seg_ent));
s->str.str = &ac->str.str[from];
s->str.len = len;
s->from = from;
s->len = s->str.len;
s->nr_cands = 0;
s->cands = NULL;
s->best_seg_class = ac->split_info.ce[from].best_seg_class;
s->best_mw = best_mw;
make_metaword_array(ac, s);
return s;
}
/** 変換コンテキストに文節を追加する */
static void
push_back_segment(struct anthy_context *ac, struct seg_ent *se)
{
se->next = &ac->seg_list.list_head;
se->prev = ac->seg_list.list_head.prev;
ac->seg_list.list_head.prev->next = se;
ac->seg_list.list_head.prev = se;
ac->seg_list.nr_segments ++;
se->committed = -1;
}
/** splitterによって配列中に付けられた文節境界のマークから、
* 文節のリストを構成する
*/
static void
create_segment_list(struct anthy_context *ac, int from, int to)
{
int i, n;
struct seg_ent *s;
/* from の所までにいくつの文節があるか調べる */
i = 0; n = 0;
while (i < from) {
i += get_nth_segment_len(ac, n);
n++;
};
/**/
for (i = from; i < to; i++) {
if (ac->split_info.ce[i].seg_border) {
int len = get_nth_segment_len(ac, n);
s = create_segment(ac, i, len, ac->split_info.ce[i].best_mw);
push_back_segment(ac, s);
n++;
}
}
}
/** コンテキストを作る */
struct anthy_context *
anthy_do_create_context(int encoding)
{
struct anthy_context *ac;
char *p = get_personality();
if (!p) {
return NULL;
}
ac = (struct anthy_context *)anthy_smalloc(context_ator);
ac->str.str = NULL;
ac->str.len = 0;
ac->seg_list.nr_segments = 0;
ac->seg_list.list_head.prev = &ac->seg_list.list_head;
ac->seg_list.list_head.next = &ac->seg_list.list_head;
ac->split_info.word_split_info = NULL;
ac->split_info.ce = NULL;
ac->ordering_info.oc = NULL;
ac->dic_session = NULL;
ac->prediction.str.str = NULL;
ac->prediction.str.len = 0;
ac->prediction.nr_prediction = 0;
ac->prediction.predictions = NULL;
ac->encoding = encoding;
ac->reconversion_mode = ANTHY_RECONVERT_AUTO;
return ac;
}
/** コンテキストのアロケータを作る */
void
anthy_init_contexts(void)
{
context_ator = anthy_create_allocator(sizeof(struct anthy_context),
context_dtor);
}
void
anthy_quit_contexts(void)
{
anthy_free_allocator(context_ator);
}
static void
release_prediction(struct prediction_cache *pc)
{
int i;
if (pc->str.str) {
free(pc->str.str);
pc->str.str = NULL;
}
if (pc->predictions) {
for (i = 0; i < pc->nr_prediction; ++i) {
anthy_free_xstr(pc->predictions[i].src_str);
anthy_free_xstr(pc->predictions[i].str);
}
free(pc->predictions);
pc->predictions = NULL;
}
}
void
anthy_release_segment_list(struct anthy_context *ac)
{
int i, sc;
sc = ac->seg_list.nr_segments;
for (i = 0; i < sc; i++) {
pop_back_seg_ent(ac);
}
ac->seg_list.nr_segments = 0;
}
/* resetではcontextのために確保されたリソースを全て解放する */
void
anthy_do_reset_context(struct anthy_context *ac)
{
/* まず辞書セッションを解放 */
if (ac->dic_session) {
anthy_dic_release_session(ac->dic_session);
ac->dic_session = NULL;
}
if (!ac->str.str) {
/* 文字列が設定されていなければ解放すべき物はもう無い */
return ;
}
free(ac->str.str);
ac->str.str = NULL;
anthy_release_split_context(&ac->split_info);
anthy_release_segment_list(ac);
/* 予測された文字列の解放 */
release_prediction(&ac->prediction);
}
void
anthy_do_release_context(struct anthy_context *ac)
{
anthy_sfree(context_ator, ac);
}
static void
make_candidates(struct anthy_context *ac, int from, int from2, int is_reverse)
{
int i;
int len = ac->str.len;
/* 文節の境界を設定 */
/* from と from2の間に境界を作ることを禁止する */
anthy_mark_border(&ac->split_info, from, from2, len);
create_segment_list(ac, from, len);
anthy_sort_metaword(&ac->seg_list);
/* 候補を列挙 */
for (i = 0; i < ac->seg_list.nr_segments; i++) {
anthy_do_make_candidates(&ac->split_info,
anthy_get_nth_segment(&ac->seg_list, i),
is_reverse);
}
/* 候補をソート */
anthy_sort_candidate(&ac->seg_list, 0);
}
int
anthy_do_context_set_str(struct anthy_context *ac, xstr *s, int is_reverse)
{
int i;
/* 文字列をコピー(一文字分余計にして0をセット) */
ac->str.str = (xchar *)malloc(sizeof(xchar)*(s->len+1));
anthy_xstrcpy(&ac->str, s);
ac->str.str[s->len] = 0;
/* splitterの初期化*/
anthy_init_split_context(&ac->str, &ac->split_info, is_reverse);
/* 解の候補を作成 */
make_candidates(ac, 0, 0, is_reverse);
/* 最初に設定した文節境界を覚えておく */
for (i = 0; i < ac->seg_list.nr_segments; i++) {
struct seg_ent *s = anthy_get_nth_segment(&ac->seg_list, i);
ac->split_info.ce[s->from].initial_seg_len = s->len;
}
return 0;
}
void
anthy_do_resize_segment(struct anthy_context *ac,
int nth, int resize)
{
int i;
int index, len, sc;
/* resizeが可能か検査する */
if (nth >= ac->seg_list.nr_segments) {
return ;
}
index = get_nth_segment_index(ac, nth);
len = get_nth_segment_len(ac, nth);
if (index + len + resize > ac->str.len) {
return ;
}
if (len + resize < 1) {
return ;
}
/* nth以降のseg_entを解放する */
sc = ac->seg_list.nr_segments;
for (i = nth; i < sc; i++) {
pop_back_seg_ent(ac);
}
/* resizeしたseg_borderをマークする */
/* 現在のマークを消して新しいマークをつける */
ac->split_info.ce[index+len].seg_border = 0;
ac->split_info.ce[ac->str.len].seg_border = 1;
for (i = index+len+resize+1; i < ac->str.len; i++) {
ac->split_info.ce[i].seg_border = 0;
}
ac->split_info.ce[index+len+resize].seg_border = 1;
for (i = index; i < ac->str.len; i++) {
ac->split_info.ce[i].best_mw = NULL;
}
/* 解の候補を作成 */
make_candidates(ac, index, index+len+resize, 0);
}
/*
* n番めの文節を取得する、無い場合にはNULLを返す
*/
struct seg_ent *
anthy_get_nth_segment(struct segment_list *sl, int n)
{
int i;
struct seg_ent *se;
if (n >= sl->nr_segments ||
n < 0) {
return NULL;
}
for (i = 0, se = sl->list_head.next; i < n; i++, se = se->next);
return se;
}
int
anthy_do_set_prediction_str(struct anthy_context *ac, xstr* xs)
{
struct prediction_cache* prediction = &ac->prediction;
int nr_prediction;
/* まず辞書セッションを解放 */
if (ac->dic_session) {
anthy_dic_release_session(ac->dic_session);
ac->dic_session = NULL;
}
/* 予測された文字列の解放 */
release_prediction(&ac->prediction);
/* 辞書セッションの開始 */
if (!ac->dic_session) {
ac->dic_session = anthy_dic_create_session();
if (!ac->dic_session) {
return -1;
}
}
prediction->str.str = (xchar*)malloc(sizeof(xchar*)*(xs->len+1));
anthy_xstrcpy(&prediction->str, xs);
prediction->str.str[xs->len]=0;
nr_prediction = anthy_traverse_record_for_prediction(xs, NULL);
prediction->nr_prediction = nr_prediction;
if (nr_prediction) {
prediction->predictions = (struct prediction_t*)malloc(sizeof(struct prediction_t) *
nr_prediction);
anthy_traverse_record_for_prediction(xs, prediction->predictions);
}
return 0;
}
static const char *
get_change_state(struct anthy_context *ac)
{
int resize = 0, cand_change = 0;
int i;
for (i = 0; i < ac->seg_list.nr_segments; i++) {
struct seg_ent *s = anthy_get_nth_segment(&ac->seg_list, i);
if (ac->split_info.ce[s->from].initial_seg_len != s->len) {
resize = 1;
}
if (s->committed > 0) {
cand_change = 1;
}
}
/**/
if (resize && cand_change) {
return "SC";
}
if (resize) {
return "S";
}
if (cand_change) {
return "C";
}
return "-";
}
static void
write_history(FILE *fp, struct anthy_context *ac)
{
int i;
/* 読み */
fprintf(fp, "|");
for (i = 0; i < ac->seg_list.nr_segments; i++) {
struct seg_ent *s = anthy_get_nth_segment(&ac->seg_list, i);
char *c = anthy_xstr_to_cstr(&s->str, ANTHY_EUC_JP_ENCODING);
fprintf(fp, "%s|", c);
free(c);
}
fprintf(fp, " |");
/* 結果 */
for (i = 0; i < ac->seg_list.nr_segments; i++) {
struct seg_ent *s = anthy_get_nth_segment(&ac->seg_list, i);
char *c;
/**/
if (s->committed < 0) {
fprintf(fp, "?|");
continue ;
}
c = anthy_xstr_to_cstr(&s->cands[s->committed]->str,
ANTHY_EUC_JP_ENCODING);
fprintf(fp, "%s|", c);
free(c);
}
}
void
anthy_save_history(const char *fn, struct anthy_context *ac)
{
FILE *fp;
struct stat st;
if (!fn) {
return ;
}
fp = fopen(fn, "a");
if (!fp) {
return ;
}
if (stat(fn, &st) ||
st.st_size > HISTORY_FILE_LIMIT) {
fclose(fp);
return ;
}
/**/
fprintf(fp, "anthy-%s ", anthy_get_version_string());
fprintf(fp, "%s ", get_change_state(ac));
write_history(fp, ac);
fprintf(fp, "\n");
fclose(fp);
/**/
chmod(fn, S_IREAD | S_IWRITE);
}
/** 候補を表示する */
void
anthy_print_candidate(struct cand_ent *ce)
{
int mod = (ce->score % 1000);
int seg_score = 0;
if (ce->mw) {
seg_score = ce->mw->score;
}
anthy_putxstr(&ce->str);
printf(":(");
/*if (ce->nr_words == 1) {printf("%d,", ce->elm[0].id); }*/
if (ce->flag & CEF_OCHAIRE) {
putchar('o');
}
if (ce->flag & CEF_SINGLEWORD) {
putchar('1');
}
if (ce->flag & CEF_GUESS) {
putchar('g');
}
if (ce->flag & (CEF_KATAKANA | CEF_HIRAGANA)) {
putchar('N');
}
if (ce->flag & CEF_USEDICT) {
putchar('U');
}
if (ce->flag & CEF_CONTEXT) {
putchar('C');
}
printf(",%d,", seg_score);
if (ce->mw) {
printf("%s,%d", anthy_seg_class_sym(ce->mw->seg_class),
ce->mw->struct_score);
} else {
putchar('-');
}
printf(")");
if (ce->score >= 1000) {
printf("%d,", ce->score/1000);
if (mod < 100) {
printf("0");
}
if (mod < 10) {
printf("0");
}
printf("%d ", mod);
} else {
printf("%d ", ce->score);
}
}
/** 文節を表示する */
static void
print_segment(struct seg_ent *e)
{
int i;
anthy_putxstr(&e->str);
printf("(");
for ( i = 0 ; i < e->nr_cands ; i++) {
anthy_print_candidate(e->cands[i]);
printf(",");
}
printf(")");
printf(":\n");
}
/** コンテキストを表示する */
void
anthy_do_print_context(struct anthy_context *ac, int encoding)
{
int i;
struct char_ent *ce;
anthy_xstr_set_print_encoding(encoding);
ce = ac->split_info.ce;
if (!ce) {
printf("(invalid)\n");
return ;
}
/* 各文字を表示する */
for (i = 0, ce = ac->split_info.ce; i < ac->str.len; i++, ce++) {
if (ce->seg_border) {
printf("|");
}
anthy_putxchar(*(ce->c));
}
printf("\n");
/* 各文節を表示する */
for (i = 0; i < ac->seg_list.nr_segments; i++) {
print_segment(anthy_get_nth_segment(&ac->seg_list, i));
}
printf("\n");
}
void
anthy_release_cand_ent(struct cand_ent *ce)
{
if (ce->elm) {
free(ce->elm);
}
if (&ce->str) {
anthy_free_xstr_str(&ce->str);
}
free(ce);
}
int
anthy_do_set_personality(const char *id)
{
if (current_personality) {
/* すでに設定されてる */
return -1;
}
if (!id || strchr(id, '/')) {
return -1;
}
current_personality = strdup(id);
anthy_dic_set_personality(current_personality);
return 0;
}
void
anthy_init_personality(void)
{
current_personality = NULL;
}
void
anthy_quit_personality(void)
{
if (current_personality) {
free(current_personality);
current_personality = NULL;
}
}
syntax highlighted by Code2HTML, v. 0.9.1