/* EIMILTextUtil.c EIMIL mtext Manager. */ #include #include #include #include #include "EIMILint.h" /* NOTICE!!! IMTextUtil.c set native_chars in UTF-32 format, and set encoding to UTF32_CODESET. All of the functions below except Normalize_IMText() treat IMText as UTF-32 based string. Therefore, you cannot use it as an external representation before calling Normalize_IMText(). EIMIL_convert_mtext_to_IMText convert text.naitive_chars, which is encoded in UTF-32, into text.utf_chars in UTF-16; recaliculate start_pos and end_pos of IMAnnotationValue; and properly fill IMFeedBack from the original value and 'feedback attribute. */ #ifndef MIN #define MIN(x, y) ((x) < (y) ? (x) : (y)) #endif #ifndef MAX #define MAX(x, y) ((x) > (y) ? (x) : (y)) #endif #define EIMIL_INTERVAL_OVERLAP_P(s1, e1, s2, e2) (MIN(e1, e2) > MAX(s1, s2)) #define UTF16_S_P(v) ((v & 0xF800) == 0xD800) #define UTF16_HS_P(v) ((v & 0xFC00) == 0xD800) #define UTF16_LS_P(v) ((v & 0xFC00) == 0xDC00) int EIMIL_UTF32_string_len( const UTF32 *pstr ) { int i; for (i = 0;*pstr;pstr++, i++); return i; } int EIMIL_UTF32_strcmp( const UTF32 *pstr1, const UTF32 *pstr2 ) { for (;; pstr1++, pstr2++) { if (*pstr1 > *pstr2) return 1; if (*pstr1 < *pstr2) return -1; if (*pstr1 == 0) return 0; } return 0; } int EIMIL_convert_UTF32char_to_UTF8( UTF32 ch, UTF8 *p ) { if (ch < 0x80) { *p++ = ch; return 1; } else if (ch < 0x800) { *p++ = (ch >> 6) | 0xC0; *p++ = (ch & 0x3F) | 0x80; return 2; } else if (ch < 0x10000) { *p++ = (ch >> 12) | 0xE0; *p++ = ((ch >> 6) & 0x3F) | 0x80; *p++ = (ch & 0x3F) | 0x80; return 3; } else if (ch < 0x200000) { *p++ = (ch >> 18) | 0xF0; *p++ = ((ch >> 12) & 0x3F) | 0x80; *p++ = ((ch >> 6) & 0x3F) | 0x80; *p++ = (ch & 0x3F) | 0x80; return 4; } else if (ch < 0x4000000) { /* actually, UTF-32 forbids the area over 0x10FFFF. */ *p++ = (ch >> 24) | 0xF8; *p++ = ((ch >> 18) & 0x3F) | 0x80; *p++ = ((ch >> 12) & 0x3F) | 0x80; *p++ = ((ch >> 6) & 0x3F) | 0x80; *p++ = (ch & 0x3F) | 0x80; return 5; } else { /* actually, UTF-32 forbids the area over 0x10FFFF. */ *p++ = ((ch >> 30) & 0x1) | 0xFC; *p++ = ((ch >> 24) & 0x3F) | 0x80; *p++ = ((ch >> 18) & 0x3F) | 0x80; *p++ = ((ch >> 12) & 0x3F) | 0x80; *p++ = ((ch >> 6) & 0x3F) | 0x80; *p++ = (ch & 0x3F) | 0x80; return 6; } return 0; } UTF8* EIMIL_convert_UTF32_to_UTF8( const UTF32 *putf32 ) { int n; const UTF32 *pu32; UTF8 *ps, *p; for (n = 0, pu32 = putf32; *pu32; pu32++) { if (*pu32 < 0x80) n++; else if (*pu32 < 0x800) n += 2; else if (*pu32 < 0x10000) n += 3; else if (*pu32 < 0x200000) n += 4; else if (*pu32 < 0x4000000) n += 5; else n += 6; } ps = p = (UTF8*) malloc(sizeof(UTF8) * (n + 1)); if (!ps) return NULL; for (; *putf32; putf32++) { if (*putf32 < 0x80) { *p++ = *putf32; } else if (*putf32 < 0x800) { *p++ = (*putf32 >> 6) | 0xC0; *p++ = (*putf32 & 0x3F) | 0x80; } else if (*putf32 < 0x10000) { *p++ = (*putf32 >> 12) | 0xE0; *p++ = ((*putf32 >> 6) & 0x3F) | 0x80; *p++ = (*putf32 & 0x3F) | 0x80; } else if (*putf32 < 0x200000) { *p++ = (*putf32 >> 18) | 0xF0; *p++ = ((*putf32 >> 12) & 0x3F) | 0x80; *p++ = ((*putf32 >> 6) & 0x3F) | 0x80; *p++ = (*putf32 & 0x3F) | 0x80; } else if (*putf32 < 0x4000000) { /* actually, UTF-32 forbids the area over 0x10FFFF. */ *p++ = (*putf32 >> 24) | 0xF8; *p++ = ((*putf32 >> 18) & 0x3F) | 0x80; *p++ = ((*putf32 >> 12) & 0x3F) | 0x80; *p++ = ((*putf32 >> 6) & 0x3F) | 0x80; *p++ = (*putf32 & 0x3F) | 0x80; } else { /* actually, UTF-32 forbids the area over 0x10FFFF. */ *p++ = ((*putf32 >> 30) & 0x1) | 0xFC; *p++ = ((*putf32 >> 24) & 0x3F) | 0x80; *p++ = ((*putf32 >> 18) & 0x3F) | 0x80; *p++ = ((*putf32 >> 12) & 0x3F) | 0x80; *p++ = ((*putf32 >> 6) & 0x3F) | 0x80; *p++ = (*putf32 & 0x3F) | 0x80; } } *p = 0; return ps; } int EIMIL_convert_UTF8_to_UTF32char( const UTF8 *p, UTF32 *pch ) { if (!p) return 0; if (*p < 0x80) { if (pch) *pch = *p; return 1; } else if (*p < 0xE0) { if (pch) *pch = (((p[0] & 0x1F) << 6) | (p[1] & 0x3F)); return 2; } else if (*p < 0xF0) { if (pch) *pch = (((p[0] & 0x0F) << 12) | ((p[1] & 0x3F) << 6) | (p[2] & 0x3F)); return 3; } else if (*p < 0xF8) { if (pch) *pch = (((p[0] & 0x07) << 18) | ((p[1] & 0x3F) << 12) | ((p[2] & 0x3F) << 6) | (p[3] & 0x3F)); return 4; } else if (*p < 0xFC) { if (pch) *pch = (((p[0] & 0x03) << 24) | ((p[1] & 0x3F) << 18) | ((p[2] & 0x3F) << 12) | ((p[3] & 0x3F) << 6) | (p[4] & 0x3F)); return 5; } else { if (pch) *pch = (((p[0] & 0x01) << 30) | ((p[1] & 0x3F) << 24) | ((p[2] & 0x3F) << 18) | ((p[3] & 0x3F) << 12) | ((p[4] & 0x3F) << 6) | (p[5] & 0x3F)); return 6; } return 0; } UTF32* EIMIL_convert_UTF8_to_UTF32( const UTF8 *putf8 ) { int n; const UTF8 *pu8; UTF32 *ps, *p; for (n = 0, pu8 = putf8; *pu8; n++) { if (*pu8 < 0x80) pu8++; else if (*pu8 < 0xE0) pu8 += 2; else if (*pu8 < 0xF0) pu8 += 3; else if (*pu8 < 0xF8) pu8 += 4; else if (*pu8 < 0xFC) pu8 += 5; else pu8 += 6; } ps = p = (UTF32*) malloc(sizeof(UTF32) * (n + 1)); if (!ps) return NULL; while (*putf8) { if (*putf8 < 0x80) { *p++ = *putf8++; } else if (*putf8 < 0xE0) { *p++ = (((putf8[0] & 0x1F) << 6) | (putf8[1] & 0x3F)); putf8 += 2; } else if (*putf8 < 0xF0) { *p++ = (((putf8[0] & 0x0F) << 12) | ((putf8[1] & 0x3F) << 6) | (putf8[2] & 0x3F)); putf8 += 3; } else if (*putf8 < 0xF8) { *p++ = (((putf8[0] & 0x07) << 18) | ((putf8[1] & 0x3F) << 12) | ((putf8[2] & 0x3F) << 6) | (putf8[3] & 0x3F)); putf8 += 4; } else if (*putf8 < 0xFC) { *p++ = (((putf8[0] & 0x03) << 24) | ((putf8[1] & 0x3F) << 18) | ((putf8[2] & 0x3F) << 12) | ((putf8[3] & 0x3F) << 6) | (putf8[4] & 0x3F)); putf8 += 5; } else { *p++ = (((putf8[0] & 0x01) << 30) | ((putf8[1] & 0x3F) << 24) | ((putf8[2] & 0x3F) << 18) | ((putf8[3] & 0x3F) << 12) | ((putf8[4] & 0x3F) << 6) | (putf8[5] & 0x3F)); putf8 += 6; } } *p = 0; return ps; } int EIMIL_adjust_UTF16_pos_to_UTF32( int pos, const UTF32 *pbase, const UTF32 *pbaseend ) { int i, npos; for (i = 0, npos = 0;i < pos;npos++, pbase++) { if (pbase >= pbaseend) return -1; if (*pbase < 0x10000) { i++; }else{ i += 2; } } return npos; } int EIMIL_adjust_UTF32_pos_to_UTF16( int pos, const UTF32 *pbase, const UTF32 *pbaseend ) { const UTF32 *pe; int npos; pe = pbase + pos; for (npos = 0;pbase < pe;pbase++) { if (pbase >= pbaseend) return -1; if (*pbase < 0x10000) { npos++; }else{ npos += 2; } } return npos; } int EIMIL_convert_UTF32_to_UTF16( const UTF32 *pu32, int u32len, UTF16 **ppu16, int *pu16len ) { int i, rlen; UTF32 u32; UTF16 *pr, *prh; prh = (UTF16*) malloc(sizeof(UTF16) * (u32len * 2 + 1)); if (!prh) return 0; pr = prh; for (i = 0; i < u32len; i++) { u32 = *pu32++; if (UTF16_S_P(u32)) { /* invalid code. TODO:We should output error. */ }else if (u32 < 0x10000) { *pr++ = (UTF16) u32; }else if (u32 < 0x110000) { u32 -= 0x10000; *pr++ = ((u32 >> 10) | 0xD800); *pr++ = ((u32 & 0x3FF) | 0xDC00); }else{ /* invalid code. TODO:We should output error. */ } } *pr = 0; rlen = pr - prh; prh = (UTF16*) realloc(prh, sizeof(UTF16) * (rlen + 1)); *pu16len = rlen; *ppu16 = prh; return 1; } int EIMIL_convert_UTF16_to_UTF32( const UTF16 *pu16, int u16len, UTF32 **ppu32, int *pu32len ) { int i, rlen; UTF16 hs1, hs2; UTF32 *pr, *prh; prh = (UTF32*) malloc(sizeof(UTF32) * (u16len + 1)); if (!prh) return 0; pr = prh; for (i = 0; i < u16len;) { hs1 = *pu16++; i++; if (UTF16_HS_P(hs1)) { hs2 = *pu16++; i++; if (i > u16len) { /* Invalid code. TODO:We should output error. */ break; } if (UTF16_LS_P(hs2)) { *pr++ = (((hs1 & 0x3FF) << 10) | (hs2 & 0x3FF)) + 0x10000; }else{ /* Invalid code. TODO:We should output error. */ *pr++ = hs2; } }else{ *pr++ = hs1; } } rlen = pr - prh; if (rlen != u16len) prh = (UTF32*) realloc(prh, sizeof(UTF32) * (rlen + 1)); *pr = 0; *pu32len = rlen; *ppu32 = prh; return 1; } void EIMIL_destruct_mtext( EIMIL_mtext *pmt ) { int i, j; EIMIL_mtext_props *pmp; EIMIL_value **ppv; EIMIL_prop *pprop; if (pmt->pslots) { for (pmp = pmt->pslots, i = 0; i < pmt->slotsnum; i++, pmp++) { if (pmp->pprops) { for (ppv = pmp->pprops, j = 0; j < pmp->num; j++, ppv++) { ASSERT((*ppv)->type == EIMIL_TYPE_PROP); pprop = &(*ppv)->v.prop; ASSERT(pprop->target == pmt); pprop->st = pprop->end = -1; pprop->target = NULL; EIMIL_RMREF(**ppv); } free(pmp->pprops); } } free(pmt->pslots); } if (pmt->ustr) free(pmt->ustr); return; } EIMIL_value* EIMIL_construct_mtext_from_UTF8( const UTF8 *in ) { UTF32 *pu; EIMIL_value *pv; EIMIL_mtext *pm; pv = (EIMIL_value*) malloc(sizeof(EIMIL_value)); if (!pv) return NULL; memset(pv, 0, sizeof(EIMIL_value)); pv->type = EIMIL_TYPE_MTEXT; pm = &pv->v.mtext; if (!(pu = EIMIL_convert_UTF8_to_UTF32(in))) { free(pm); return NULL; } pm->len = EIMIL_UTF32_string_len(pu); pm->slotsnum = 0; pm->pslots = NULL; pm->UIdatap = 0; pm->ustr = pu; return pv; } EIMIL_value* EIMIL_construct_mtext_from_UTF16( int len, const UTF16 *in ) { EIMIL_value *pv; EIMIL_mtext *pm; pv = (EIMIL_value*) malloc(sizeof(EIMIL_value)); if (!pv) return NULL; memset(pv, 0, sizeof(EIMIL_value)); pv->type = EIMIL_TYPE_MTEXT; pm = &pv->v.mtext; if (!EIMIL_convert_UTF16_to_UTF32(in, len, &pm->ustr, &pm->len)) return NULL; pm->slotsnum = 0; pm->pslots = NULL; pm->UIdatap = 0; return pv; } EIMIL_value* EIMIL_construct_mtext_from_UTF32( int len, const UTF32 *in ) { int i; UTF32 *pu; EIMIL_value *pv; EIMIL_mtext *pm; pv = (EIMIL_value*) malloc(sizeof(EIMIL_value)); if (!pv) return NULL; memset(pv, 0, sizeof(EIMIL_value)); pv->type = EIMIL_TYPE_MTEXT; pm = &pv->v.mtext; pu = (UTF32*) malloc(sizeof(UTF32) * (len + 1)); if (!pu) { free(pm); return NULL; } pm->len = len; pm->slotsnum = 0; pm->pslots = NULL; pm->UIdatap = 0; pm->ustr = pu; for (i = 0; i < len; i++) *pu++ = *in++; *pu = 0; return pv; } EIMIL_value* EIMIL_construct_mtext_from_UTF32_char( UTF32 in ) { UTF32 *pu; EIMIL_value *pv; EIMIL_mtext *pm; pv = (EIMIL_value*) malloc(sizeof(EIMIL_value)); if (!pv) return NULL; memset(pv, 0, sizeof(EIMIL_value)); pv->type = EIMIL_TYPE_MTEXT; pm = &pv->v.mtext; pu = (UTF32*) malloc(sizeof(UTF32) * 2); if (!pu) { free(pm); return NULL; } pm->len = 1; pm->slotsnum = 0; pm->pslots = NULL; pm->UIdatap = 0; pm->ustr = pu; *pu = in; pu[1] = 0; return pv; } int EIMIL_mtext_equal( EIMIL_mtext *pm1, EIMIL_mtext *pm2 ) { /* TODO!! */ return 0; } static EIMIL_mtext_props* EIMIL_find_mtext_props( EIMIL_mtext *pm, EIMIL_symbol *property_sym ) { int i, n; EIMIL_mtext_props *pmp; n = pm->slotsnum; for (pmp = pm->pslots, i = 0; i < n; i++, pmp++) { if (pmp->property_sym == property_sym) break; } if (i == n) return NULL; return pmp; } static EIMIL_mtext_props* EIMIL_prepare_mtext_props_slot( EIMIL_mtext *pm, EIMIL_symbol *property_sym ) { int n; EIMIL_mtext_props *pmp; pmp = EIMIL_find_mtext_props(pm, property_sym); if (pmp) return pmp; n = pm->slotsnum; pm->slotsnum++; pm->pslots = realloc(pm->pslots, sizeof(EIMIL_mtext_props) * pm->slotsnum); if (!pm->pslots) return NULL; pmp = pm->pslots + n; pmp->num = 0; pmp->property_sym = property_sym; pmp->pprops = NULL; return pmp; } EIMIL_value* EIMIL_find_prop_from_mtext( EIMIL_mtext *pm, EIMIL_symbol *property_sym, int pos ) { int i, n; int minpos, minpos_idx; EIMIL_mtext_props *pmp; EIMIL_value **ppv, *pv; EIMIL_prop *pprop; minpos_idx = -1; pmp = EIMIL_find_mtext_props(pm, property_sym); if (!pmp) return NULL; n = pmp->num; for (ppv = pmp->pprops, i = 0; i < n; ppv++, i++) { pv = *ppv; ASSERT(pv->type == EIMIL_TYPE_PROP); pprop = &pv->v.prop; ASSERT(pprop->property_sym == property_sym); if ((pprop->st <= pos) && (pprop->end > pos)) return pv; if ((pprop->st > pos) && ((minpos_idx < 0) || (minpos > pprop->st))) { minpos_idx = i; minpos = pprop->st; } } if (minpos_idx > 0) return pmp->pprops[minpos_idx]; return NULL; } EIMIL_value* EIMIL_get_prop_from_mtext( EIMIL_mtext *pm, EIMIL_symbol *property_sym, int pos ) { int i, n; EIMIL_mtext_props *pmp; EIMIL_value **ppv, *pv; EIMIL_prop *pprop; pmp = EIMIL_find_mtext_props(pm, property_sym); if (!pmp) return NULL; n = pmp->num; for (ppv = pmp->pprops, i = 0; i < n; ppv++, i++) { pv = *ppv; ASSERT(pv->type == EIMIL_TYPE_PROP); pprop = &pv->v.prop; ASSERT(pprop->property_sym == property_sym); if ((pprop->st <= pos) && (pprop->end > pos)) return pv; } return NULL; } void EIMIL_detach_prop_from_mtext( EIMIL_value *pv ) { int i, n; EIMIL_mtext *pmt; EIMIL_mtext_props *pmp; EIMIL_value **ppv; EIMIL_prop *pprop; ASSERT(pv->type == EIMIL_TYPE_PROP); pprop = &pv->v.prop; pmt = pprop->target; if (!pmt) return; pmp = EIMIL_find_mtext_props(pmt, pprop->property_sym); ASSERT(pmp); n = pmp->num; for (ppv = pmp->pprops, i = 0; i < pmp->num; ppv++, i++) { if (*ppv == pv) { pmp->num--; if ((n - i - 1) > 0) { memmove(ppv, ppv + 1, sizeof(EIMIL_value*) * (n - i - 1)); } pprop->st = pprop->end = -1; pprop->target = NULL; EIMIL_RMREF(*pv); return; } } /* not reached */ abort(); return; } /* mtext and property Notation: |----| mtext <----> property |-------------------------------------------| <--P1--> <--P4--> <--P2-> <---P3-----> pprops : [P3 P1 P2 P4] */ EIMIL_mtext* EIMIL_add_prop_on_mtext( EIMIL_mtext *pm, EIMIL_value *pv, int st, int end ) { int i, n; EIMIL_mtext_props *pmp; EIMIL_value **ppv, *pv2; EIMIL_prop *pprop; ASSERT(pv->type == EIMIL_TYPE_PROP); pmp = EIMIL_prepare_mtext_props_slot(pm, pv->v.prop.property_sym); if (!pmp) return NULL; n = pmp->num; for (ppv = pmp->pprops, i = 0; i < n; ppv++, i++) { pv2 = *ppv; ASSERT(pv2->type == EIMIL_TYPE_PROP); pprop = &pv2->v.prop; ASSERT(pprop->property_sym == pmp->property_sym); if (EIMIL_INTERVAL_OVERLAP_P(st, end, pprop->st, pprop->end)) break; } pmp->pprops = (EIMIL_value**) realloc(pmp->pprops, sizeof(EIMIL_value*) * n + 1); if (!pmp->pprops) return NULL; ppv = pmp->pprops + i; if (n > i) { memmove(ppv + 1, ppv, sizeof(EIMIL_value*) * (n - i)); } *ppv = pv; pmp->num++; pv->v.prop.st = st; pv->v.prop.end = end; pv->v.prop.target = pm; EIMIL_ADDREF(*pv); return pm; } /* |-------------------------------------------| <--P1--> <--P4--> <--P2-> <---P3-----> <==setmprop:P5=====> [P3 P2 P1 P4] then... |-------------------------------------------| <--P1--> XXP2XXX <==setmprop:P5=====> [P3 P1 P5 P4] */ EIMIL_mtext* EIMIL_set_prop_on_mtext( EIMIL_mtext *pm, EIMIL_value *pv, int st, int end ) { int i, n, idx; int mst, mend; EIMIL_mtext_props *pmp; EIMIL_value **ppv, *pv2, *pv3; EIMIL_prop *pprop; ASSERT(pv->type == EIMIL_TYPE_PROP); pmp = EIMIL_prepare_mtext_props_slot(pm, pv->v.prop.property_sym); if (!pmp) return NULL; n = pmp->num; idx = -1; ppv = pmp->pprops; for (i = 0; i < n;) { pv2 = ppv[i]; ASSERT(pv2->type == EIMIL_TYPE_PROP); pprop = &pv2->v.prop; ASSERT(pprop->target == pm); ASSERT(pprop->property_sym == pmp->property_sym); mst = pprop->st; mend = pprop->end; if ((mend >= st) && (mst < st)) { /* <---> */ /* <===> */ pprop->end = st; i++; }else if ((mend <= end) && (mst >= st)) { /* <---> */ /* <======> */ pprop->st = -1; pprop->end = -1; pprop->target = NULL; n--; if (n > i) { memmove(ppv + i, ppv + i + 1, sizeof(EIMIL_value*) * (n - i)); } EIMIL_RMREF(*pv2); }else if ((mst <= end) && (mend > end)) { /* <---> */ /* <======> */ pprop->st = end; i++; }else if ((mst < st) && (mend > end)) { /* <--------> */ /* <===> */ /* | */ /* V */ /* <-><===><> */ /* i i+1 */ pv3 = EIMIL_copy_value(pv2); if (!pv3) return NULL; pv3->v.prop.st = end; EIMIL_ADDREF(*pv3); pprop->end = st; ppv = (EIMIL_value**) realloc(ppv, sizeof(EIMIL_value*) * (n + 1)); pmp->pprops = ppv; if (n > (i + 1)) { memmove(ppv + i + 2, ppv + i + 1, sizeof(EIMIL_value*) * (n - i)); } ppv[i + 1] = pv3; n++; if (idx < 0) idx = i + 1; i += 2; }else if (mst < st) { /* <----> */ /* <====> */ if (idx < 0) idx = i; i++; }else{ /* <----> */ /* <====> */ i++; } } pmp->pprops = (EIMIL_value**) realloc(pmp->pprops, sizeof(EIMIL_value*) * (n + 1)); if (!pmp->pprops) return NULL; ppv = pmp->pprops + i; if (n > i) { memmove(ppv + 1, ppv, sizeof(EIMIL_value*) * (n - i)); } *ppv = pv; pmp->num = n + 1; pv->v.prop.st = st; pv->v.prop.end = end; pv->v.prop.target = pm; EIMIL_ADDREF(*pv); return pm; } static EIMIL_value* EIMIL_move_prop( EIMIL_mtext *pm_target, EIMIL_value *pv, int dif, int last ) { int st, end; EIMIL_value *pv2; ASSERT(pv->type == EIMIL_TYPE_PROP); pv2 = EIMIL_copy_value(pv); if (!pv2) return NULL; st = pv->v.prop.st; end = pv->v.prop.end; st += dif; end += dif; if (st < 0) st = 0; if (end > last) end = last; ASSERT((st < end) && (st >= 0) && (end <= last)); pv2->v.prop.st = st; pv2->v.prop.end = end; pv2->v.prop.target = pm_target; EIMIL_ADDREF(*pv2); return pv2; } EIMIL_value* EIMIL_mtext_concat( int num, EIMIL_value **pvs ) { int i, j, k; int clen, pos; EIMIL_mtext *pm, *pmr; EIMIL_value **pvs2, *pvr; EIMIL_value **ppv; EIMIL_mtext_props *pmp, *pmp2; UTF32 *pu; pvr = (EIMIL_value*) malloc(sizeof(EIMIL_value)); if (!pvr) return NULL; memset(pvr, 0, sizeof(EIMIL_value)); pvr->type = EIMIL_TYPE_MTEXT; pmr = &pvr->v.mtext; pmp2 = NULL; pos = 0; for (pvs2 = pvs, i = 0; i < num; pvs2++, i++) { if ((*pvs2)->type == EIMIL_TYPE_MTEXT) { pm = &((*pvs2)->v.mtext); clen = pm->len; for (pmp = pm->pslots, j = 0; j < pm->slotsnum; pmp++, j++) { pmp2 = EIMIL_prepare_mtext_props_slot(pmr, pmp->property_sym); if (!pmp2) goto error; pmp2->pprops = realloc(pmp2->pprops, sizeof(EIMIL_value*) * (pmp2->num + pmp->num)); if (!pmp2->pprops) goto error; ppv = pmp2->pprops + pmp2->num; for (k = 0; k < pmp->num; k++) { *ppv = EIMIL_move_prop(pmr, pmp->pprops[k], pos, pos + clen); if (!*ppv) goto error; ppv++; } pmp2->num += pmp->num; } pos += pm->len; } else if ((*pvs2)->type == EIMIL_TYPE_CHAR) { pos++; } else { ERROR_INTERNAL("Invalid type(must be mtext or char)."); } } pmr->len = pos; pu = (UTF32*) malloc(sizeof(UTF32) * (pos + 1)); if (!pu) goto error; pmr->ustr = pu; for (pvs2 = pvs, i = 0; i < num; pvs2++, i++) { if ((*pvs2)->type == EIMIL_TYPE_MTEXT) { pm = &((*pvs2)->v.mtext); memcpy(pu, pm->ustr, sizeof(UTF32) * pm->len); pu += pm->len; } else { /* EIMIL_TYPE_CHAR */ *pu++ = (*pvs2)->v.ch; } } *pu = 0; return pvr; error: EIMIL_destruct_value(pvr); return NULL; } EIMIL_value* EIMIL_mtext_substr( EIMIL_value *pv_mtext, int st, int end ) { int i, j, len, num_props; int mst, mend; EIMIL_mtext *pm, *pmr; EIMIL_mtext_props *pmp, *pmp2; EIMIL_value **ppv, **ppv2, *pv, *pvr; UTF32 *pu; ASSERT(end > st); pm = &pv_mtext->v.mtext; if (st >= pm->len) return NULL; if (end > pm->len) end = pm->len; pvr = (EIMIL_value*) malloc(sizeof(EIMIL_value)); if (!pvr) return NULL; memset(pvr, 0, sizeof(EIMIL_value)); pvr->type = EIMIL_TYPE_MTEXT; pmr = &pvr->v.mtext; len = end - st; pmr->len = len; pu = (UTF32*) malloc(sizeof(UTF32) * (len + 1)); if (!pu) { free(pmr); return NULL; } pmr->ustr = pu; memcpy(pu, pm->ustr + st, sizeof(UTF32) * len); pu[len] = 0; for (pmp = pm->pslots, i = 0; i < pm->slotsnum; pmp++, i++) { pmp2 = EIMIL_prepare_mtext_props_slot(pmr, pmp->property_sym); if (!pmp2) goto error; num_props = 0; ppv2 = (EIMIL_value**) malloc(sizeof(EIMIL_value*) * pmp->num); pmp2->pprops = ppv2; for (ppv = pmp->pprops, j = 0; j < pmp->num; ppv++, j++) { pv = *ppv; mst = pv->v.prop.st; mend = pv->v.prop.end; if (EIMIL_INTERVAL_OVERLAP_P(st, end, mst, mend)) { *ppv2 = EIMIL_move_prop(pmr, pv, -st, len); if (!*ppv2) goto error; ppv2++; num_props++; } } if (num_props > 0) { pmp2->pprops = (EIMIL_value**) realloc(pmp2->pprops, sizeof(EIMIL_value*) * num_props); if (!pmp2->pprops) goto error; } else { free(pmp2->pprops); pmp2->pprops = NULL; } pmp2->num = num_props; } return pvr; error: EIMIL_destruct_mtext(pmr); free(pmr); return NULL; } /******************************************************************************* Interfacial functions for IM structure <--> EIMIL structure ******************************************************************************/ static IMProp* EIMIL_prop_convert_to_IMProp( EIMIL_prop *pprop ) { int i; IMProp *pim; EIMIL_value **ppv, *pv; pim = (IMProp*) malloc(sizeof(IMProp)); pim->count = pprop->size; switch(pprop->type) { case EIMIL_TYPE_NUMBER: { int *pnums; pim->type = IM_SYMBOL_PROPERTY_NUMBER; pnums = (int*) malloc(sizeof(int) * pim->count); pim->vals.numbers = pnums; if (!pnums) return NULL; for (ppv = pprop->pvals, i = 0; i < pim->count; ppv++, i++) { pv = *ppv; ASSERT(pv->type == EIMIL_TYPE_NUMBER); pnums[i] = pv->v.number; } break; } case EIMIL_TYPE_BOOL: { int *pbools; pim->type = IM_SYMBOL_PROPERTY_BOOL; pbools = (int*) malloc(sizeof(int) * pim->count); pim->vals.bools = pbools; if (!pbools) return NULL; for (ppv = pprop->pvals, i = 0; i < pim->count; ppv++, i++) { pv = *ppv; ASSERT(pv->type == EIMIL_TYPE_BOOL); pbools[i] = pv->v.bool_val; } break; } case EIMIL_TYPE_CHAR: { CARD32BIT *pchars; pim->type = IM_SYMBOL_PROPERTY_CHAR; pchars = (CARD32BIT*) malloc(sizeof(CARD32BIT) * pim->count); pim->vals.chars = pchars; if (!pchars) return NULL; for (ppv = pprop->pvals, i = 0; i < pim->count; ppv++, i++) { pv = *ppv; ASSERT(pv->type == EIMIL_TYPE_CHAR); pchars[i] = pv->v.ch; } break; } case EIMIL_TYPE_MTEXT: { IMText *ptexts; pim->type = IM_SYMBOL_PROPERTY_MTEXT; ptexts = (IMText*) malloc(sizeof(IMText) * pim->count); pim->vals.mtexts = ptexts; if (!ptexts) return NULL; for (ppv = pprop->pvals, i = 0; i < pim->count; ppv++, i++) { pv = *ppv; ASSERT(pv->type == EIMIL_TYPE_MTEXT); if (!EIMIL_convert_mtext_to_IMText(&ptexts[i], &pv->v.mtext)) { return NULL; } } break; } default: abort(); } return pim; } static EIMIL_value* EIMIL_prop_convert_IMProp( IMProp *pim ) { int i; EIMIL_value *pv, *pv2; EIMIL_prop *pprop; switch(pim->type) { case IM_SYMBOL_PROPERTY_NUMBER: pv = EIMIL_construct_prop2(EIMIL_TYPE_NUMBER); if (!pv) return NULL; pprop = &pv->v.prop; for (i = 0; i < pim->count; i++) { pv2 = EIMIL_construct_number(pim->vals.numbers[i]); if (!pv2) return NULL; if (!EIMIL_add_prop(pprop, pv2)) return NULL; } break; case IM_SYMBOL_PROPERTY_BOOL: pv = EIMIL_construct_prop2(EIMIL_TYPE_BOOL); if (!pv) return NULL; pprop = &pv->v.prop; for (i = 0; i < pim->count; i++) { pv2 = EIMIL_construct_bool(pim->vals.bools[i]); if (!pv2) return NULL; if (!EIMIL_add_prop(pprop, pv2)) return NULL; } break; case IM_SYMBOL_PROPERTY_CHAR: pv = EIMIL_construct_prop2(EIMIL_TYPE_CHAR); if (!pv) return NULL; pprop = &pv->v.prop; for (i = 0; i < pim->count; i++) { pv2 = EIMIL_construct_char(pim->vals.chars[i]); if (!pv2) return NULL; if (!EIMIL_add_prop(pprop, pv2)) return NULL; } break; case IM_SYMBOL_PROPERTY_MTEXT: pv = EIMIL_construct_prop2(EIMIL_TYPE_MTEXT); if (!pv) return NULL; pprop = &pv->v.prop; for (i = 0; i < pim->count; i++) { pv2 = EIMIL_construct_mtext_from_IMText(&pim->vals.mtexts[i]); if (!pv2) return NULL; if (!EIMIL_add_prop(pprop, pv2)) return NULL; } default: abort(); } return pv; } static IMFeedbackList* create_feedback( int size ) { int i; IMFeedbackList *feedback; IMFeedback *fb; feedback = (IMFeedbackList *) malloc(sizeof(IMFeedbackList) * size); for (i = 0; i < size; i++) { IMFeedbackList *fbl = &feedback[i]; fbl->count_feedbacks = 1; fb = (IMFeedback *) malloc(sizeof(IMFeedback) * 4); fbl->feedbacks = fb; memset(fbl->feedbacks, 0, sizeof(IMFeedback) * 4); } return feedback; } static void set_feedback( UTF32 *basestr, UTF32 *strend, EIMIL_prop *pprop, IMFeedbackList* pfbl ) { int st, end; IMFeedback *pfb; EIMIL_value *pv; if (pprop->type != EIMIL_TYPE_NUMBER) return; st = EIMIL_adjust_UTF32_pos_to_UTF16(pprop->st, basestr, strend); ASSERT(st >= 0); end = EIMIL_adjust_UTF32_pos_to_UTF16(pprop->end, basestr, strend); ASSERT(end >= 0); pv = pprop->pvals[0]; ASSERT(pv->type == EIMIL_TYPE_NUMBER); for (pfb = pfbl->feedbacks + st; end > st; st++, pfb++) { if (IM_FEEDBACK_TYPE(pfb)) continue; IM_FEEDBACK_TYPE(pfb) = IM_DECORATION_FEEDBACK; IM_FEEDBACK_VALUE(pfb) = pv->v.number; } } /* TODO: check recursive loop. */ int EIMIL_convert_mtext_to_IMText( IMText *pim, EIMIL_mtext *psrc ) { int i, j; EIMIL_value **ppv, *pv; EIMIL_mtext_props *pmp; EIMIL_prop *pprop; IMAnnotation *pima; IMAnnotationValue *pimav; IMProp *pimp; memset(pim, 0, sizeof(IMText)); pim->encoding = UTF16_CODESET; pim->count_annotations = psrc->slotsnum; pima = (IMAnnotation*) malloc(sizeof(IMAnnotation) * pim->count_annotations); pim->annotations = pima; if (!pima) { free(pim); return 0; } /* UTF16 string */ if (!EIMIL_convert_UTF32_to_UTF16(psrc->ustr, psrc->len, &pim->text.utf_chars, &pim->char_length)) { free(pim->annotations); free(pim); return 0; } pim->feedback = create_feedback(pim->char_length); if (!pim->feedback) { free(pim->text.utf_chars); free(pim->annotations); free(pim); return 0; } /* feedback & annotation */ for (pmp = psrc->pslots, i = 0; i < psrc->slotsnum; pmp++, pima++, i++) { pima->type = pmp->property_sym->symbolid; if (pima->type == EIMIL_SYMBOL_ID_FEEDBACK) { for (ppv = pmp->pprops, j = 0; j < pmp->num; ppv++, pimav++, j++) { pv = *ppv; ASSERT(pv->type == EIMIL_TYPE_PROP); pprop = &pv->v.prop; set_feedback(psrc->ustr, psrc->ustr + psrc->len, pprop, pim->feedback); } } pima->num_values = pmp->num; pimav = (IMAnnotationValue*) malloc(sizeof(IMAnnotationValue) * pmp->num); if (!pimav) { free(pim->annotations); free(pim); return 0; } pima->values = pimav; for (ppv = pmp->pprops, j = 0; j < pmp->num; ppv++, pimav++, j++) { pv = *ppv; ASSERT(pv->type == EIMIL_TYPE_PROP); pprop = &pv->v.prop; pimav->start_pos = EIMIL_adjust_UTF32_pos_to_UTF16(pprop->st, psrc->ustr, psrc->ustr + psrc->len); ASSERT(pimav->start_pos >= 0); pimav->end_pos = EIMIL_adjust_UTF32_pos_to_UTF16(pprop->end, psrc->ustr, psrc->ustr + psrc->len); ASSERT(pimav->end_pos >= 0); pimav->len = -1; pimp = EIMIL_prop_convert_to_IMProp(pprop); if (!pimp) { free(pim->annotations); free(pim); return 0; } } } return 1; } static int EIMIL_add_props_of_IMText( EIMIL_mtext *pm, IMText *pim ) { int i, j; int mst, mend; EIMIL_value *pv; IMAnnotation *pima; IMAnnotationValue *pimav; for (pima = pim->annotations, i = 0; i < pim->count_annotations; pima++, i++) { for (pimav = pima->values, j = 0; j < pima->num_values; pimav++, j++) { pv = EIMIL_prop_convert_IMProp((IMProp*)pimav->value); mst = EIMIL_adjust_UTF16_pos_to_UTF32(pimav->start_pos, pm->ustr, pm->ustr + pm->len); if (mst < 0) return 0; mend = EIMIL_adjust_UTF16_pos_to_UTF32(pimav->end_pos, pm->ustr, pm->ustr + pm->len); if (mend < 0) return 0; if (!EIMIL_add_prop_on_mtext(pm, pv, mst, mend)) return 0; } } return 1; } EIMIL_value* EIMIL_construct_mtext_from_IMText( IMText *pim ) { EIMIL_value *pv; pv = EIMIL_construct_mtext_from_UTF16(pim->char_length, pim->text.utf_chars); if (!pv) return NULL; if (!EIMIL_add_props_of_IMText(&pv->v.mtext, pim)) return NULL; return pv; } /* orig mtext |------------------------------| <=====> st end <*********> ulen new mtext |------<*********>-----------------| st nend |<->| dlen |<----------nlen------------------>| */ int EIMIL_sync_mtext( EIMIL_mtext *pm, IMDifferential *pdiff ) { int i, j, n; int st, end, nend, mst, mend, nlen, dlen; UTF32 *pustr, ulen; EIMIL_mtext_props *pmp; EIMIL_value **ppv, *pv, *pv2; EIMIL_prop *pprop; IMText *ptx; ptx = &pdiff->text; /* STEP1: replace the interval with the text. */ st = EIMIL_adjust_UTF16_pos_to_UTF32(pdiff->chg_first, pm->ustr, pm->ustr + pm->len); if (st < 0) return 0; end = EIMIL_adjust_UTF16_pos_to_UTF32(pdiff->chg_len, pm->ustr + st, pm->ustr + pm->len); if (end < 0) return 0; end += st; if (!EIMIL_convert_UTF16_to_UTF32(ptx->text.utf_chars, ptx->char_length, &pustr, &ulen)) return 0; dlen = ulen - (end - st); nlen = pm->len + dlen; if ((end - st) > ulen) { memmove(pm->ustr + st + ulen, pm->ustr + end, sizeof(UTF32) * (pm->len - end)); pm->ustr = (UTF32*) realloc(pm->ustr, sizeof(UTF32) * nlen); }else if ((end - st) < ulen) { pm->ustr = (UTF32*) realloc(pm->ustr, sizeof(UTF32) * nlen); memmove(pm->ustr + st + ulen, pm->ustr + end, sizeof(UTF32) * (pm->len - end)); } memcpy(pm->ustr + st, pustr, sizeof(UTF32) * ulen); free(pustr); /* STEP2: strip or move props in the interval. */ for (pmp = pm->pslots, i = 0; i < pm->slotsnum; pmp++, i++) { n = pmp->num; ppv = pmp->pprops; for (j = 0; j < n;) { pv = ppv[j]; ASSERT(pv->type == EIMIL_TYPE_PROP); pprop = &pv->v.prop; mst = pprop->st; mend = pprop->end; if ((mend >= st) && (mst < st)) { /* <---> */ /* <===> */ pprop->end = st; j++; }else if ((mend <= end) && (mst >= st)) { /* <---> */ /* <======> */ pprop->st = -1; pprop->end = -1; pprop->target = NULL; n--; if (n > j) { memmove(ppv + j, ppv + j + 1, sizeof(EIMIL_value*) * (n - i)); } EIMIL_RMREF(*pv); }else if ((mst <= end) && (mend > end)) { /* <---> */ /* <======> */ pprop->st = nend; j++; }else if ((mst < st) && (mend > end)) { /* <--------> */ /* <===> */ /* | */ /* V */ /* <-><===><> */ /* i i+1 */ pv2 = EIMIL_copy_value(pv); if (!pv2) return 0; pv2->v.prop.st = nend; EIMIL_ADDREF(*pv2); pprop->end = st; ppv = (EIMIL_value**) realloc(ppv, sizeof(EIMIL_value*) * (n + 1)); pmp->pprops = ppv; if (n > (j + 1)) { memmove(ppv + j + 2, ppv + j + 1, sizeof(EIMIL_value*) * (n - j)); } ppv[j + 1] = pv2; n++; j += 2; }else if (mst < st) { /* <----> */ /* <====> */ j++; }else{ /* <----> */ /* <====> */ pprop->st += dlen; pprop->end += dlen; j++; } } } /* STEP3: add props of the text. */ if (!EIMIL_add_props_of_IMText(pm, ptx)) return 0; return 1; } int EIMIL_mtext_diff( EIMIL_mtext *porig, EIMIL_mtext *pnew, IMDifferential *pdiff ) { /* TODO: make it more efficient!!! */ if (!EIMIL_convert_mtext_to_IMText(&pdiff->text, pnew)) return 0; pdiff->chg_first = 0; pdiff->chg_len = 0; return 1; } /* Local Variables: */ /* c-file-style: "iiim-project" */ /* End: */