/**************************************************************** lv.c $Author: igarashi $ $Date: 2000/06/08 17:28:44 $ ****************************************************************/ #include #include #include #include #include #include VALUE rb_cIChar; VALUE rb_cIString; VALUE rb_cLV; /********************************** IChar **********************************/ typedef i_str_t ichar; /* * helper functions */ static void _rb_ichar_free(ic) ichar *ic; { free(ic); } static VALUE rb_ichar_new(ic) ichar ic; { ichar *new_ic; VALUE obj; obj = Data_Make_Struct(rb_cIChar, ichar, 0, _rb_ichar_free, new_ic); *new_ic = ic; return obj; } static ichar _rb_ichar_get(obj) VALUE obj; { ichar *ic; Data_Get_Struct(obj, ichar, ic); return *ic; } static ichar rb_ichar_get(obj) VALUE obj; { ichar *ic; VALUE klass = rb_class_of(obj); if (klass != rb_cIChar) rb_raise(rb_eTypeError, "wrong argument type %s (expected IChar)", rb_class2name(klass)); Data_Get_Struct(obj, ichar, ic); return *ic; } /* * instance methods */ static VALUE rb_ichar_inspect(self) VALUE self; { ichar ic; char buf[64]; VALUE ret; ic = _rb_ichar_get(self); sprintf(buf, "#", ic.charset, ic.c); ret = rb_str_new2(buf); return ret; } /* static VALUE rb_ichar_to_i(c) VALUE c; { i_str_c _c; _c = _get_ichar(c); return INT2NUM(_c); } */ /* static VALUE rb_ichar_lang(self) VALUE self; { _mlchar c; int lang; c = _get_mlchar(self); lang = (c & 0xff000000) >> 24; return INT2FIX(lang); } */ static VALUE rb_ichar_charset(self) VALUE self; { ichar ic; ic = _rb_ichar_get(self); return INT2FIX(ic.charset); } static VALUE rb_ichar_codepoint(self) VALUE self; { ichar ic; ic = _rb_ichar_get(self); return INT2FIX(ic.c); } /********************************** IString **********************************/ typedef struct { size_t len; ichar *ptr; VALUE orig; } istr; #define STR_FREEZE FL_USER1 #define STR_NO_ORIG FL_USER3 /* * helper functions */ static void _rb_istr_free(is) istr *is; { /* rb_warning("_rb_istr_free called."); */ free(is->ptr); free(is); } static VALUE rb_istr_new(str, len) ichar *str; int len; { istr *is; ichar *buf; VALUE obj; /* rb_warning("rb_istr_new called."); */ buf = ALLOC_N(ichar, len); MEMCPY(buf, str, ichar, len); obj = Data_Make_Struct(rb_cIString, istr, 0, _rb_istr_free, is); is->len = len; is->ptr = buf; is->orig = 0; /* rb_obj_call_init(obj, argc, argv); */ return obj; } static istr* _rb_istr_get(obj) VALUE obj; { istr *is; Data_Get_Struct(obj, istr, is); return is; } static istr* rb_istr_get(obj) VALUE obj; { istr *is; VALUE klass = rb_class_of(obj); if (klass != rb_cIString) rb_raise(rb_eTypeError, "wrong argument type %s (expected IString)", rb_class2name(klass)); Data_Get_Struct(obj, istr, is); return is; } static VALUE _str2istr(mbstr) VALUE mbstr; { istr str; /* str = istr_new(RSTRING(mbstr)->ptr); */ rb_raise(rb_eTypeError, "IString required"); return rb_istr_new(str); } /* * class methods */ /* static VALUE rb_istr_clone(VALUE obj); */ /* static VALUE rb_istr_new(argc, argv, klass) int argc; VALUE *argv; VALUE klass; { VALUE src; VALUE cs; istr is; VALUE cls; if (rb_scan_args(argc, argv, "11", &src, &cs) == 1) { cls = rb_class_of(src); if (cls == rb_cString) { } else if (cls == rb_cMLString) { return rb_istr_clone(src); } else { src = rb_funcall(src, rb_intern("to_s"), 0); } str = mlstr_new(RSTRING(src)->ptr); } else { cls = rb_class_of(mbstr); if (cls != rb_cString) rb_raise(rb_eTypeError, "String required"); str = mlstr_newWithCode(RSTRING(mbstr)->ptr, RSTRING(encoding)->ptr); } return _rb_mlstr_new(str); } */ /* * instance methods */ /* static VALUE rb_istr_clone(self) VALUE self; { istr is; istr new_is; is = _get_mlstr(obj); new_str = str->dup(str); return _rb_mlstr_new(new_str); } */ /* static VALUE rb_istr_to_s(obj) VALUE obj; { istr *is = _rb_get_mlstr(obj); char *_str; VALUE mbstr; _str = str->get(str); mbstr = rb_str_new2(_str); free(_str); return mbstr; } */ /* static VALUE rb_istr_inspect(obj) VALUE obj; { mlstr str; char *mbstr; int mbstr_len; char *buf; int buf_len; VALUE ret; str = _get_mlstr(obj); mbstr = str->get(str); mbstr_len = strlen(mbstr); buf_len = mbstr_len+2; buf = ALLOC_N(char, buf_len); buf[0] = '"'; memcpy(buf+1, mbstr, mbstr_len); buf[mbstr_len+1] = '"'; free(mbstr); ret = rb_str_new(buf, buf_len); free(buf); return ret; } */ static VALUE rb_istr_elem_type(self) VALUE self; { return rb_cIChar; } static VALUE rb_istr_length(self) VALUE self; { istr *is; is = _rb_istr_get(self); return INT2NUM(is->len); } static VALUE rb_istr_empty_p(self) VALUE self; { istr *is; is = _rb_istr_get(self); if (is->len == 0) return Qtrue; return Qfalse; } static void rb_istr_modify(str) VALUE str; { ichar *ptr; istr *is = _rb_istr_get(str); if (FL_TEST(str, STR_FREEZE)) rb_raise(rb_eTypeError, "can't modify frozen string"); if (!FL_TEST(str, FL_TAINT) && rb_safe_level() >= 4) rb_raise(rb_eSecurityError, "Insecure: can't modify string"); if (!is->orig || FL_TEST(str, STR_NO_ORIG)) return; ptr = is->ptr; is->ptr = ALLOC_N(ichar, is->len); if (is->ptr) { MEMCPY(is->ptr, ptr, ichar, is->len); /* is->ptr[is->len] = 0; */ } is->orig = 0; } static VALUE rb_istr_cat(str, ptr, len) VALUE str; const ichar *ptr; long len; { istr *is = _rb_istr_get(str); if (len > 0) { /* rb_str_modify(str); */ REALLOC_N(is->ptr, ichar, is->len + len); if (ptr) MEMCPY(is->ptr + is->len, ptr, ichar, len); is->len += len; /* RSTRING(str)->ptr[RSTRING(str)->len] = '\0'; */ /* sentinel */ } return str; } static VALUE rb_istr_concat(str1, str2) VALUE str1, str2; { istr *is2; /* rb_str_modify(str1); */ if (rb_class_of(str2) == rb_cIChar) { ichar c = _rb_ichar_get(str2); return rb_istr_cat(str1, &c, 1); } /* if (TYPE(str2) != T_STRING) str2 = rb_str_to_str(str2); */ is2 = rb_istr_get(str2); return rb_istr_cat(str1, is2->ptr, is2->len); } /* static VALUE rb_mlstr_plus(obj1, obj2) VALUE obj1, obj2; { VALUE obj; mlstr str; mlstr str2; obj = rb_mlstr_clone(obj1); str = _get_mlstr(obj); if (TYPE(obj2) == T_STRING) obj2 = _str2mlstr(obj2); str2 = _get_mlstr(obj2); str->append(str, str2); return obj; } static VALUE rb_mlstr_compare(obj, obj2) VALUE obj, obj2; { mlstr str; mlstr str2; int result; str = _get_mlstr(obj); if (TYPE(obj2) == T_STRING) obj2 = _str2mlstr(obj2); str2 = _get_mlstr(obj2); result = str->equals(str, str2); return INT2FIX(result); } static VALUE rb_mlstr_equals(obj, obj2) VALUE obj, obj2; { mlstr str; mlstr str2; int result; str = _get_mlstr(obj); if (TYPE(obj2) == T_STRING) obj2 = _str2mlstr(obj2); str2 = _get_mlstr(obj2); result = str->equals(str, str2); if (result) return Qtrue; else return Qfalse; } static VALUE rb_mlstr_index(obj, obj2) VALUE obj, obj2; { mlstr str; mlstr str2; int result; str = _get_mlstr(obj); if (TYPE(obj2) == T_STRING) obj2 = _str2mlstr(obj2); str2 = _get_mlstr(obj2); result = str->search(str, str2); if (result == -1) return Qnil; else return INT2NUM(result); } static VALUE rb_mlstr_rindex(obj, obj2) VALUE obj, obj2; { mlstr str; mlstr str2; int result; str = _get_mlstr(obj); if (TYPE(obj2) == T_STRING) obj2 = _str2mlstr(obj2); str2 = _get_mlstr(obj2); result = str->rsearch(str, str2); if (result == -1) return Qnil; else return INT2NUM(result); } static VALUE rb_mlstr_aref(obj, index) VALUE obj, index; { mlstr str; _mlchar c; int _index; str = _get_mlstr(obj); _index = NUM2INT(index); if (_index < 0 || str->len <= _index ) rb_raise(rb_eIndexError, "index %d out of string", _index); c = str->str[_index]; return _rb_mlchar_new(c); } */ VALUE rb_istr_substr(str, beg, len) VALUE str; long beg, len; { VALUE str2; istr *is = _rb_istr_get(str); if (len < 0) return Qnil; if (beg > is->len) return Qnil; if (beg < 0) { beg += is->len; if (beg < 0) return Qnil; } if (beg + len > is->len) { len = is->len - beg; } if (len < 0) { len = 0; } if (len == 0) return rb_istr_new(0,0); str2 = rb_istr_new(is->ptr+beg, len); if (OBJ_TAINTED(str)) OBJ_TAINT(str2); return str2; } static VALUE rb_istr_aref(str, indx) VALUE str; VALUE indx; { long idx; istr *is = _rb_istr_get(str); switch (TYPE(indx)) { case T_FIXNUM: idx = FIX2LONG(indx); if (idx < 0) { idx = is->len + idx; } if (idx < 0 || is->len <= idx) { return Qnil; } return rb_ichar_new(is->ptr[idx]); /* case T_REGEXP: if (rb_reg_search(indx, str, 0, 0) >= 0) return rb_reg_last_match(rb_backref_get()); return Qnil; case T_STRING: if (rb_str_index(str, indx, 0) != -1) return indx; return Qnil; */ default: /* check if indx is Range */ { long beg, len; switch (rb_range_beg_len(indx, &beg, &len, is->len, 0)) { case Qfalse: break; case Qnil: return Qnil; default: return rb_istr_substr(str, beg, len); } } rb_raise(rb_eIndexError, "invalid index for IString"); } return Qnil; /* not reached */ } static VALUE rb_istr_aref_method(argc, argv, str) int argc; VALUE *argv; VALUE str; { VALUE arg1, arg2; if (rb_scan_args(argc, argv, "11", &arg1, &arg2) == 2) { return rb_istr_substr(str, NUM2INT(arg1), NUM2INT(arg2)); } return rb_istr_aref(str, arg1); } /* static VALUE rb_mlstr_aset(str, index, c) VALUE str, index, c; { mlstr _str; _mlchar _c; int _index; _str = _get_mlstr(str); _index = NUM2INT(index); _c = _get_mlchar(c); if (_index < 0 || _str->len <= _index) rb_raise(rb_eIndexError, "index %d out of string", _index); _str->str[_index] = _c; return _rb_mlchar_new(_c); } static VALUE rb_mlstr_chop(self) VALUE self; { mlstr str; VALUE new_obj; new_obj = rb_mlstr_clone(self); str = _get_mlstr(new_obj); str->chop(str); return new_obj; } static VALUE rb_mlstr_chop2(self) VALUE self; { mlstr str; str = _get_mlstr(self); str->chop(str); return self; } static VALUE rb_mlstr_gsub_bang(str, str1, str2) VALUE str, str1, str2; { mlstr _str; mlstr _str1; mlstr _str2; int result; _str = _get_mlstr(str); if (TYPE(str1) == T_STRING) str1 = _str2mlstr(str1); _str1 = _get_mlstr(str1); if (TYPE(str2) == T_STRING) str2 = _str2mlstr(str2); _str2 = _get_mlstr(str2); result = _str->gsub(_str, _str1, _str2); if (result == 0) return str; else return Qnil; } static VALUE rb_mlstr_gsub(str, str1, str2) VALUE str, str1, str2; { VALUE str0; VALUE result; str0 = rb_mlstr_clone(str); result = rb_mlstr_gsub_bang(str0, str1, str2); if (NIL_P(result)) return str; else return str0; } */ static VALUE rb_istr_each_char(str) VALUE str; { istr *_str; int i; _str = _rb_istr_get(str); for (i = 0; i < _str->len; i++) { rb_yield(rb_ichar_new(_str->ptr[i])); } return str; } /********************************** IConv **********************************/ /* class methods */ static VALUE rb_iconv_decode(self, mbs, cs) VALUE self, mbs, cs; { int len; char *str; ichar *is_str; /* check cs */ if (NUM2INT(cs) < 0 || RAW < NUM2INT(cs)) rb_raise(rb_eArgError, "argument out of range"); if (NIL_P(mbs)) { rb_raise(rb_eTypeError, "wrong argument type nil"); } str = rb_str2cstr(mbs, &len); is_str = alloca(sizeof(ichar) * len); Decode(is_str, NUM2INT(cs), str, &len); return rb_istr_new(is_str, len); } static VALUE rb_iconv_encode(self, str, cs) VALUE self, str, cs; { istr *is; int mb_len; str_t *code; char *outbuf; int i; char *ptr; VALUE val; /* check cs */ if (NUM2INT(cs) <= AUTOSELECT || RAW < NUM2INT(cs)) rb_raise(rb_eArgError, "not supported CES"); /* check istr */ is = rb_istr_get(str); mb_len = is->len * 4 + CODE_EXTRA_LEN * 2; code = alloca(sizeof(str_t) * mb_len); /* for (i = 0; i < is->len; i++) rb_warning("is->str[%d] = %x", i, is->ptr[i]); */ Encode(is->ptr, 0, is->len, NUM2INT(cs), TRUE, code, &mb_len); /* rb_warning("mb_len: %d", mb_len); */ ptr = outbuf = alloca(mb_len); for (i = 0 ; i < mb_len ; i++) *ptr++ = 0xff & code[i]; val = rb_str_new(outbuf, mb_len); return val; } /********************************** * Library Initialization *********************************/ void Init_lv() { /* * initialize library */ ItableInit(); CtableInit(); IstrInit(); DecodeInit(); /* * IChar class */ rb_cIChar = rb_define_class("IChar", rb_cObject); rb_define_method(rb_cIChar, "inspect", rb_ichar_inspect, 0); rb_define_method(rb_cIChar, "to_s", rb_ichar_inspect, 0); rb_define_method(rb_cIChar, "charset", rb_ichar_charset, 0); rb_define_method(rb_cIChar, "codepoint", rb_ichar_codepoint, 0); /* constants */ rb_define_const(rb_cIChar, "ISO646_US", INT2FIX(ISO646_US)); rb_define_const(rb_cIChar, "X0201ROMAN", INT2FIX(X0201ROMAN)); rb_define_const(rb_cIChar, "X0201KANA", INT2FIX(X0201KANA)); rb_define_const(rb_cIChar, "ISO8859_1", INT2FIX(ISO8859_1)); rb_define_const(rb_cIChar, "ISO8859_2", INT2FIX(ISO8859_2)); rb_define_const(rb_cIChar, "ISO8859_3", INT2FIX(ISO8859_3)); rb_define_const(rb_cIChar, "ISO8859_4", INT2FIX(ISO8859_4)); rb_define_const(rb_cIChar, "ISO8859_5", INT2FIX(ISO8859_5)); rb_define_const(rb_cIChar, "ISO8859_6", INT2FIX(ISO8859_6)); rb_define_const(rb_cIChar, "ISO8859_7", INT2FIX(ISO8859_7)); rb_define_const(rb_cIChar, "ISO8859_8", INT2FIX(ISO8859_8)); rb_define_const(rb_cIChar, "ISO8859_9", INT2FIX(ISO8859_9)); rb_define_const(rb_cIChar, "C6226", INT2FIX(C6226)); rb_define_const(rb_cIChar, "GB2312", INT2FIX(GB2312)); rb_define_const(rb_cIChar, "X0208", INT2FIX(X0208)); rb_define_const(rb_cIChar, "KSC5601", INT2FIX(KSC5601)); rb_define_const(rb_cIChar, "X0212", INT2FIX(X0212)); rb_define_const(rb_cIChar, "ISO_IR_165", INT2FIX(ISO_IR_165)); rb_define_const(rb_cIChar, "CNS_1", INT2FIX(CNS_1)); rb_define_const(rb_cIChar, "CNS_2", INT2FIX(CNS_2)); rb_define_const(rb_cIChar, "CNS_3", INT2FIX(CNS_3)); rb_define_const(rb_cIChar, "CNS_4", INT2FIX(CNS_4)); rb_define_const(rb_cIChar, "CNS_5", INT2FIX(CNS_5)); rb_define_const(rb_cIChar, "CNS_6", INT2FIX(CNS_6)); rb_define_const(rb_cIChar, "CNS_7", INT2FIX(CNS_7)); rb_define_const(rb_cIChar, "BIG5", INT2FIX(BIG5)); rb_define_const(rb_cIChar, "UNICODE", INT2FIX(UNICODE)); rb_define_const(rb_cIChar, "PSEUDO", INT2FIX(PSEUDO)); rb_define_const(rb_cIChar, "SPACE", INT2FIX(SPACE)); rb_define_const(rb_cIChar, "HTAB", INT2FIX(HTAB)); rb_define_const(rb_cIChar, "CNTRL", INT2FIX(CNTRL)); rb_define_const(rb_cIChar, "LINE_FEED", INT2FIX(LINE_FEED)); rb_define_const(rb_cIChar, "I_TABLE_SIZE", INT2FIX(I_TABLE_SIZE)); rb_define_const(rb_cIChar, "NOSET", INT2FIX(I_TABLE_SIZE)); rb_define_const(rb_cIChar, "ASCII", INT2FIX(ISO646_US)); /* * IString class */ rb_cIString = rb_define_class("IString", rb_cObject); rb_include_module(rb_cIString, rb_mEnumerable); /* rb_include_module(rb_cMLString, rb_mComparable); */ /* rb_cIString = rb_define_class("IString", rb_cString); */ /* rb_define_singleton_method(rb_cIString, "new", rb_istr_new, -1); */ /* instance methods */ /* rb_define_method(rb_cMLString, "clone", rb_mlstr_clone, 0); */ /* rb_define_method(rb_cMLString, "to_s", rb_mlstr_to_s, 0); */ /* rb_define_method(rb_cMLString, "inspect", rb_istr_inspect, 0); */ rb_define_method(rb_cIString, "element_type", rb_istr_elem_type, 0); rb_define_method(rb_cIString, "length", rb_istr_length, 0); rb_define_alias(rb_cIString, "size", "length"); rb_define_method(rb_cIString, "empty?", rb_istr_empty_p, 0); /* rb_define_method(rb_cMLString, "aref", rb_mlstr_aref, 1); */ /* rb_define_method(rb_cMLString, "substr", rb_mlstr_substr, 2); */ rb_define_method(rb_cIString, "[]", rb_istr_aref_method, -1); /* rb_define_method(rb_cMLString, "aset", rb_mlstr_aset, 2); */ /* rb_define_method(rb_cMLString, "[]=", rb_mlstr_aset, 2); */ rb_define_method(rb_cIString, "concat", rb_istr_concat, 1); rb_define_method(rb_cIString, "<<", rb_istr_concat, 1); /* rb_define_method(rb_cMLString, "+", rb_mlstr_plus, 1); */ /* rb_define_method(rb_cMLString, "==", rb_mlstr_equals, 1); */ /* rb_define_method(rb_cMLString, "===", rb_mlstr_equals, 1); */ /* rb_define_method(rb_cMLString, "eql?", rb_mlstr_equals, 1); */ /* rb_define_method(rb_cMLString, "<=>", rb_mlstr_compare, 1); */ /* rb_define_method(rb_cMLString, "index", rb_mlstr_index, 1); */ /* rb_define_method(rb_cMLString, "rindex", rb_mlstr_rindex, 1);*/ /* rb_define_method(rb_cMLString, "chop", rb_mlstr_chop, 0); */ /* rb_define_method(rb_cMLString, "chop!", rb_mlstr_chop2, 0); */ /* rb_define_method(rb_cMLString, "gsub", rb_mlstr_gsub, 2); */ /* rb_define_method(rb_cMLString, "gsub!", rb_mlstr_gsub_bang, 2); */ rb_define_method(rb_cIString, "each_char", rb_istr_each_char, 0); rb_define_method(rb_cIString, "each", rb_istr_each_char, 0); /* * LV class */ rb_cLV = rb_define_class("LV", rb_cObject); /* constants */ rb_define_const(rb_cLV, "AUTOSELECT", INT2FIX(AUTOSELECT)); rb_define_const(rb_cLV, "UTF_7", INT2FIX(UTF_7)); rb_define_const(rb_cLV, "HZ_GB", INT2FIX(HZ_GB)); rb_define_const(rb_cLV, "EUC_KOREA", INT2FIX(EUC_KOREA)); rb_define_const(rb_cLV, "EUC_JAPAN", INT2FIX(EUC_JAPAN)); rb_define_const(rb_cLV, "EUC_TAIWAN", INT2FIX(EUC_TAIWAN)); rb_define_const(rb_cLV, "EUC_CHINA", INT2FIX(EUC_CHINA)); rb_define_const(rb_cLV, "BIG_FIVE", INT2FIX(BIG_FIVE)); rb_define_const(rb_cLV, "SHIFT_JIS", INT2FIX(SHIFT_JIS)); rb_define_const(rb_cLV, "UTF_8", INT2FIX(UTF_8)); rb_define_const(rb_cLV, "ISO_8859_1", INT2FIX(ISO_8859_1)); rb_define_const(rb_cLV, "ISO_8859_2", INT2FIX(ISO_8859_2)); rb_define_const(rb_cLV, "ISO_8859_3", INT2FIX(ISO_8859_3)); rb_define_const(rb_cLV, "ISO_8859_4", INT2FIX(ISO_8859_4)); rb_define_const(rb_cLV, "ISO_8859_5", INT2FIX(ISO_8859_5)); rb_define_const(rb_cLV, "ISO_8859_6", INT2FIX(ISO_8859_6)); rb_define_const(rb_cLV, "ISO_8859_7", INT2FIX(ISO_8859_7)); rb_define_const(rb_cLV, "ISO_8859_8", INT2FIX(ISO_8859_8)); rb_define_const(rb_cLV, "ISO_8859_9", INT2FIX(ISO_8859_9)); rb_define_const(rb_cLV, "ISO_2022_CN", INT2FIX(ISO_2022_CN)); rb_define_const(rb_cLV, "ISO_2022_JP", INT2FIX(ISO_2022_JP)); rb_define_const(rb_cLV, "ISO_2022_KR", INT2FIX(ISO_2022_KR)); rb_define_const(rb_cLV, "RAW", INT2FIX(RAW)); /* class methods */ rb_define_singleton_method(rb_cLV, "decode", rb_iconv_decode, 2); rb_define_singleton_method(rb_cLV, "encode", rb_iconv_encode, 2); }