/* $Id: conv.c,v 1.7 2000/04/24 07:35:42 a Exp $ */
#include <stdio.h>
#include <stdlib.h>
#include <stdarg.h>
#include <string.h>
#include <errno.h>
#include "jconv.h"
#define DEBUG_DO(x)
#undef MIN
#define MIN(x, y) ((x) < (y) ? (x) : (y))
/*
* Applies iconv() to the text in buffer and stores the result in
* *buffer_r. *buffer_r is newly allocated, and user is responsible
* to free() it.
*/
int
jconv_alloc_apply_iconv (iconv_t cd,
const char *buffer,
size_t len,
char **buffer_r,
size_t *len_r,
size_t *error_pos_r)
{
char *to;
const char *from_p, *from;
size_t from_len, to_len, to_alloc;
int error_code;
DEBUG_DO(printf ("jconv_alloc_apply_iconv\n"));
*buffer_r = NULL;
*len_r = 0;
*error_pos_r = 0;
from_p = from = buffer;
from_len = len;
to_alloc = 4096;
to = malloc(to_alloc);
if (to == NULL)
return errno;
to_len = 0;
for (;;) {
char *to_p;
const char *from_p_old;
size_t s, from_left, to_left;
if (to_alloc < to_len + 4096) {
to_alloc += 4096;
to = realloc(to, to_alloc);
if (to == NULL)
return errno;
}
from_left = MIN(from + from_len - from_p, 256);
to_p = to + to_len;
to_left = 4096;
error_code = 0;
from_p_old = from_p;
s = iconv(cd, &from_p, &from_left, &to_p, &to_left);
if (s == (size_t)-1)
error_code = errno;
switch (error_code) {
case 0:
if (from_p < from + from_len)
break;
/* write a reset sequence */
s = iconv(cd, NULL, NULL, &to_p, &to_left);
if (s == (size_t)-1)
error_code = errno;
to_len = to_p - to;
goto break_loop;
case EILSEQ:
goto break_loop;
case EINVAL:
if (from_p + from_left < from + from_len)
break;
goto break_loop;
case E2BIG:
case EBADF:
default:
abort();
}
if (from_p_old == from_p) {
/* I believe iconv() is buggy if we reach here.
* We stop calling iconv() and return E2BIG in
* order not to go into an infinite loop. */
error_code = E2BIG;
break;
}
to_len = to_p - to;
}
break_loop:
to = realloc(to, to_len + 1); /* truncate */
if (to == NULL)
return errno;
to[to_len] = 0;
if (error_code)
*error_pos_r = from_p - from;
*len_r = to_len;
*buffer_r = to;
return error_code;
}
/*
* Converts the text in src according to the specified codesets and
* stores the result in dest. The src_codesets are candidate names
* for codeset of src. At first, this function presumes src is coded in
* src_codesets[0] and tries to convert. If it failes, tries the next
* codeset, and so on. This function returns 0 if src is a valid text
* of one of specified codesets, and nonzero otherwise.
*/
int
jconv_alloc_conv (const char *src,
size_t src_len,
char **dest_r,
size_t *dest_len_r,
const char *const *src_codesets,
int num_src_codesets,
int *actual_codeset_r,
const char *dest_codeset)
{
int i;
char *new_buffer = NULL;
size_t new_buffer_len = 0, error_pos = 0;
int error_code = 0;
*dest_r = NULL;
*dest_len_r = 0;
*actual_codeset_r = num_src_codesets;
if (strcasecmp(dest_codeset, "SJIS") == 0)
dest_codeset = "Shift_JIS";
for (i = 0; i < num_src_codesets; i++) {
iconv_t cd;
const char *src_codeset = src_codesets[i];
if (strcasecmp(src_codeset, "SJIS") == 0)
src_codeset = "Shift_JIS";
DEBUG_DO(printf("jconv_alloc_conv: try %s\n", src_codeset));
cd = iconv_open(dest_codeset, src_codeset);
if (cd == (iconv_t)-1) {
/* EMFILE, ENFILE, ENOMEM, or EINVAL */
error_code = errno;
break;
}
error_code = jconv_alloc_apply_iconv (cd, src, src_len,
&new_buffer,
&new_buffer_len,
&error_pos);
/*
* Glibc don't reject paticular 8-bit strings when
* from_codeset is ISO-2022-JP. We reject them by
* hand.
*/
/******** DIRTY HACK ON ********/
if (error_code == 0 &&
strcasecmp(src_codesets[i], "ISO-2022-JP") == 0)
{
int j;
for (j = 0; j < src_len; j++) {
if (src[j] & 0x80) {
error_code = EILSEQ;
break;
}
}
}
/******** DIRTY HACK OFF ********/
iconv_close(cd);
if (error_code) {
if (new_buffer)
free(new_buffer);
new_buffer = NULL;
}
if (error_code != EILSEQ)
break;
}
if (num_src_codesets > 0 && i >= num_src_codesets)
i = num_src_codesets - 1;
DEBUG_DO(printf("FROM: %s\n", src));
DEBUG_DO(printf("CODESET: %s\n", src_codesets[i]));
*dest_r = new_buffer;
*dest_len_r = new_buffer_len;
*actual_codeset_r = i;
return error_code;
}
/*
* Converts the text in src according to the specified codesets and
* stores the result in dest. If dest_codeset is NULL, codeset of
* the current locale obtained by jconv_info_get_current_codeset() is
* used. The src_codesets are candidate names for codeset of src.
* If src_codesets is of zero-length, codeset names obtained by
* jconv_info_get_pref_codesets() are used. This function returns 0
* if src is a valid text of one of specified codesets, and nonzero
* otherwise.
*/
int
jconv_alloc_conv_autodetect (const char *src,
size_t src_len,
char **dest_r,
size_t *dest_len_r,
const char *const *src_codesets,
int num_src_codesets,
int *actual_codeset_r,
const char *dest_codeset)
{
if (dest_codeset == NULL)
dest_codeset = jconv_info_get_current_codeset();
if (num_src_codesets == 0)
src_codesets = jconv_info_get_pref_codesets(&num_src_codesets);
return jconv_alloc_conv(src, src_len, dest_r, dest_len_r,
src_codesets, num_src_codesets,
actual_codeset_r, dest_codeset);
}
/*
* Mostly same as jconv_alloc_conv_autodetect() except that src must be
* a null-terminated string, and that this function simply plays as
* strdup() if convertion is failed.
*/
char *
jconv_strdup_conv_autodetect (const char *src,
const char *dest_codeset,
const char *src_codeset,
...)
{
int n = 0, actual_codeset;
const char **cs = NULL;
va_list ap;
char *newstr;
size_t newstr_len;
int error_code;
if (src_codeset) {
cs = malloc(sizeof(*cs));
if (cs == NULL)
return NULL;
va_start(ap, src_codeset);
while (src_codeset) {
cs = realloc(cs, (n + 1) * sizeof(*cs));
if (cs == NULL) {
va_end(ap);
return NULL;
}
cs[n++] = src_codeset;
src_codeset = va_arg(ap, const char *);
}
va_end(ap);
}
error_code = jconv_alloc_conv_autodetect(src, strlen(src),
&newstr, &newstr_len,
cs, n,
&actual_codeset,
dest_codeset);
if (cs) free(cs);
if (error_code) {
if (newstr) free(newstr);
newstr = strdup(src);
}
return newstr;
}
char *
jconv_strdup_conv_fullauto (const char *src)
{
DEBUG_DO ({
int i;
for (i = 0; i <= strlen (src); i++) {
printf ("%02x ", ((unsigned char *)src)[i]);
}
printf ("\n");
})
return jconv_strdup_conv_autodetect (src, NULL, NULL);
}
syntax highlighted by Code2HTML, v. 0.9.1