/* * utf8conv version 1.1 The hex to UTF-8 Convertor * Copyright (C) 2001 FibreSpeed, Inc. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA * * Contact the author, Michael T. Babcock at * or the Copyright holder, FibreSpeed Inc. at */ #include #include #include "debug.h" #include "unicode.h" #include "UnicodeMasks.h" int opt_quiet = 0, opt_showname = 0, opt_debug = 0, opt_raw = 0, opt_houtput = 0; int get_UTF8(const unsigned long val, char (*str)[]) { int bytes = 1, i = 0; unsigned char byte[10] = {0,0,0,0,0,0,0,0,0,0}; unsigned long bits = val; char *ucsname = NULL; if (opt_debug) { printf("val: "); print_long_in_binary(val); printf("\n"); } if (opt_showname) get_unicodename(val, &ucsname); if (val <= 0x7f) { byte[0] = (char)(val & M01111111); if (opt_debug) printf("UTF-8: "); printf("<%0X>", byte[0]); if (opt_raw) { printf("\t[ %c ]\n", byte[0]); } if (opt_houtput) { fprintf(stderr, "\t0x%0X", byte[0]); if (ucsname) fprintf(stderr, "\t/* %s */", ucsname); fprintf(stderr, "\n"); } return 0; } /* else: val > 0x7f */ bytes ++; if (val > 0x7ff) bytes ++; if (val > 0xffff) bytes ++; if (val > 0x1fffff) bytes ++; if (val > 0x3ffffff) { bytes ++; if (val > 0x7fffffff) bytes ++; UTF8_too_big(bytes); return 1; } /* Prepare LSBs */ for (i = 0; i < (bytes-1); i++) { byte[i] = (bits & M00111111) | M10000000; bits = bits >> 6; } switch (bytes) { case 2: byte[1] = (bits & M00011111) | M11000000; if (opt_debug) { print_byte_in_binary(byte[1]); printf(" "); print_byte_in_binary(byte[0]); printf("\n"); } break; case 3: byte[2] = (bits & M00001111) | M11100000; if (opt_debug) { print_byte_in_binary(byte[2]); printf(" "); print_byte_in_binary(byte[1]); printf(" "); print_byte_in_binary(byte[0]); printf("\n"); } break; case 4: byte[3] = (bits & M00000111) | M11110000; if (opt_debug) { print_byte_in_binary(byte[3]); printf(" "); print_byte_in_binary(byte[2]); printf(" "); print_byte_in_binary(byte[1]); printf(" "); print_byte_in_binary(byte[0]); printf("\n"); } break; case 5: byte[4] = (bits & M00000011) | M11111000; if (opt_debug) { print_byte_in_binary(byte[4]); printf(" "); print_byte_in_binary(byte[3]); printf(" "); print_byte_in_binary(byte[2]); printf(" "); print_byte_in_binary(byte[1]); printf(" "); print_byte_in_binary(byte[0]); printf("\n"); } break; default: printf("Too many bytes!\n"); return 2; } if (opt_debug) printf("UTF-8: "); if (opt_houtput) fprintf(stderr, "\t"); for (i = bytes-1; i>=0; i--) { printf("<%0X>", byte[i]); (*str)[bytes-i-1] = byte[i]; if (opt_houtput) { /* Need a ", " in between each */ fprintf(stderr, "0x%0X%s", byte[i], i>0?", ":""); } } (*str)[bytes] = 0; if (opt_raw) { printf("\t[ "); printf(*str); printf(" ]"); } if (opt_houtput) { if (ucsname || opt_raw) { fprintf(stderr, "\t/* "); if (opt_raw) { fprintf(stderr, "%s", *str); if (ucsname) fprintf(stderr, " "); } if (ucsname) fprintf(stderr, "%s", ucsname); fprintf(stderr, " */"); } fprintf(stderr, "\n"); } return 0; } void show_help() { printf(" -c C header output on stderr\n"); printf(" -h This help screen\n"); printf(" -q As quiet as possible\n"); printf(" -r Print raw Unicode bytes (not working)\n"); printf(" -s Show Unicode character names\n"); printf(" -v Be more verbose\n"); } int main(int argc, char *argv[]) { char buf[1025], *check = NULL; unsigned long val = 0; int i; for (i = 1; i < argc; i++) { if (argv[i][1] == 'h') { show_help(); return 0; } if (argv[i][1] == 'c') opt_houtput = 1; if (argv[i][1] == 'q') opt_quiet = 1; if (argv[i][1] == 'r') opt_raw = 1; if (argv[i][1] == 's') opt_showname = 1; if (argv[i][1] == 'v') opt_debug = 1; } if (!opt_quiet) { printf("Enter one 4 digit hex value per line.\n"); printf("Hit ENTER alone when finished\n"); } while (fgets(buf, 1024, stdin) && buf[0] != '\n') { char str[10]; val = strtoul(buf, &check, 16); if (check == '\0') { printf("Conversion failed.\n"); break; } if (opt_showname) { show_unicodename(val); } if (opt_houtput) fprintf(stderr, "char utf8chars[] = {\n"); if (get_UTF8(val, &str) == 0) { if (opt_raw) { // printf ("\tCharacter: %s", buf); } } if (opt_houtput) fprintf(stderr, "};\n"); printf("\n"); } return 0; }