/*
 * Copyright (c) 1999 G. Adam Stanislav
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * sget.utf-8.c v.1
 */

#include <stdio.h>	/* EOF, NULL */

/* Returned by sgetu8() when the input is not a well-formed sequence. */
#define INVALID 0x80000000

/*
 * Read the next byte of the input into `c`.
 *
 * This macro deliberately reaches into the enclosing function: it advances
 * `strptr`, bumps `*chars` when `chars` is not NULL, and makes the enclosing
 * function return (unsigned int)EOF when the byte read is the NUL terminator.
 * The do/while wrapper keeps it a single statement so it stays safe inside
 * unbraced if/else bodies.
 */
#define get(c) do {					\
	(c) = *strptr++;				\
	if (chars)					\
		(*chars)++;				\
	if ((c) == 0)					\
		return (unsigned int)EOF;		\
} while (0)

/*
 * sgetu8 - decode one UTF-8 encoded value from a NUL-terminated string.
 *
 * strptr  first byte of the sequence to decode; may be NULL.
 * chars   if not NULL, receives the number of bytes consumed.  The count
 *         includes the byte that ended decoding (the NUL terminator or the
 *         offending byte); it is 0 when strptr is NULL.
 *
 * Returns:
 *   - the decoded value.  The historical 1- to 6-byte forms are accepted,
 *     so results of up to 31 bits are possible; surrogates and values
 *     above 0x10FFFF are NOT filtered here.
 *   - (unsigned int)EOF if strptr is NULL, or if a NUL byte is read, either
 *     as the first byte or in the middle of a multi-byte sequence.
 *   - INVALID if the lead byte is a stray continuation byte or 0xFE/0xFF,
 *     if a following byte is not of the form 10xxxxxx, or if the sequence
 *     is overlong (it encodes a value that fits in a shorter form).
 */
unsigned int
sgetu8(unsigned char *strptr, int *chars)
{
	/*
	 * Smallest value that legitimately needs N continuation bytes.
	 * Anything below it is an overlong encoding (e.g. C0 AF for '/'),
	 * which must be rejected so that filters scanning the raw bytes
	 * cannot be bypassed.
	 */
	static const unsigned int min_value[] = {
		0x00, 0x80, 0x800, 0x10000, 0x200000, 0x4000000
	};
	unsigned int c;
	int i, iterations;
	unsigned char ch;

	if (chars)
		*chars = 0;

	if (strptr == NULL)
		return (unsigned int)EOF;

	get(c);

	/* The lead byte gives the sequence length and the top payload bits. */
	if ((c & 0xFE) == 0xFC) {
		c &= 0x01;
		iterations = 5;
	} else if ((c & 0xFC) == 0xF8) {
		c &= 0x03;
		iterations = 4;
	} else if ((c & 0xF8) == 0xF0) {
		c &= 0x07;
		iterations = 3;
	} else if ((c & 0xF0) == 0xE0) {
		c &= 0x0F;
		iterations = 2;
	} else if ((c & 0xE0) == 0xC0) {
		c &= 0x1F;
		iterations = 1;
	} else if ((c & 0x80) == 0x80) {
		/* Stray continuation byte, or 0xFE/0xFF. */
		return INVALID;
	} else {
		/* Plain 7-bit ASCII. */
		return c;
	}

	/* Each continuation byte (10xxxxxx) contributes six more bits. */
	for (i = 0; i < iterations; i++) {
		get(ch);
		if ((ch & 0xC0) != 0x80)
			return INVALID;
		c <<= 6;
		c |= ch & 0x3F;
	}

	/* The whole sequence has been consumed; reject non-shortest forms. */
	if (c < min_value[iterations])
		return INVALID;

	return c;
}