/* Time-stamp: <2007-02-05 22:04:42 poser>
 *
 * This program is a filter that reverses its input character by character.
 * It works on both ASCII and UTF-8 Unicode.
 *
 * Copyright (C) 2007 William J. Poser (billposer@alum.mit.edu)
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of version 2 of the GNU General Public License
 * as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 * or go to the web page: http://www.gnu.org/licenses/gpl.txt.
 */

#include "config.h"

/*
 * NOTE(review): the names of the four system headers were missing from the
 * source as received. The headers below are the standard ones this file
 * needs; confirm against the upstream copy.
 */
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#ifdef HAVE_LOCALE_H
#include <locale.h>
#endif

#ifdef HAVE_LIBINTL_H
#include <libintl.h>
#else
#define gettext(x) (x)
#endif

char compdate[]="Compiled " __DATE__ " " __TIME__ ;
char pgname[]="unirev";

/* Defined at the end of this file. */
char *GetLine(FILE *fp, int *LineLength);

/* Print a short description of the program and its options on stderr. */
void
ShowUsage(void)
{
  fprintf(stderr,"Read UTF-8 input line-by-line and emit reversed character-by-character.\n");
  fprintf(stderr," -h Print help information.\n");
  fprintf(stderr," -v Print version information.\n");
  putc('\n',stderr);
}

/* Print the program name, version, build date and licence on stderr. */
void
ShowVersion(void)
{
  fprintf(stderr,"\n%s %s\n",pgname,PACKAGE_VERSION);
  fprintf(stderr,"%s\n",compdate);
  fprintf(stderr,"Copyright (C) 2007 William J. Poser\n");
  fprintf(stderr,"This program is free software; you can redistribute it and/or modify\n");
  fprintf(stderr,"it under the terms of version 2 of the GNU General Public License\n");
  fprintf(stderr,"as published by the Free Software Foundation.\n");
  fprintf(stderr,"Report bugs to: billposer@alum.mit.edu\n");
}

/*
 * Number of continuation bytes that follow a given first byte.
 * Entries 0x00-0xBF (ASCII and continuation bytes) are 0; lead bytes
 * 0xC0-0xFF give 1 to 5.
 */
static const char TrailingBytes[256] = {
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
  3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5
};

/*
 * Reverse the len bytes at in, character by character, into out.
 *
 * out must have room for at least len bytes. The result is NOT
 * null-terminated, so input containing NUL bytes is handled correctly.
 *
 * The strategy is to work from the end of the line looking for lead bytes.
 * On finding one, we emit it plus the requisite number of continuation
 * bytes. Continuation bytes that do not belong to any lead byte (surplus
 * or orphaned ones) are never copied and so are silently dropped.
 *
 * Returns the number of bytes written to out, or -1 after printing a
 * diagnostic on stderr if a lead byte is followed by too few continuation
 * bytes. LineNo is used only in that diagnostic.
 */
static int
ReverseLine(const char *in, int len, char *out, unsigned long LineNo)
{
  /* Bytes are examined as unsigned so the tests below do not depend on
     whether plain char is signed on this platform. */
  const unsigned char *src = (const unsigned char *) in;
  char *optr = out;
  int NextStart = len;  /* index of the nearest following non-continuation byte */
  int i;

  for (i = len - 1; i >= 0; i--) {
    unsigned char b = src[i];

    if (b < 0x80) {                     /* ASCII character */
      *optr++ = in[i];
      NextStart = i;
    }
    else if (b & 0x40) {                /* lead byte: 11xxxxxx */
      int BytesInChar = 1 + (int) TrailingBytes[b];

      if (i + BytesInChar > NextStart) {
        int Required = BytesInChar - 1;
        int Present = NextStart - i - 1;

        fprintf(stderr,"Truncated UTF-8 sequence at byte %d of line %lu\n",i+1,LineNo);
        fprintf(stderr,"%d continuation bytes %s required but only %d %s present.\n",
                Required, Required == 1 ? "is" : "are",
                Present, Present == 1 ? "is" : "are");
        return -1;
      }
      memcpy(optr, in + i, (size_t) BytesInChar);
      optr += BytesInChar;
      NextStart = i;
    }
    /* Otherwise a continuation byte (10xxxxxx): copied with its lead byte. */
  }
  return (int) (optr - out);
}

/*
 * Filter entry point: read lines from stdin and write each one reversed,
 * followed by a newline, to stdout.
 *
 * Any command-line argument prints help or version text (or an error for
 * an unknown option) and exits without processing input.
 *
 * Exit status: 0 on success, 1 after usage/version output, 2 on an
 * unrecognized option, allocation failure or output error, 3 on a
 * truncated UTF-8 sequence.
 */
int
main(int ac, char **av)
{
  unsigned long LineCnt = 0L;

  if (ac > 1) {
    if (av[1][0] == '-') {
      if (av[1][1] == 'v') {
        ShowVersion();
        exit(1);
      }
      else if (av[1][1] == 'h') {
        ShowUsage();
        exit(1);
      }
      else {
        fprintf(stderr,"Option %c not recognized.\n",av[1][1]);
        exit(2);
      }
    }
    ShowUsage();
    exit(1);
  }

  while (1) {
    int len = -1;   /* stays negative if GetLine fails before setting it */
    int outlen;
    char *obuf;
    char *ibuf = GetLine(stdin, &len);

    if (ibuf == NULL) {
      fprintf(stderr,"unirev: failed to allocate storage.\n");
      exit(2);
    }
    if (len < 0) {  /* EOF */
      free(ibuf);
      break;
    }
    LineCnt++;

    /* +1 keeps the request non-zero for an empty line. */
    obuf = malloc((size_t) len + 1);
    if (!obuf) {
      fprintf(stderr,"unirev: failed to allocate storage.\n");
      exit(2);
    }

    outlen = ReverseLine(ibuf, len, obuf, LineCnt);
    if (outlen < 0) {
      free(obuf);
      free(ibuf);
      exit(3);
    }

    /* fwrite rather than puts so that embedded NUL bytes are preserved;
       an empty line produces exactly one newline. */
    fwrite(obuf, 1, (size_t) outlen, stdout);
    putchar('\n');

    free(obuf);
    free(ibuf);
  }

  if (fflush(stdout) == EOF || ferror(stdout)) {
    fprintf(stderr,"unirev: error writing output.\n");
    exit(2);
  }
  exit(0);
}

/*
 * Read a line of arbitrary length from a file.
 *
 * Return a pointer to the null-terminated string allocated, or null on failure
 * to allocate sufficient storage.
 * It is the responsibility of the caller to free the space allocated.
 *
 * The length of the line is placed in the variable pointed to by
 * the second argument. (-1) is placed in this variable on EOF.
// */
/*
 * The first line of this section terminates the block comment that is still
 * open at the end of the preceding source line. It is written as a line
 * comment so that it is also harmless when this section is compiled alone.
 */

/* Initial size, in bytes, of the buffer GetLine allocates. */
#define INITLENGTH 32

/*
 * GetLine - read one line of arbitrary length from fp.
 *
 * Bytes are read up to, and not including, the next newline or end of
 * file. The result is returned in a freshly allocated, null-terminated
 * buffer which the caller must free. This holds for the EOF case too,
 * where an allocated empty string is returned.
 *
 * *LineLength receives the number of bytes in the line, or -1 if end of
 * file was reached before any byte was read. A final line that lacks a
 * trailing newline is returned normally, and the following call reports -1.
 *
 * Returns NULL if storage cannot be allocated. In that case nothing is
 * leaked and *LineLength is set to -1.
 */
char *
GetLine(FILE *fp, int *LineLength)
{
  int c;                         /* int, not char: EOF must stay distinct from byte 0xFF */
  size_t Available = INITLENGTH;
  int CharsRead = 0;
  char *Line;

  *LineLength = -1;              /* defined value on every failure path */

  Line = malloc(Available);
  if (Line == NULL) return NULL;

  while ((c = getc(fp)) != EOF && c != '\n') {
    if ((size_t) CharsRead == Available - 1) {   /* -1 because of null */
      /* Double the buffer so that long lines cost linear, not quadratic, time. */
      size_t NewSize = Available * 2;
      char *Bigger = realloc(Line, NewSize);

      if (Bigger == NULL) {
        free(Line);              /* realloc leaves the old block allocated on failure */
        return NULL;
      }
      Line = Bigger;
      Available = NewSize;
    }
    Line[CharsRead++] = (char) c;
  }

  Line[CharsRead] = '\0';
  if (c == EOF && CharsRead == 0) *LineLength = -1;   /* Signal EOF */
  else *LineLength = CharsRead;
  return Line;
}