/*
 * TEXTFILE - General functions to deal with (possibly nested) text files.
 *	      Doesn't assume any particular file syntax or grammar, except
 *	      for text_getline() and text_getparsedword(). Doesn't need its
 *	      input files to be seekable.
 *
 * Author:
 * Emile van Bergen, emile@evbergen.xs4all.nl
 *
 * Permission to redistribute an original or modified version of this program
 * in source, intermediate or object code form is hereby granted exclusively
 * under the terms of the GNU General Public License, version 2. Please see the
 * file COPYING for details, or refer to http://www.gnu.org/copyleft/gpl.html.
 *
 * History:
 * 2000/12/19 - EvB - Created
 * 2000/12/30 - EvB - Changed text_strtok into text_strtok3(), which allows 
 *		      you to specify the three sets of separation characters 
 *		      that are used resp. to discard, copy, and discard again 
 *		      individually.
 *		    - Added a simple parser in text_getparseditem().
 * 2001/01/08 - EvB - Added the TEXT_ versions of the peeking and counting
 *                    functions also available for ringbuffers.
 *                  - (should also rewrite the other 'upper side' functions, 
 *                    simplifying things by making use of the new scanning 
 *                    functions).
 * 2001/01/10 - EvB - Finally found the right way to do the parser, so
 *		      that it will always keep a correct line count.
 * 2001/06/25 - EvB - Killed stdio as we can write decimals ourselves now
 * 2001/07/03 - EvB - Added text_ensure and #if-ed out some unneeded code
 * 		      that should be rewritten using text_ensure when it's
 * 		      needed again.
 * 		    - Simplified str(c)spn and made it work with the new
 * 		      ring_read implementation.
 */

/* Identification / copyright string for this module. */
char textfile_id[] = "TEXTFILE - Copyright (C) 2000 Emile van Bergen.";


/*
 * INCLUDES & DEFINES
 */


#include <stdlib.h>	/* For malloc() / free() */
#include <string.h>	/* For strcpy() / strcat() */
#include <unistd.h>	/* For open() */
#include <fcntl.h>	/* For O_RDONLY */
#include <errno.h>	/* For errno */

#include <metatype.h>	/* For meta_ordtoa() */
#include <textfile.h>

#define DEBUGLEVEL 0	/* Emit code for DEBUGx calls up to 3 */
#include <debug.h>

/*
 * FUNCTIONS
 */


/* Creation / deletion */

/*
 * Create a new TEXT object with an empty file stack.
 *
 * basepath   - base directory used by text_include() for relative names;
 *              a private copy is made. May be 0.
 * maxlinelen - stored in the object; text_include_fd() uses it to size the
 *              ring buffer of each included file.
 *
 * Returns the new object, or 0 if memory could not be allocated.
 */
TEXT *text_new(char *basepath, int maxlinelen)
{
	TEXT *t;
	size_t pathsize;

	/* Zero-filled allocation: every member not set below starts at 0 */
	t = calloc(1, sizeof(*t));
	if (!t) return 0;

	t->maxlinelen = maxlinelen;
	if (!basepath) return t;

	/* Keep our own copy of the base path, terminator included */
	pathsize = strlen(basepath) + 1;
	t->basepath = malloc(pathsize);
	if (!t->basepath) {
		free(t);
		return 0;
	}
	memcpy(t->basepath, basepath, pathsize);

	return t;
}


/*
 * Destroy a TEXT object: end every file still on its stack, then release
 * the base path copy and the object itself. A null pointer is ignored.
 */
void  text_del(TEXT *t)
{
	if (!t) return;

	/* Pop files until the stack is empty */
	while (t->f) {
		text_endfile(t);
	}

	/* Release the base path copy, if one was made */
	if (t->basepath) {
		free(t->basepath);
	}

	free(t);
}


/* Lower side */

/* start with a new fd or file */

/*
 * Open the file `fname` read-only and put it on top of t's file stack.
 *
 * Name resolution:
 *  - a name starting with '/' is opened as given;
 *  - a relative name is opened as "<basepath>/<fname>" when t->basepath is
 *    a non-empty string;
 *  - a relative name is opened as given (cwd-relative) when t->basepath
 *    is the empty string;
 *  - a relative name fails when t->basepath is 0.
 *
 * Returns the result of text_include_fd() (0 on success), or -1 if fname is
 * null or empty, no base path is available, the temporary name cannot be
 * allocated, or open() fails.
 *
 * errno: the value left by a failing open() or a failing text_include_fd()
 * is preserved across the debug logging and cleanup done here.
 */
int text_include(TEXT *t, char *fname)
{
	int fd, err, ret;
	char *fullfname = 0;

	/* Check the non-object argument */
	if (!fname || !fname[0]) return -1;

	/* See if we have a relative filename, and a zero basepath
	   (which will make this fail) or a non-empty basepath. An empty
	   basepath causes cwd-relative file access. */
	if (fname[0] != '/' && (!t->basepath || t->basepath[0])) {

		/* A relative name without any basepath cannot be resolved. */
		if (!t->basepath) return -1;

		/* Build "<basepath>/<fname>" in a temporary buffer:
		   both lengths plus the '/' and the terminating 0. */
		fullfname = (char *)malloc(strlen(t->basepath) +
					   strlen(fname) + 2);
		if (!fullfname) return -1;
		strcpy(fullfname, t->basepath);
		strcat(fullfname, "/");
		strcat(fullfname, fname);

		/* Try to open fullfname; save errno before msg() can
		   overwrite it */
		fd = open(fullfname, O_RDONLY); err = errno;
		msg(F_TEXT, L_DEBUG, "text_include: opening %s => %s: returned "
				     "%d\n", fname, fullfname, fd);
	}
	else {

		/* Try to open fname; save errno before msg() can
		   overwrite it */
		fd = open(fname, O_RDONLY); err = errno;
		msg(F_TEXT, L_DEBUG, "text_include: opening %s: returned %d\n",
		    fname, fd);
	}

	/* Include the file only if the open was successful. */
	if (fd == -1) {
		ret = -1;
	}
	else {
		ret = text_include_fd(t, fd, 1, fullfname ? fullfname : fname);

		/* If the include itself failed, report its errno instead of
		   the stale value captured after the successful open(). */
		if (ret == -1) err = errno;
	}

	if (fullfname) free(fullfname);
	errno = err;
	return ret;
}


/*
 * Put an already opened file descriptor on top of t's file stack.
 *
 * fd         - descriptor the new file object will read from.
 * closeatend - if nonzero, fd is closed by text_endfile(), and also here
 *              when setting up the file object fails.
 * infofname  - informational name stored in the file object, truncated to
 *              fit. If 0, the name becomes "/dev/fd/" followed by whatever
 *              meta_ordtoa() writes for fd (presumably its decimal form).
 *
 * Returns 0 on success, or -1 if the file object or its ring buffer could
 * not be allocated.
 */
int text_include_fd(TEXT *t, int fd, int closeatend, char *infofname)
{
	TEXT_FILE *f;
	size_t namemax;

	/* Zero-filled file object plus its ring buffer; undo on failure */
	f = calloc(1, sizeof(*f));
	if (f) {
		/* Line length plus \r\n + 0 + guard byte */
		f->r = ring_new(t->maxlinelen + 4);
		if (!f->r) {
			free(f);
			f = 0;
		}
	}
	if (!f) {
		if (closeatend) close(fd);
		return -1;
	}

	/* Members that must not stay zero */
	f->fd = fd;
	f->closeatend = closeatend;
	f->linenr = 1;

	/* Informational filename, always leaving room for the terminator */
	namemax = sizeof(f->fname) - 1;
	if (infofname) {
		strncpy(f->fname, infofname, namemax);
		f->fname[namemax] = 0;
	}
	else if (sizeof(f->fname) > 20) {
		strncpy(f->fname, "/dev/fd/", namemax);
		meta_ordtoa(f->fname + 8, namemax - 8, 0, 10, fd);
		f->fname[namemax] = 0;
	}

	/* Link in above the file that is currently on top */
	f->next = t->f;
	t->f = f;

	return 0;
}


/* ending the current file */

/*
 * End the file on top of t's stack: free its ring buffer, close its
 * descriptor if that was requested when it was included, and free the
 * file object. The file below it, if any, becomes the current one.
 *
 * Returns 1 if a file is still left on the stack, 0 if the stack is now
 * empty, or -1 if there was no file to end in the first place.
 */
int text_endfile(TEXT *t)
{
	TEXT_FILE *top = t->f;

	/* Nothing on the stack at all */
	if (!top) return -1;

	/* Unlink the top element, then release everything it owns */
	t->f = top->next;

	ring_del(top->r);
	if (top->closeatend) {
		close(top->fd);
	}
	free(top);

	return t->f ? 1 : 0;
}


/* Makes sure we have at least tried to get mincnt bytes into the ring buffer
   of the current file. With mincnt zero (or negative), a read is attempted
   regardless of what is already buffered. A read is only attempted while
   t->f->status is RING_OK or RING_EAGAIN. If no read is attempted, *added
   (when given) is set to 0; otherwise added is handed to ring_read().
   Returns the number of bytes available afterwards. */

ssize_t text_ensure(TEXT *t, ssize_t mincnt, ssize_t *added)
{
	TEXT_FILE *cur = t->f;
	ssize_t avail = ring_maxget(cur->r);
	int satisfied = mincnt > 0 && avail >= mincnt;
	int readable = cur->status == RING_OK || cur->status == RING_EAGAIN;

	/* Enough buffered already, or reading further is known to be futile */
	if (satisfied || !readable) {
		if (added) *added = 0;
		return avail;
	}

	/* Pull in as much as possible and accumulate the resulting status */
	cur->status |= ring_read(cur->r, cur->fd, added);
	return ring_maxget(cur->r);
}


#if 0

/* This is all fine working code (except for the changed ring_read interface),
   but we're not using it at all, so it's left out. Things should be rewritten
   using text_ensure anyway, when needed again. */


/* Upper side */

/* Get a single character from the current file's ring buffer, reading from
   the file when the buffer has nothing to offer. Returns the character,
   TEXT_EOF at end of file or TEXT_IOERR on a read error (the original
   comment documents these as -2 and -1 respectively).
   NOTE: this function lies inside the "#if 0" region and is not compiled; it
   still calls ring_read() with four arguments, whereas text_ensure() calls
   it with three. */

int text_getchar(TEXT *t)
{
	int c;

	/* See if there is a char available and if so, return it */
	c = ring_peek(t->f->r, 0);
	if (c != -1) { ring_discard(t->f->r, 1); return c; }

	/* Nothing there, so fill the ring */
	c = ring_read(t->f->r, t->f->fd, 0, 0); 
	if (c != -1) {

		/* No read error, see if we have it now and if so, return it */
		c = ring_peek(t->f->r, 0);
		if (c != -1) { ring_discard(t->f->r, 1); return c; }

		/* Still nothing, so EOF. */
		return TEXT_EOF;
	}

	/* Read error */
	return TEXT_IOERR;
}


/* Get up to len characters from the current file into buf. Returns the
   number of characters obtained, TEXT_EOF if none could be obtained, or
   TEXT_IOERR if filling the ring failed (the original comment documents
   these as -2 and -1 respectively).
   NOTE: this function lies inside the "#if 0" region and is not compiled; it
   still calls ring_read() with four arguments, whereas text_ensure() calls
   it with three. */

ssize_t text_getdata(TEXT *t, char *buf, ssize_t len)
{
	ssize_t ret;

	/* See if we already have the requested amount */
	if (ring_maxget(t->f->r) < len) {

		/* We don't, so try to fill the ring, return -1 if it fails */
		if (ring_read(t->f->r, t->f->fd, 0, 0) == -1) return TEXT_IOERR;
	}

	/* Get it from the ring, possibly less than requested if we're getting
           at the EOF. */
	ret = ring_get(t->f->r, buf, len);

	/* Return number of characters obtained or EOF if zero */
	return ret > 0 ? ret : TEXT_EOF;
}


/* Get a line, terminated with \n, \r, \r\n or \n\r. Always consumes full
   lines, truncating the copy in `line` to maxlen - 1 characters if needed,
   and always puts a trailing 0 after the copied characters. Removes the line
   ending character(s) from the ring, but keeps empty lines intact. Increments
   the line number of the current file. Returns the non-truncated length of
   the line, TEXT_EOF if the buffer is empty after a read attempt, or
   TEXT_IOERR if the read failed (the original comment documents these as -2
   and -1 respectively).
   NOTE: this function lies inside the "#if 0" region and is not compiled; it
   still calls ring_read() with four arguments, whereas text_ensure() calls
   it with three. */

int text_getline(TEXT *t, char *line, ssize_t maxlen)
{
	ssize_t avail, len, disc;

	/* See if we already have a full line. */
	avail = ring_maxget(t->f->r);
	len = ring_strcspn(t->f->r, "\r\n", 2);
	if (len >= avail - 1) {  
	
		/* We don't, so try to fill the ring, return -1 if it fails */
		if (ring_read(t->f->r, t->f->fd, 0, 0) == -1) return TEXT_IOERR;
	
		/* Look again at what we have, return EOF if buffer empty */
		avail = ring_maxget(t->f->r);
		if (!avail) return TEXT_EOF;

		len = ring_strcspn(t->f->r, "\r\n", 2);
	}

	/* We've found the end of a line, so increment the line number */
	t->f->linenr++;

	/* See if what we have plus a trailing 0 fits in the buffer */
	if (len <= maxlen - 1) {

		/* It fits, so copy all bytes */
		ring_get(t->f->r, line, len);

		/* Add a zero */
		line[len] = 0;
	}
	else {

		/* It doesn't fit, so first copy maxlen - 1 bytes */
		ring_get(t->f->r, line, maxlen - 1);

		/* Add a zero */
		line[maxlen - 1] = 0;

		/* And discard the rest */
		ring_discard(t->f->r, len - (maxlen - 1));
	}

	/* See if we need to discard any extra end-of-line combination
	   characters in addition to the one that ended the line: disc is 2
	   for a \r\n or \n\r pair and 1 otherwise. */
	disc = 1 + ((ring_peek(t->f->r, 0) == '\r' && 
		     ring_peek(t->f->r, 1) == '\n') || 
		    (ring_peek(t->f->r, 0) == '\n' && 
		     ring_peek(t->f->r, 1) == '\r'));

	/* Discard the end-of-line character(s) */
	ring_discard(t->f->r, disc);

	/* Return the non-truncated length of the line */
	return len;
}


/* Get a sequence of characters, separated by any of the characters in the
   given sets. Removes all consecutive separation characters, so skips empty
   lines or words. Truncates the copy in `string` to maxlen - 1 characters
   when needed and always adds a trailing 0. The set of separation characters
   can be different for each phase:
     sepchars1 - leading characters that are discarded before the segment;
     sepchars2 - characters that end the segment that is copied;
     sepchars3 - after copying, everything up to the first character from
                 this set is discarded.
   Returns the original (non-truncated) length of the segment, TEXT_EOF if
   the buffer is empty after a read attempt, or TEXT_IOERR if the read failed.
   NOTE: this function lies inside the "#if 0" region and is not compiled; it
   still calls ring_read() with four arguments, whereas text_ensure() calls
   it with three. */

int text_strtok3(TEXT *t, char *string, ssize_t maxlen, 
		 char *sepchars1, char *sepchars2, char *sepchars3)
{
	ssize_t avail, len, disc;

	/* Discard any initial separation characters */
	disc = ring_strspn(t->f->r, sepchars1, strlen(sepchars1));
	if (disc) ring_discard(t->f->r, disc);

	/* See if we already have a full segment (left). */
	avail = ring_maxget(t->f->r);
	len = ring_strcspn(t->f->r, sepchars2, strlen(sepchars2));
	DEBUG3("text_strtok 1: avail=%d, len=%d\n", avail, len);

	if (len >= avail - 1) {  
	
		/* We don't, so fill up the ring, return -1 if it fails */
		if (ring_read(t->f->r, t->f->fd, 0, 0) == -1) return TEXT_IOERR;

		/* Discard any initial separation characters */
		disc = ring_strspn(t->f->r, sepchars1, strlen(sepchars1));
		if (disc) ring_discard(t->f->r, disc);

		/* Look again at what we have, return EOF if buffer empty */
		avail = ring_maxget(t->f->r);
		if (!avail) return TEXT_EOF;

		len = ring_strcspn(t->f->r, sepchars2, strlen(sepchars2));
		DEBUG3("text_strtok 2: avail=%d, len=%d\n", avail, len);
	}

	/* See if what we have plus a trailing 0 fits in the buffer */
	if (len <= maxlen - 1) {

		/* It fits, so copy all bytes */
		ring_get(t->f->r, string, len);

		/* Add a zero */
		string[len] = 0;
	}
	else {

		/* It doesn't fit, so first copy maxlen - 1 bytes */
		ring_get(t->f->r, string, maxlen - 1);

		/* Add a zero */
		string[maxlen - 1] = 0;

		/* And discard the rest */
		ring_discard(t->f->r, len - (maxlen - 1));
	}

	/* Discard up to the trailing separation character(s) - they
	   will be removed by the initial discard at the next call. */
	disc = ring_strcspn(t->f->r, sepchars3, strlen(sepchars3));
	if (disc) ring_discard(t->f->r, disc);

	/* Return the non-truncated length of the segment */
	return len;
}

#endif


/* Peeking / scanning functions. They do the same as their corresponding ring 
   buffer counterparts, except they try to add to the buffer from the file 
   when reaching the end of the available segment, and retry the operation. */

/* peek ahead at a certain position */


#if 1	/* new, simpler implementation using text_ensure() */

#if 0	/* just a macro in the header now */
/* Function form of text_peek(): asks text_ensure() to try to make pos + 1
   bytes available, then returns ring_peek()'s result for position pos.
   Not compiled: it is enclosed in "#if 0". */
int text_peek(TEXT *t, ssize_t pos)
{
	text_ensure(t, pos + 1, 0); 
	return ring_peek(t->f->r, pos);
}
#endif

#else


/* Older implementation of text_peek(), not compiled because it sits in the
   "#else" branch of an "#if 1". Returns the character at position pos, or:
     TEXT_ELEMTOOBIG - pos is negative or not below the ring capacity;
     TEXT_IOERR      - reading more data failed;
     TEXT_EOF        - nothing at position 0 even after reading;
     TEXT_ALMOSTEOF  - nothing at a position > 0 even after reading.
   It still calls ring_read() with four arguments, whereas text_ensure()
   calls it with three. */
int text_peek(TEXT *t, ssize_t pos)
{
	int ret;

	/* Check if we are even theoretically able to answer this */
	if (pos < 0 || pos >= ring_capacity(t->f->r)) return TEXT_ELEMTOOBIG;

	/* Do the peek, return if we already got something */
	ret = ring_peek(t->f->r, pos);
	if (ret != -1) return ret;

	/* We tried to peek beyond what we have available. Read as much as
	   we can, return if an error occurred */
	if (ring_read(t->f->r, t->f->fd, 0, 0) == -1) return TEXT_IOERR;

	/* Peek again, return if we got something this time */
	ret = ring_peek(t->f->r, pos);
	if (ret != -1) return ret;

	/* If we still haven't got anything and we peeked at position 0,
	   we know we're at the end of the file. */
	if (pos == 0) return TEXT_EOF;

	/* Otherwise, we can only say we looked beyond the end of the buffer,
	   due to a coming EOF. */
	return TEXT_ALMOSTEOF;
}
#endif


/* peek at a number of bytes - returns number of bytes copied or error (<0) */

#if 0	/* just a macro in the header now */
/* Function form of text_peekdata(): asks text_ensure() to try to make buflen
   bytes available, then returns the result of ring_peekdata() on buf.
   Not compiled: it is enclosed in "#if 0". */
ssize_t text_peekdata(TEXT *t, char *buf, ssize_t buflen)
{
	text_ensure(t, buflen, 0); 
	return ring_peekdata(t->f->r, buf, buflen);
}
#endif


/* scan for a segment containing none of or only a list of characters. */

#if 1		/* new implementations */


/* Length of the leading segment of the current file's buffered data that
   contains none of the slen characters in s, as reported by ring_strcspn().
   If the segment covers everything that is buffered, one attempt is made to
   read more data and, if any was added, the scan is redone. When endchar is
   given, it receives ring_peek()'s result for the position just past the
   segment. */
ssize_t text_strcspn(TEXT *t, char *s, int slen, int *endchar)
{
	ssize_t span, got;

	span = ring_strcspn(t->f->r, s, slen);

	/* The scan ran into the end of the buffered data: try to get more,
	   and only rescan if that actually added something. */
	if (span >= ring_maxget(t->f->r)) {
		text_ensure(t, 0, &got);
		if (got) {
			span = ring_strcspn(t->f->r, s, slen);
		}
	}

	/* Report what follows the segment */
	if (endchar) {
		*endchar = ring_peek(t->f->r, span);
	}
	return span;
}


/* Length of the leading segment of the current file's buffered data that
   consists only of the slen characters in s, as reported by ring_strspn().
   If the segment covers everything that is buffered, one attempt is made to
   read more data and, if any was added, the scan is redone. When endchar is
   given, it receives ring_peek()'s result for the position just past the
   segment. */
ssize_t text_strspn(TEXT *t, char *s, int slen, int *endchar)
{
	ssize_t span, got;

	span = ring_strspn(t->f->r, s, slen);

	/* The scan ran into the end of the buffered data: try to get more,
	   and only rescan if that actually added something. */
	if (span >= ring_maxget(t->f->r)) {
		text_ensure(t, 0, &got);
		if (got) {
			span = ring_strspn(t->f->r, s, slen);
		}
	}

	/* Report what follows the segment */
	if (endchar) {
		*endchar = ring_peek(t->f->r, span);
	}
	return span;
}


#else


/* Older implementation of text_strcspn(), not compiled because it sits in
   the "#else" branch of an "#if 1". Scans for a segment containing none of
   the slen characters in s and returns its length, or TEXT_IOERR if reading
   more data failed. When endchar is given it receives:
     TEXT_ELEMTOOBIG - the segment spans the whole ring capacity;
     -errno          - reading more data failed;
     TEXT_EOF        - no data at all, even after reading;
     TEXT_ALMOSTEOF  - the segment runs up to the end of the data, even
                       after reading;
     otherwise the character that follows the segment.
   It still calls ring_read() with four arguments, whereas text_ensure()
   calls it with three. */
ssize_t text_strcspn(TEXT *t, char *s, int slen, int *endchar)
{
	ssize_t ret;

	/* Do the scan */
	ret = ring_strcspn(t->f->r, s, slen);

	/* See if we spanned the whole ring capacity */
	if (ret == ring_capacity(t->f->r)) {

		/* Yes, set the endchar to ELEMTOOBIG and return */
		if (endchar) *endchar = TEXT_ELEMTOOBIG;
		return ret;
	}

	/* See if we spanned all available bytes */
	if (ret == ring_maxget(t->f->r)) {

		/* Yes, so try to read some more, checking for errors */
		if (ring_read(t->f->r, t->f->fd, 0, 0) == -1) {

			/* We have an error, set endchar to -errno and return */
			if (endchar) *endchar = -errno;
			return TEXT_IOERR;
		}

		/* Redo the scan */
		ret = ring_strcspn(t->f->r, s, slen);

		/* See if we still span the (new) number of available bytes */
		if (ret == ring_maxget(t->f->r)) {

			/* We do. See if the spanned count is zero, then
			   set the end-char to EOF, otherwise ALMOSTEOF */
			if (ret == 0) {
				if (endchar) *endchar = TEXT_EOF;
			}
			else {
				if (endchar) *endchar = TEXT_ALMOSTEOF;
			}

			/* And return */
			return ret;
		}
	}

	/* We now know there is a character after the spanned segment - put it 
	   in endchar. */
	if (endchar) *endchar = ring_peek(t->f->r, ret);

	/* Return the spanned amount of bytes */
	return ret;
}



/* Older implementation of text_strspn(), not compiled because it sits in
   the "#else" branch of an "#if 1". Same as the older text_strcspn(), but
   scans for a segment containing only the specified characters. Returns the
   segment length, or TEXT_IOERR if reading more data failed. When endchar is
   given it receives TEXT_ELEMTOOBIG, -errno, TEXT_EOF, TEXT_ALMOSTEOF or the
   character that follows the segment, under the same conditions. */

ssize_t text_strspn(TEXT *t, char *s, int slen, int *endchar)
{
	ssize_t ret;

	ret = ring_strspn(t->f->r, s, slen);

	/* Segment spans the whole ring capacity */
	if (ret == ring_capacity(t->f->r)) {
		if (endchar) *endchar = TEXT_ELEMTOOBIG;
		return ret;
	}

	/* Segment spans all available bytes: read more and rescan */
	if (ret == ring_maxget(t->f->r)) {
		if (ring_read(t->f->r, t->f->fd, 0, 0) == -1) {
			if (endchar) *endchar = -errno;
			return TEXT_IOERR;
		}

		ret = ring_strspn(t->f->r, s, slen);

		/* Still running up to the end of the data */
		if (ret == ring_maxget(t->f->r)) {
			if (ret == 0) {
				if (endchar) *endchar = TEXT_EOF;
			}
			else {
				if (endchar) *endchar = TEXT_ALMOSTEOF;
			}
			return ret;
		}
	}

	/* There is a character after the segment; report it */
	if (endchar) *endchar = ring_peek(t->f->r, ret);
	return ret;
}

#endif


/*
 * Parser. Warning: flexible but hairy.
 */


/* States of the text_getparseditem() state machine. Each value names the
   situation in which the most recent scan was started. */

#define PS_NORMAL		1	/* inside an item, not quoted */
#define PS_ESCAPED		2	/* right after an escape char, unquoted */
#define PS_QUOTED		3	/* inside a quoted part of an item */
#define PS_ESCQUOTED		4	/* right after an escape char, quoted */
#define PS_COMMENT		5	/* inside a comment */
#define PS_EOLCOMMENTEND	6	/* comment ended by an EOL char; a
					   second EOL char may follow */
#define PS_OUTSIDE		7	/* item done, skipping to itemstart */
#define PS_EOLOUTSIDE		8	/* item ended by an EOL char; a second
					   EOL char may follow */
#define PS_RETURN		9	/* terminate the output and return */

/* Line ending states, used for line counting: either LE_NORMAL or the
   line ending character that was seen last. */

#define LE_NORMAL		0
#define LE_GOTCR		'\r'
#define LE_GOTLF		'\n'


/* Max. number of bytes in scan set that may occur; this is the size of the
   curscan[] array in text_getparseditem(). */

#define MAX_SCANCNT		8


/* Readability improvements (for the function, not the macros themselves ;-)
   These macros are not self-contained: they use the local variables
   scancnt, curscan and c of the function they are expanded in. */

/* Empty the scan set. */
#define SCAN_RST()							\
	(scancnt = 0)

/* Append meta to the scan set, but only if it is non-negative and the set
   still has room; otherwise do nothing. */
#define SCAN_ADD(meta)							\
	((void)								\
	 ((meta) >= 0 && scancnt < MAX_SCANCNT && 			\
          (curscan[scancnt++] = (meta))))

/* True if the character c that ended the scan matches meta: meta must not
   be TEXT_META_NONE, and either c equals meta, or c is '\n' or '\r' and meta
   is TEXT_META_EOL. */
#define SCAN_ENDSAT(meta)						\
	((meta) != TEXT_META_NONE && 					\
	 (c == (meta) || ((c == '\n' || c == '\r') &&			\
          (meta) == TEXT_META_EOL)))


/*
 * text_getparseditem - get the next item from the current file of t, using
 * a small state machine that understands escapes, quoting and comments.
 *
 * string, maxlen - output buffer and its size. At most maxlen - 1 bytes are
 *                  stored; whatever doesn't fit is discarded. A terminating
 *                  0 is written when the function returns.
 * escape         - the character following it is copied literally.
 * quotestart,
 * quoteend       - delimit a quoted part; inside it only escape and
 *                  quoteend are special. The quote chars are not copied.
 * commentstart,
 * commentend     - delimit a comment, which is discarded.
 * itemend        - ends the item.
 * itemstart      - after the item has ended, input is discarded up to and
 *                  including this character before returning.
 *
 * For each of these, TEXT_META_EOL matches either '\r' or '\n' and
 * TEXT_META_NONE never matches (see SCAN_ENDSAT). Negative values are never
 * put in the scan set (see SCAN_ADD).
 *
 * The line number of the current file is incremented for the line ending
 * characters that stop a scan; a two-character line ending made of two
 * different characters is counted once.
 *
 * Returns the sum of the non-truncated lengths of all copied segments. If
 * that sum is 0 and the last value obtained for c is negative, that value
 * is returned instead.
 */

int text_getparseditem(TEXT *t, char *string, ssize_t maxlen, 
		       int escape, int quotestart, int quoteend,
		       int commentstart, int commentend, 
		       int itemend, int itemstart)
{
	ssize_t len, copylen, disclen, scannedlen;
	int ret, state, lestate, scancnt;
	char curscan[MAX_SCANCNT];
	int c;

	/* Set initial state: inside item, no line ending characters found */
	state = PS_NORMAL; lestate = LE_NORMAL;

	/* Scantype: scan until line end, escape, starting quote and item end */
	SCAN_RST(); SCAN_ADD('\r'); SCAN_ADD('\n');
	SCAN_ADD(escape); SCAN_ADD(quotestart); 
	SCAN_ADD(itemend); SCAN_ADD(commentstart);

	/* Reset returned item length and the nr. of bytes to copy / discard */
	ret = 0; copylen = 0; disclen = 0;

	/* Each iteration first carries out the copy / discard that the
	   previous iteration asked for, then scans on and decides what to do
	   with the character that stopped the scan. */
	for(;;) {

		/* Copy, if needed */
		if (copylen) {

			/* See if all bytes to copy fit in the output buffer 
			   we have left, reserving room for a trailing zero. */
			if (copylen <= maxlen - 1) {

				/* It does, so copy all bytes we want. */
				len = ring_get(t->f->r, string, copylen);

				/* Advance the output pointer and decrease 
				   maxlen, to accommodate a next segment. */
				string += len;
				maxlen -= len;
			}
			else {

				/* It doesn't fit, so copy maxlen - 1 bytes */
				len = ring_get(t->f->r, string, maxlen - 1);

				/* And discard the rest */
			        ring_discard(t->f->r, copylen - (maxlen-1));

				/* Advance the output pointer and decrease 
				   maxlen, to accommodate a next segment. */
				string += len;
				maxlen -= len;
			}

			/* Add the original, non-truncated segment length to 
			   the returned word len. */
 			ret += copylen;
		}

		/* Discard a requested amount of bytes, if any */
		if (disclen) { ring_discard(t->f->r, disclen); }

		/*
		 * Scan the segment and put the char that stopped us in c.
		 * If we didn't put any characters in the set, we only
		 * fill c with the first character available. Handy for esc.
		 */

		if (scancnt) 
			scannedlen = text_strcspn(t, curscan, scancnt, &c);
		else
			scannedlen = 0, c = text_peek(t, 0);

		/* Show some debugging info.
		   NOTE(review): this call passes LOG_DEBUG, whereas the other
		   msg() calls in this file pass L_DEBUG - confirm which one
		   is intended. */
		D1(msg(F_TEXT, LOG_DEBUG, "text_getparseditem: c/d %d/%d, state"
					  "%d, found %d bytes up to %s -> %c "
					  "(%d)\n", 
		       copylen, disclen, state, scannedlen, scancnt ? 
				     dbg_cvtstr(curscan, scancnt) : "anything", 
		       c > 31 && c < 127 ? c : '.', c));

		/* First handle the line counter in a bit compact manner.
		   A first line ending char counts a line and is remembered
		   in lestate; the same char again counts another line; the
		   other line ending char only completes the pair and resets
		   lestate. */
		if (c == '\r' || c == '\n') {
			if (lestate == LE_NORMAL) lestate = c, t->f->linenr++;
			else if (lestate == c) t->f->linenr++;
			else lestate = LE_NORMAL;
		}
		else lestate = LE_NORMAL;

		/* Now see what we have to do with the character we got,
		   depending on the state we're in. The order of tests
		   _is_ important and is even different between states (!).
		   When continuing the loop, we can set copylen to the
		   segment we want copied and set disclen to the number 
		   of bytes that must be discarded after that. */

		switch(state) {

		  case PS_NORMAL:		/* Scan ended in state Normal */
	
			/* Test for 0. EOF, 1. esc, 2. quote start, 3. comment 
			   start, 4. item end, 5. no-op things (like EOL). */
			if (c == -1) {
				copylen = scannedlen;
				disclen = 0;
				SCAN_RST();
				state = PS_RETURN;
			}
			else if (SCAN_ENDSAT(escape)) {
				/* Empty scan set: the next pass just peeks
				   at the escaped character. */
				copylen = scannedlen;
				disclen = 1;
				SCAN_RST();
				state = PS_ESCAPED;
			}
			else if (SCAN_ENDSAT(quotestart)) {
				copylen = scannedlen;
				disclen = 1;
				SCAN_RST(); SCAN_ADD('\r'); SCAN_ADD('\n');
				SCAN_ADD(escape); SCAN_ADD(quoteend);
				state = PS_QUOTED;
			}
			else if (SCAN_ENDSAT(commentstart)) {
				copylen = scannedlen;
				disclen = 1;
				SCAN_RST(); SCAN_ADD('\r'); SCAN_ADD('\n');
				SCAN_ADD(commentend);
				state = PS_COMMENT;
			}
			else if (SCAN_ENDSAT(itemend)) {
				copylen = scannedlen;

				/* Discard only if itemend is not itemstart */
				disclen = ! SCAN_ENDSAT(itemstart);

				/* See if the item end is also an EOL char */
				if (SCAN_ENDSAT(TEXT_META_EOL)) {

					/* Yes, go through eol-outside */
					SCAN_RST(); state = PS_EOLOUTSIDE;
				}
				else {
					/* No, go straight outside */
					SCAN_RST(); SCAN_ADD('\r'); 
					SCAN_ADD('\n'); SCAN_ADD(itemstart);
					state = PS_OUTSIDE;
				}
			}
			else {
				/* Scan was ended by something uninteresting.
				   Copy and continue. */
				copylen = scannedlen + 1;
				disclen = 0;
			}

			break;

		  case PS_ESCAPED:		/* Scan ended while Escaped */
		  case PS_ESCQUOTED:		/* or quoted and escaped */
			/* Copy the escaped character itself */
			copylen = 1;
			disclen = 0;

			/* See if the escaped char is a first line ending 
			   char, that set lestate to LE_GOTCR or LE_GOTLF.
			   If it is, we stay in this state with an empty
			   scan set for the next character. */
			if (lestate == LE_NORMAL) {

				/* No, so go back to the previous state. */
				SCAN_RST(); SCAN_ADD('\r'); SCAN_ADD('\n');
				SCAN_ADD(escape); SCAN_ADD(quotestart); 
				SCAN_ADD(itemend); SCAN_ADD(commentstart);
				if (state == PS_ESCAPED) state = PS_NORMAL;
				else state = PS_QUOTED;
			}
			break;

		  case PS_QUOTED:		/* Scan ended in state Quoted */

			/* Test for 0. eof, 1. esc, 2. quote end, 3. no-op */
			if (c == -1) {
				copylen = scannedlen;
				disclen = 0;
				SCAN_RST();
				state = PS_RETURN;
			}
			else if (SCAN_ENDSAT(escape)) {
				copylen = scannedlen;
				disclen = 1;
				SCAN_RST();
				state = PS_ESCQUOTED;
			}
			else if (SCAN_ENDSAT(quoteend)) {
				copylen = scannedlen;
				disclen = 1;
				SCAN_RST(); SCAN_ADD('\r'); SCAN_ADD('\n');
				SCAN_ADD(escape); SCAN_ADD(quotestart); 
				SCAN_ADD(itemend); SCAN_ADD(commentstart);
				state = PS_NORMAL;
			}
			else {
				/* No-op (EOL). Include in copied string. */
				copylen = scannedlen + 1;
				disclen = 0;
			}
			break;

		  case PS_COMMENT:		/* Scan ended in Comment */
			
			/* Test for 0. eof, 1. comment end, which may also be
			   EOL, in which case we may need to discard 2 
			   commentend characters, and which may also be itemend,
			   in which case we want to go straight outside, or
			   if itemend is also EOL, to eol-outside.
			   Test also for 2. no-op things, like EOL. */

			if (c == -1) {
				copylen = 0;
				disclen = scannedlen;
				SCAN_RST();
				state = PS_RETURN;
			}
			else if (SCAN_ENDSAT(commentend)) {

				copylen = 0;
				disclen = scannedlen;

				/* Done with the comment, test for 1. itemend
				   (and possibly EOL), 2. EOL alone, 3.
				   anything else, which must be discarded. */

				if (SCAN_ENDSAT(itemend)) {

					/* Disc. itemend only if not itemstrt */
					disclen += ! SCAN_ENDSAT(itemstart);

					/* See if itemend is also an EOL char */
					if (SCAN_ENDSAT(TEXT_META_EOL)) {

						/* Yes, go to eol-outside */
						SCAN_RST(); 
						state = PS_EOLOUTSIDE;
					}
					else {
						/* No, go straight outside */
						SCAN_RST(); SCAN_ADD('\r'); 
						SCAN_ADD('\n'); 
						SCAN_ADD(itemstart);
						state = PS_OUTSIDE;
					}
				}
				else if (SCAN_ENDSAT(TEXT_META_EOL)) {

					/* Go through eol-commentend */
					disclen++;
					SCAN_RST(); state = PS_EOLCOMMENTEND;
				}
				else {
					/* Comment ended by a normal char.
					   Add to the discard count and go
					   back to normal state. */
					disclen++;
					SCAN_RST(); SCAN_ADD('\r'); 
					SCAN_ADD('\n'); SCAN_ADD(escape); 
					SCAN_ADD(quotestart); SCAN_ADD(itemend);
					SCAN_ADD(commentstart);
					state = PS_NORMAL;
				}
			}
			else {
				/* Scan ended by something uninteresting.
				   Discard that char with the rest. */
				copylen = 0;
				disclen = scannedlen + 1;
			}
			break;

		  case PS_EOLCOMMENTEND:	/* Scan ended in EOL-cmntend */
			/* If we got the second char of an-EOL, discard it */
			copylen = 0;
			disclen = SCAN_ENDSAT(TEXT_META_EOL);

			SCAN_RST(); SCAN_ADD('\r'); SCAN_ADD('\n'); 
			SCAN_ADD(escape); SCAN_ADD(quotestart); 
			SCAN_ADD(itemend); SCAN_ADD(commentstart);
			state = PS_NORMAL;
			break;

		  case PS_EOLOUTSIDE:		/* Scan ended in EOL-outside */

			/* If we got the second char of an-EOL, discard it */
			copylen = 0;
			disclen = SCAN_ENDSAT(TEXT_META_EOL);

			SCAN_RST(); SCAN_ADD('\r'); SCAN_ADD('\n'); 
			SCAN_ADD(itemstart);
			state = PS_OUTSIDE;
			break;

		  case PS_OUTSIDE:		/* Scan ended while Outside */

			/* Test for 0. eof, 1. itemstart, 2. for no-op */
			if (c == -1) {
				copylen = 0;
				disclen = scannedlen;
				SCAN_RST();
				state = PS_RETURN;
			}
			else if (SCAN_ENDSAT(itemstart)) {

				/* Discard up to and including itemstart,
				   then return on the next pass. */
				copylen = 0;
				disclen = scannedlen + 1;
				SCAN_RST();
				state = PS_RETURN;
			}
			else {
				copylen = 0;
				disclen = scannedlen + 1;
			}
			break;

		  case PS_RETURN:		/* We're completely done. */
			/* Terminate the output. If nothing was copied and
			   the last peek gave a negative value, hand that
			   value to the caller instead of a zero length. */
			*string = 0;
			if (ret == 0 && c < 0) ret = c;
			D1(msg(F_TEXT, L_DEBUG, "text_getparseditem: returning "
						"%d.\n\n", ret));
			return ret;
			break;
		}
	}
}



/* syntax highlighted by Code2HTML, v. 0.9.1 */