/* Named sub-patterns referenced from the rules section. */
/* D: one decimal digit. */
D			[0-9]
/* L: one letter or underscore (the characters an identifier is built from). */
L			[_a-zA-Z]
/* H: one hexadecimal digit. */
H			[0-9a-fA-F]
/* E: exponent part, e or E, an optional sign, then one or more digits. */
E			[eE][-+]?{D}+
/* FS: a single floating-constant suffix character. */
FS			[fFlL]
/* IS: zero or more integer-constant suffix characters. */
IS			[uUlL]*


%{

/* this scanner sourced from: http://www.lysator.liu.se/c/ANSI-C-grammar-l.html */

/*
 * Python.h is included ahead of the standard headers, as the Python
 * documentation requires: it may define feature macros that affect them.
 */
#include "Python.h"
#include <stdio.h>
#include <string.h>

/* Semantic values are passed around as opaque pointers (Python objects). */
#define YYSTYPE void *
#include "tokens.h"

/* Helpers defined in the user-code section and called from the rule actions. */
void count(void);
int comment(void);
int check_type(void);

/*
 * Nothing in this file updates yylineno.
 * NOTE(review): newer flex releases define yylineno themselves; confirm this
 * definition does not clash with the scanner generated by the flex in use.
 */
int yylineno = 0;

/* Parser object and input callback, both supplied by another translation unit. */
extern void *py_parser;
extern void (*py_input)(PyObject *parser, char *buf, int *result, int max_size);

/*
 * returntoken(tok): store the matched text in yylval as a new Python string
 * and return token code tok from the scanner.
 *
 * PyString_FromString() copies its argument, so yytext is passed directly;
 * handing it a strdup()'d buffer leaked that buffer on every token.  The
 * do/while(0) wrapper makes the macro expand to exactly one statement.
 */
#define returntoken(tok) do { yylval = PyString_FromString(yytext); return (tok); } while (0)

/* Route all scanner input through the py_input callback. */
#define YY_INPUT(buf,result,max_size) { (*py_input)(py_parser, buf, &result, max_size); }

%}


%%
	/* An opening block-comment delimiter hands control to comment(), which
	 * consumes the rest of the comment; the action returns no token. */
"/*"			{ comment(); }

	/* Reserved words.  Every action calls count() on the matched text and
	 * then returntoken(), which stores a Python string made from yytext in
	 * yylval and returns the keyword's token code.
	 * NOTE(review): these rules are listed ahead of the identifier rule;
	 * under lex's first-rule tie-break that ordering is what lets a keyword
	 * win over an identifier match of the same length. */
"auto"			{ count(); returntoken(AUTO); }
"break"			{ count(); returntoken(BREAK); }
"case"			{ count(); returntoken(CASE); }
"char"			{ count(); returntoken(CHAR); }
"const"			{ count(); returntoken(CONST); }
"continue"		{ count(); returntoken(CONTINUE); }
"default"		{ count(); returntoken(DEFAULT); }
"do"			{ count(); returntoken(DO); }
"double"		{ count(); returntoken(DOUBLE); }
"else"			{ count(); returntoken(ELSE); }
"enum"			{ count(); returntoken(ENUM); }
"extern"		{ count(); returntoken(EXTERN); }
"float"			{ count(); returntoken(FLOAT); }
"for"			{ count(); returntoken(FOR); }
"goto"			{ count(); returntoken(GOTO); }
"if"			{ count(); returntoken(IF); }
"int"			{ count(); returntoken(INT); }
"long"			{ count(); returntoken(LONG); }
"register"		{ count(); returntoken(REGISTER); }
"return"		{ count(); returntoken(RETURN); }
"short"			{ count(); returntoken(SHORT); }
"signed"		{ count(); returntoken(SIGNED); }
"sizeof"		{ count(); returntoken(SIZEOF); }
"static"		{ count(); returntoken(STATIC); }
"struct"		{ count(); returntoken(STRUCT); }
"switch"		{ count(); returntoken(SWITCH); }
"typedef"		{ count(); returntoken(TYPEDEF); }
"union"			{ count(); returntoken(UNION); }
"unsigned"		{ count(); returntoken(UNSIGNED); }
"void"			{ count(); returntoken(VOID); }
"volatile"		{ count(); returntoken(VOLATILE); }
"while"			{ count(); returntoken(WHILE); }

{L}({L}|{D})*		{ count(); returntoken(check_type()); }
	/* Identifier rule (above): a letter or underscore followed by any number
	 * of letters, underscores or digits.  The token code comes from
	 * check_type(). */

	/* Integer constants: 0x/0X followed by hex digits, a 0 followed by
	 * decimal digits, or a plain run of decimal digits, each with optional
	 * u/U/l/L suffix characters.  The fourth rule matches character
	 * constants, optionally prefixed with L, whose body is one or more
	 * backslash escapes or characters other than backslash and quote. */
0[xX]{H}+{IS}?		{ count(); returntoken(CONSTANT); }
0{D}+{IS}?		{ count(); returntoken(CONSTANT); }
{D}+{IS}?		{ count(); returntoken(CONSTANT); }
L?'(\\.|[^\\'])+'	{ count(); returntoken(CONSTANT); }

	/* Floating constants: digits with an exponent, or digits around a
	 * decimal point with an optional exponent; an optional f/F/l/L suffix
	 * may follow.  All constants share the single CONSTANT token code. */
{D}+{E}{FS}?		{ count(); returntoken(CONSTANT); }
{D}*"."{D}+({E})?{FS}?	{ count(); returntoken(CONSTANT); }
{D}+"."{D}*({E})?{FS}?	{ count(); returntoken(CONSTANT); }

	/* String literals, optionally prefixed with L; the body may be empty. */
L?\"(\\.|[^\\"])*\"	{ count(); returntoken(STRING_LITERAL); }

	/* Operators and punctuation, one token code each.  The brace and
	 * bracket rules also accept a two-character alternative spelling that
	 * maps to the same token code. */
"..."			{ count(); returntoken(ELLIPSIS); }
">>="			{ count(); returntoken(RIGHT_ASSIGN); }
"<<="			{ count(); returntoken(LEFT_ASSIGN); }
"+="			{ count(); returntoken(ADD_ASSIGN); }
"-="			{ count(); returntoken(SUB_ASSIGN); }
"*="			{ count(); returntoken(MUL_ASSIGN); }
"/="			{ count(); returntoken(DIV_ASSIGN); }
"%="			{ count(); returntoken(MOD_ASSIGN); }
"&="			{ count(); returntoken(AND_ASSIGN); }
"^="			{ count(); returntoken(XOR_ASSIGN); }
"|="			{ count(); returntoken(OR_ASSIGN); }
">>"			{ count(); returntoken(RIGHT_OP); }
"<<"			{ count(); returntoken(LEFT_OP); }
"++"			{ count(); returntoken(INC_OP); }
"--"			{ count(); returntoken(DEC_OP); }
"->"			{ count(); returntoken(PTR_OP); }
"&&"			{ count(); returntoken(BOOL_AND_OP); }
"||"			{ count(); returntoken(BOOL_OR_OP); }
"<="			{ count(); returntoken(LE_OP); }
">="			{ count(); returntoken(GE_OP); }
"=="			{ count(); returntoken(EQ_OP); }
"!="			{ count(); returntoken(NE_OP); }
";"			{ count(); returntoken(SEMICOLON); }
("{"|"<%")		{ count(); returntoken(LBRACE); }
("}"|"%>")		{ count(); returntoken(RBRACE); }
","			{ count(); returntoken(COMMA); }
":"			{ count(); returntoken(COLON); }
"="			{ count(); returntoken(ASSIGN); }
"("			{ count(); returntoken(LPAREN); }
")"			{ count(); returntoken(RPAREN); }
("["|"<:")		{ count(); returntoken(LBRACKET); }
("]"|":>")		{ count(); returntoken(RBRACKET); }
"."			{ count(); returntoken(PERIOD); }
"&"			{ count(); returntoken(AND_OP); }
"!"			{ count(); returntoken(BANG); }
"~"			{ count(); returntoken(TILDE); }
"-"			{ count(); returntoken(MINUS); }
"+"			{ count(); returntoken(PLUS); }
"*"			{ count(); returntoken(STAR); }
"/"			{ count(); returntoken(SLASH); }
"%"			{ count(); returntoken(PERCENT); }
"<"			{ count(); returntoken(LT_OP); }
">"			{ count(); returntoken(GT_OP); }
"^"			{ count(); returntoken(CIRCUMFLEX); }
"|"			{ count(); returntoken(OR_OP); }
"?"			{ count(); returntoken(QUESTIONMARK); }

[ \t\v\n\f]		{ count(); }
	/* Whitespace (above) only updates the column counter; no token is
	 * returned.  Any other unmatched character (below) is dropped, and
	 * because count() is not called for it, the column is not advanced. */
.			{ /* ignore bad characters */ }

%%

/*
 * yywrap - end-of-input hook of the lex interface.
 *
 * Always returns 1, the value that tells a lex-generated scanner there is
 * no further input to continue with.
 */
int yywrap(void)
{
	return 1;
}


/*
 * comment - discard the remainder of a block comment.
 *
 * Called after the opening delimiter has been matched.  Reads characters
 * with input() until a '*' immediately followed by '/' has been consumed,
 * or until the input ends.
 *
 * input() yields an int and, depending on the flex version, reports end of
 * input as either 0 or EOF.  Both are treated as end of input here, and the
 * value is kept in an int so EOF is never truncated; this keeps an
 * unterminated comment from spinning forever.
 *
 * Returns 0 in every case; the caller ignores the value.
 */
int comment(void)
{
	int ch;

	for (;;) {
		/* Advance to the next '*' (or to end of input). */
		do {
			ch = input();
		} while (ch != '*' && ch != 0 && ch != EOF);

		if (ch != '*')
			return 0;	/* unterminated comment */

		/* Swallow a run of '*' so that "**" followed by '/' still closes. */
		do {
			ch = input();
		} while (ch == '*');

		if (ch == '/' || ch == 0 || ch == EOF)
			return 0;

		/* Any other character is comment text: keep scanning. */
	}
}


/* Column reached so far on the current line; starts at 0. */
int column = 0;

/*
 * count - advance `column` over the text of the current match (yytext).
 *
 * A newline resets the column to 0, a tab moves it to the next multiple
 * of 8, and any other character advances it by one.
 */
void count()
{
	const char *cursor;

	for (cursor = yytext; *cursor != '\0'; ++cursor) {
		switch (*cursor) {
		case '\n':
			column = 0;
			break;
		case '\t':
			column += 8 - (column % 8);
			break;
		default:
			++column;
			break;
		}
	}
}


/*
 * check_type - pick the token code for text matched by the identifier rule.
 *
 * Intended behaviour (not implemented): return TYPE_NAME when yytext is the
 * name of a type, and IDENTIFIER otherwise.
 *
 * Actual behaviour: every match is reported as IDENTIFIER.
 */
int check_type()
{
	const int token = IDENTIFIER;

	return token;
}



/* syntax highlighted by Code2HTML, v. 0.9.1 */