/* $Id: dfa.c,v 1.30 2003/06/18 21:32:44 adam Exp $
Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002,2003
Index Data Aps
This file is part of the Zebra server.
Zebra is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 2, or (at your option) any later
version.
Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
for more details.
You should have received a copy of the GNU General Public License
along with Zebra; see the file LICENSE.zebra. If not, write to the
Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA
02111-1307, USA.
*/
#include <stdio.h>
#include <assert.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include <zebrautl.h>
#include "dfap.h"
#include "imalloc.h"
#define DFA_OPEN_RANGE 1
#define CAT 16000
#define OR 16001
#define STAR 16002
#define PLUS 16003
#define EPSILON 16004
struct Tnode {
union {
struct Tnode *p[2]; /* CAT,OR,STAR,PLUS (left,right) */
short ch[2]; /* if ch[0] >= 0 then this Tnode represents */
/* a character in range [ch[0]..ch[1]] */
/* otherwise ch[0] represents */
/* accepting no (-acceptno) */
} u;
unsigned pos : 15; /* CAT/OR/STAR/EPSILON or non-neg. position */
unsigned nullable : 1;
Set firstpos; /* first positions */
Set lastpos; /* last positions */
};
struct Tblock { /* Tnode bucket (block) */
struct Tblock *next; /* pointer to next bucket */
struct Tnode *tarray; /* array of nodes in bucket */
};
#define TADD 64
#define STATE_HASH 199
#define POSET_CHUNK 100
int debug_dfa_trav = 0;
int debug_dfa_tran = 0;
int debug_dfa_followpos = 0;
int dfa_verbose = 0;
static struct Tnode *mk_Tnode (struct DFA_parse *parse_info);
static struct Tnode *mk_Tnode_cset (struct DFA_parse *parse_info,
BSet charset);
static void term_Tnode (struct DFA_parse *parse_info);
static void
del_followpos (struct DFA_parse *parse_info),
init_pos (struct DFA_parse *parse_info),
del_pos (struct DFA_parse *parse_info),
mk_dfa_tran (struct DFA_parse *parse_info, struct DFA_states *dfas),
add_follow (struct DFA_parse *parse_info, Set lastpos, Set firstpos),
dfa_trav (struct DFA_parse *parse_info, struct Tnode *n),
init_followpos (struct DFA_parse *parse_info),
pr_tran (struct DFA_parse *parse_info, struct DFA_states *dfas),
pr_verbose (struct DFA_parse *parse_info, struct DFA_states *dfas),
pr_followpos (struct DFA_parse *parse_info),
out_char (int c),
lex (struct DFA_parse *parse_info);
static int
nextchar (struct DFA_parse *parse_info, int *esc),
read_charset (struct DFA_parse *parse_info);
static const char
*str_char (unsigned c);
#define L_LP 1
#define L_RP 2
#define L_CHAR 3
#define L_CHARS 4
#define L_ANY 5
#define L_ALT 6
#define L_ANYZ 7
#define L_WILD 8
#define L_QUEST 9
#define L_CLOS1 10
#define L_CLOS0 11
#define L_END 12
#define L_START 13
static struct Tnode *expr_1 (struct DFA_parse *parse_info),
*expr_2 (struct DFA_parse *parse_info),
*expr_3 (struct DFA_parse *parse_info),
*expr_4 (struct DFA_parse *parse_info);
static struct Tnode *expr_1 (struct DFA_parse *parse_info)
{
struct Tnode *t1, *t2, *tn;
if (!(t1 = expr_2 (parse_info)))
return t1;
while (parse_info->lookahead == L_ALT)
{
lex (parse_info);
if (!(t2 = expr_2 (parse_info)))
return t2;
tn = mk_Tnode (parse_info);
tn->pos = OR;
tn->u.p[0] = t1;
tn->u.p[1] = t2;
t1 = tn;
}
return t1;
}
static struct Tnode *expr_2 (struct DFA_parse *parse_info)
{
struct Tnode *t1, *t2, *tn;
if (!(t1 = expr_3 (parse_info)))
return t1;
while (parse_info->lookahead == L_WILD ||
parse_info->lookahead == L_ANYZ ||
parse_info->lookahead == L_ANY ||
parse_info->lookahead == L_LP ||
parse_info->lookahead == L_CHAR ||
parse_info->lookahead == L_CHARS)
{
if (!(t2 = expr_3 (parse_info)))
return t2;
tn = mk_Tnode (parse_info);
tn->pos = CAT;
tn->u.p[0] = t1;
tn->u.p[1] = t2;
t1 = tn;
}
return t1;
}
static struct Tnode *expr_3 (struct DFA_parse *parse_info)
{
struct Tnode *t1, *tn;
if (!(t1 = expr_4 (parse_info)))
return t1;
if (parse_info->lookahead == L_CLOS0)
{
lex (parse_info);
tn = mk_Tnode (parse_info);
tn->pos = STAR;
tn->u.p[0] = t1;
t1 = tn;
}
else if (parse_info->lookahead == L_CLOS1)
{
lex (parse_info);
tn = mk_Tnode (parse_info);
tn->pos = PLUS;
tn->u.p[0] = t1;
t1 = tn;
}
else if (parse_info->lookahead == L_QUEST)
{
lex (parse_info);
tn = mk_Tnode(parse_info);
tn->pos = OR;
tn->u.p[0] = t1;
tn->u.p[1] = mk_Tnode(parse_info);
tn->u.p[1]->pos = EPSILON;
t1 = tn;
}
return t1;
}
static struct Tnode *expr_4 (struct DFA_parse *parse_info)
{
struct Tnode *t1;
switch (parse_info->lookahead)
{
case L_LP:
lex (parse_info);
if (!(t1 = expr_1 (parse_info)))
return t1;
if (parse_info->lookahead == L_RP)
lex (parse_info);
else
return NULL;
break;
case L_CHAR:
t1 = mk_Tnode(parse_info);
t1->pos = ++parse_info->position;
t1->u.ch[1] = t1->u.ch[0] = parse_info->look_ch;
lex (parse_info);
break;
case L_CHARS:
t1 = mk_Tnode_cset (parse_info, parse_info->look_chars);
lex (parse_info);
break;
case L_ANY:
t1 = mk_Tnode_cset (parse_info, parse_info->anyset);
lex (parse_info);
break;
case L_ANYZ:
t1 = mk_Tnode(parse_info);
t1->pos = OR;
t1->u.p[0] = mk_Tnode_cset (parse_info, parse_info->anyset);
t1->u.p[1] = mk_Tnode(parse_info);
t1->u.p[1]->pos = EPSILON;
lex (parse_info);
break;
case L_WILD:
t1 = mk_Tnode(parse_info);
t1->pos = STAR;
t1->u.p[0] = mk_Tnode_cset (parse_info, parse_info->anyset);
lex (parse_info);
default:
t1 = NULL;
}
return t1;
}
static void do_parse (struct DFA_parse *parse_info, const char **s,
struct Tnode **tnp)
{
int start_anchor_flag = 0;
struct Tnode *t1, *t2, *tn;
parse_info->err_code = 0;
parse_info->expr_ptr = * (const unsigned char **) s;
parse_info->inside_string = 0;
lex (parse_info);
if (parse_info->lookahead == L_START)
{
start_anchor_flag = 1;
lex (parse_info);
}
if (parse_info->lookahead == L_END)
{
t1 = mk_Tnode (parse_info);
t1->pos = ++parse_info->position;
t1->u.ch[1] = t1->u.ch[0] = '\n';
lex (parse_info);
}
else
{
t1 = expr_1 (parse_info);
if (t1 && parse_info->lookahead == L_END)
{
t2 = mk_Tnode (parse_info);
t2->pos = ++parse_info->position;
t2->u.ch[1] = t2->u.ch[0] = '\n';
tn = mk_Tnode (parse_info);
tn->pos = CAT;
tn->u.p[0] = t1;
tn->u.p[1] = t2;
t1 = tn;
lex (parse_info);
}
}
if (t1 && parse_info->lookahead == 0)
{
t2 = mk_Tnode(parse_info);
t2->pos = ++parse_info->position;
t2->u.ch[0] = -(++parse_info->rule);
t2->u.ch[1] = start_anchor_flag ? 0 : -(parse_info->rule);
*tnp = mk_Tnode(parse_info);
(*tnp)->pos = CAT;
(*tnp)->u.p[0] = t1;
(*tnp)->u.p[1] = t2;
}
else
{
if (!parse_info->err_code)
{
if (parse_info->lookahead == L_RP)
parse_info->err_code = DFA_ERR_RP;
else if (parse_info->lookahead == L_LP)
parse_info->err_code = DFA_ERR_LP;
else
parse_info->err_code = DFA_ERR_SYNTAX;
}
}
*s = (const char *) parse_info->expr_ptr;
}
static int nextchar (struct DFA_parse *parse_info, int *esc)
{
*esc = 0;
if (*parse_info->expr_ptr == '\0')
return 0;
else if (*parse_info->expr_ptr != '\\')
return *parse_info->expr_ptr++;
*esc = 1;
switch (*++parse_info->expr_ptr)
{
case '\r':
case '\n':
case '\0':
return '\\';
case '\t':
++parse_info->expr_ptr;
return ' ';
case 'n':
++parse_info->expr_ptr;
return '\n';
case 't':
++parse_info->expr_ptr;
return '\t';
case 'r':
++parse_info->expr_ptr;
return '\r';
case 'f':
++parse_info->expr_ptr;
return '\f';
default:
return *parse_info->expr_ptr++;
}
}
static int nextchar_set (struct DFA_parse *parse_info, int *esc)
{
if (*parse_info->expr_ptr == ' ')
{
*esc = 0;
return *parse_info->expr_ptr++;
}
return nextchar (parse_info, esc);
}
static int read_charset (struct DFA_parse *parse_info)
{
int i, ch0, ch1, esc0, esc1, cc = 0;
parse_info->look_chars = mk_BSet (&parse_info->charset);
res_BSet (parse_info->charset, parse_info->look_chars);
ch0 = nextchar_set (parse_info, &esc0);
if (!esc0 && ch0 == '^')
{
cc = 1;
ch0 = nextchar_set (parse_info, &esc0);
}
while (ch0 != 0)
{
if (!esc0 && ch0 == ']')
break;
if (!esc0 && ch0 == '-')
{
ch1 = ch0;
esc1 = esc0;
ch0 = 1;
add_BSet (parse_info->charset, parse_info->look_chars, ch0);
}
else
{
if (parse_info->cmap)
{
const char **mapto;
char mapfrom[2];
const char *mcp = mapfrom;
mapfrom[0] = ch0;
mapto = (*parse_info->cmap)(parse_info->cmap_data, &mcp, 1);
assert (mapto);
ch0 = mapto[0][0];
}
add_BSet (parse_info->charset, parse_info->look_chars, ch0);
ch1 = nextchar_set (parse_info, &esc1);
}
if (!esc1 && ch1 == '-')
{
int open_range = 0;
if ((ch1 = nextchar_set (parse_info, &esc1)) == 0)
break;
#if DFA_OPEN_RANGE
if (!esc1 && ch1 == ']')
{
ch1 = 255;
open_range = 1;
}
#else
if (!esc1 && ch1 == ']')
{
add_BSet (parse_info->charset, parse_info->look_chars, '-');
break;
}
#endif
if (!open_range && parse_info->cmap)
{
const char **mapto;
char mapfrom[2];
const char *mcp = mapfrom;
mapfrom[0] = ch1;
mapto = (*parse_info->cmap) (parse_info->cmap_data, &mcp, 1);
assert (mapto);
ch1 = mapto[0][0];
}
for (i=ch0; ++i<=ch1;)
add_BSet (parse_info->charset, parse_info->look_chars, i);
if (!open_range)
ch0 = nextchar_set (parse_info, &esc0);
else
break;
}
else
{
esc0 = esc1;
ch0 = ch1;
}
}
if (cc)
com_BSet (parse_info->charset, parse_info->look_chars);
return L_CHARS;
}
static int map_l_char (struct DFA_parse *parse_info)
{
const char **mapto;
const char *cp0 = (const char *) (parse_info->expr_ptr-1);
int i = 0, len = strlen(cp0);
if (cp0[0] == 1 && cp0[1])
{
parse_info->expr_ptr++;
parse_info->look_ch = ((unsigned char *) cp0)[1];
return L_CHAR;
}
if (!parse_info->cmap)
return L_CHAR;
mapto = (*parse_info->cmap) (parse_info->cmap_data, &cp0, len);
assert (mapto);
parse_info->expr_ptr = (const unsigned char *) cp0;
parse_info->look_ch = ((unsigned char **) mapto)[i][0];
logf (LOG_DEBUG, "map from %c to %d", parse_info->expr_ptr[-1], parse_info->look_ch);
return L_CHAR;
}
static int lex_sub(struct DFA_parse *parse_info)
{
int esc;
while ((parse_info->look_ch = nextchar (parse_info, &esc)) != 0)
if (parse_info->look_ch == '\"')
{
if (esc)
return map_l_char (parse_info);
parse_info->inside_string = !parse_info->inside_string;
}
else if (esc || parse_info->inside_string)
return map_l_char (parse_info);
else if (parse_info->look_ch == '[')
return read_charset(parse_info);
else
{
const int *cc;
for (cc = parse_info->charMap; *cc; cc += 2)
if (*cc == (int) (parse_info->look_ch))
{
if (!cc[1])
--parse_info->expr_ptr;
return cc[1];
}
return map_l_char (parse_info);
}
return 0;
}
static void lex (struct DFA_parse *parse_info)
{
parse_info->lookahead = lex_sub (parse_info);
}
static const char *str_char (unsigned c)
{
static char s[6];
s[0] = '\\';
if (c < 32 || c >= 127)
switch (c)
{
case '\r':
strcpy (s+1, "r");
break;
case '\n':
strcpy (s+1, "n");
break;
case '\t':
strcpy (s+1, "t");
break;
default:
sprintf (s+1, "x%02x", c);
break;
}
else
switch (c)
{
case '\"':
strcpy (s+1, "\"");
break;
case '\'':
strcpy (s+1, "\'");
break;
case '\\':
strcpy (s+1, "\\");
break;
default:
s[0] = c;
s[1] = '\0';
}
return s;
}
static void out_char (int c)
{
printf ("%s", str_char (c));
}
static void term_Tnode (struct DFA_parse *parse_info)
{
struct Tblock *t, *t1;
for (t = parse_info->start; (t1 = t);)
{
t=t->next;
ifree (t1->tarray);
ifree (t1);
}
}
static struct Tnode *mk_Tnode (struct DFA_parse *parse_info)
{
struct Tblock *tnew;
if (parse_info->use_Tnode == parse_info->max_Tnode)
{
tnew = (struct Tblock *) imalloc (sizeof(struct Tblock));
tnew->tarray = (struct Tnode *) imalloc (TADD*sizeof(struct Tnode));
if (!tnew->tarray)
return NULL;
if (parse_info->use_Tnode == 0)
parse_info->start = tnew;
else
parse_info->end->next = tnew;
parse_info->end = tnew;
tnew->next = NULL;
parse_info->max_Tnode += TADD;
}
return parse_info->end->tarray+(parse_info->use_Tnode++ % TADD);
}
struct Tnode *mk_Tnode_cset (struct DFA_parse *parse_info, BSet charset)
{
struct Tnode *tn1, *tn0 = mk_Tnode(parse_info);
int ch1, ch0 = trav_BSet (parse_info->charset, charset, 0);
if (ch0 == -1)
tn0->pos = EPSILON;
else
{
tn0->u.ch[0] = ch0;
tn0->pos = ++parse_info->position;
ch1 = travi_BSet (parse_info->charset, charset, ch0+1) ;
if (ch1 == -1)
tn0->u.ch[1] = parse_info->charset->size;
else
{
tn0->u.ch[1] = ch1-1;
while ((ch0=trav_BSet (parse_info->charset, charset, ch1)) != -1)
{
tn1 = mk_Tnode(parse_info);
tn1->pos = OR;
tn1->u.p[0] = tn0;
tn0 = tn1;
tn1 = tn0->u.p[1] = mk_Tnode(parse_info);
tn1->u.ch[0] = ch0;
tn1->pos = ++(parse_info->position);
if ((ch1 = travi_BSet (parse_info->charset, charset,
ch0+1)) == -1)
{
tn1->u.ch[1] = parse_info->charset->size;
break;
}
tn1->u.ch[1] = ch1-1;
}
}
}
return tn0;
}
static void del_followpos (struct DFA_parse *parse_info)
{
ifree (parse_info->followpos);
}
static void init_pos (struct DFA_parse *parse_info)
{
parse_info->posar = (struct Tnode **) imalloc (sizeof(struct Tnode*)
* (1+parse_info->position));
}
static void del_pos (struct DFA_parse *parse_info)
{
ifree (parse_info->posar);
}
static void add_follow (struct DFA_parse *parse_info,
Set lastpos, Set firstpos)
{
while (lastpos)
{
parse_info->followpos[lastpos->value] =
union_Set (parse_info->poset,
parse_info->followpos[lastpos->value], firstpos);
lastpos = lastpos->next;
}
}
static void dfa_trav (struct DFA_parse *parse_info, struct Tnode *n)
{
struct Tnode **posar = parse_info->posar;
SetType poset = parse_info->poset;
switch (n->pos)
{
case CAT:
dfa_trav (parse_info, n->u.p[0]);
dfa_trav (parse_info, n->u.p[1]);
n->nullable = n->u.p[0]->nullable & n->u.p[1]->nullable;
n->firstpos = mk_Set (poset);
n->firstpos = union_Set (poset, n->firstpos, n->u.p[0]->firstpos);
if (n->u.p[0]->nullable)
n->firstpos = union_Set (poset, n->firstpos, n->u.p[1]->firstpos);
n->lastpos = mk_Set (poset);
n->lastpos = union_Set (poset, n->lastpos, n->u.p[1]->lastpos);
if (n->u.p[1]->nullable)
n->lastpos = union_Set (poset, n->lastpos, n->u.p[0]->lastpos);
add_follow (parse_info, n->u.p[0]->lastpos, n->u.p[1]->firstpos);
n->u.p[0]->firstpos = rm_Set (poset, n->u.p[0]->firstpos);
n->u.p[0]->lastpos = rm_Set (poset, n->u.p[0]->lastpos);
n->u.p[1]->firstpos = rm_Set (poset, n->u.p[1]->firstpos);
n->u.p[1]->lastpos = rm_Set (poset, n->u.p[1]->lastpos);
if (debug_dfa_trav)
printf ("CAT");
break;
case OR:
dfa_trav (parse_info, n->u.p[0]);
dfa_trav (parse_info, n->u.p[1]);
n->nullable = n->u.p[0]->nullable | n->u.p[1]->nullable;
n->firstpos = merge_Set (poset, n->u.p[0]->firstpos,
n->u.p[1]->firstpos);
n->lastpos = merge_Set (poset, n->u.p[0]->lastpos,
n->u.p[1]->lastpos);
n->u.p[0]->firstpos = rm_Set (poset, n->u.p[0]->firstpos);
n->u.p[0]->lastpos = rm_Set (poset, n->u.p[0]->lastpos);
n->u.p[1]->firstpos = rm_Set (poset, n->u.p[1]->firstpos);
n->u.p[1]->lastpos = rm_Set (poset, n->u.p[1]->lastpos);
if (debug_dfa_trav)
printf ("OR");
break;
case PLUS:
dfa_trav (parse_info, n->u.p[0]);
n->nullable = n->u.p[0]->nullable;
n->firstpos = n->u.p[0]->firstpos;
n->lastpos = n->u.p[0]->lastpos;
add_follow (parse_info, n->lastpos, n->firstpos);
if (debug_dfa_trav)
printf ("PLUS");
break;
case STAR:
dfa_trav (parse_info, n->u.p[0]);
n->nullable = 1;
n->firstpos = n->u.p[0]->firstpos;
n->lastpos = n->u.p[0]->lastpos;
add_follow (parse_info, n->lastpos, n->firstpos);
if (debug_dfa_trav)
printf ("STAR");
break;
case EPSILON:
n->nullable = 1;
n->lastpos = mk_Set (poset);
n->firstpos = mk_Set (poset);
if (debug_dfa_trav)
printf ("EPSILON");
break;
default:
posar[n->pos] = n;
n->nullable = 0;
n->firstpos = mk_Set (poset);
n->firstpos = add_Set (poset, n->firstpos, n->pos);
n->lastpos = mk_Set (poset);
n->lastpos = add_Set (poset, n->lastpos, n->pos);
if (debug_dfa_trav)
{
if (n->u.ch[0] < 0)
printf ("#%d (n#%d)", -n->u.ch[0], -n->u.ch[1]);
else if (n->u.ch[1] > n->u.ch[0])
{
putchar ('[');
out_char (n->u.ch[0]);
if (n->u.ch[1] > n->u.ch[0]+1)
putchar ('-');
out_char (n->u.ch[1]);
putchar (']');
}
else
out_char (n->u.ch[0]);
}
}
if (debug_dfa_trav)
{
printf ("\n nullable : %c\n", n->nullable ? '1' : '0');
printf (" firstpos :");
pr_Set (poset, n->firstpos);
printf (" lastpos :");
pr_Set (poset, n->lastpos);
}
}
static void init_followpos (struct DFA_parse *parse_info)
{
Set *fa;
int i;
parse_info->followpos = fa =
(Set *) imalloc (sizeof(Set) * (1+parse_info->position));
for (i = parse_info->position+1; --i >= 0; fa++)
*fa = mk_Set (parse_info->poset);
}
static void mk_dfa_tran (struct DFA_parse *parse_info, struct DFA_states *dfas)
{
int i, j, c;
int max_char;
short *pos, *pos_i;
Set tran_set;
int char_0, char_1;
struct DFA_state *dfa_from, *dfa_to;
struct Tnode **posar;
SetType poset;
Set *followpos;
assert (parse_info);
posar = parse_info->posar;
max_char = parse_info->charset->size;
pos = (short *) imalloc (sizeof(*pos) * (parse_info->position+1));
tran_set = cp_Set (parse_info->poset, parse_info->root->firstpos);
i = add_DFA_state (dfas, &tran_set, &dfa_from);
assert (i);
dfa_from->rule_no = 0;
poset = parse_info->poset;
followpos = parse_info->followpos;
while ((dfa_from = get_DFA_state (dfas)))
{
pos_i = pos;
j = i = 0;
for (tran_set = dfa_from->set; tran_set; tran_set = tran_set->next)
if ((c = posar[tran_set->value]->u.ch[0]) >= 0 && c <= max_char)
*pos_i++ = tran_set->value;
else if (c < 0)
{
if (i == 0 || c > i)
i = c;
c = posar[tran_set->value]->u.ch[1];
if (j == 0 || c > j)
j = c;
}
*pos_i = -1;
dfa_from->rule_no = -i;
dfa_from->rule_nno = -j;
for (char_1 = 0; char_1 <= max_char; char_1++)
{
char_0 = max_char+1;
for (pos_i = pos; (i = *pos_i) != -1; ++pos_i)
if (posar[i]->u.ch[1] >= char_1
&& (c=posar[i]->u.ch[0]) < char_0)
{
if (c < char_1)
char_0 = char_1;
else
char_0 = c;
}
if (char_0 > max_char)
break;
char_1 = max_char;
tran_set = mk_Set (poset);
for (pos_i = pos; (i = *pos_i) != -1; ++pos_i)
{
if ((c=posar[i]->u.ch[0]) > char_0 && c <= char_1)
char_1 = c - 1; /* forward chunk */
else if ((c=posar[i]->u.ch[1]) >= char_0 && c < char_1)
char_1 = c; /* backward chunk */
if (posar[i]->u.ch[1] >= char_0 && posar[i]->u.ch[0] <= char_0)
tran_set = union_Set (poset, tran_set, followpos[i]);
}
if (tran_set)
{
add_DFA_state (dfas, &tran_set, &dfa_to);
add_DFA_tran (dfas, dfa_from, char_0, char_1, dfa_to->no);
}
}
}
ifree (pos);
sort_DFA_states (dfas);
}
static void pr_tran (struct DFA_parse *parse_info, struct DFA_states *dfas)
{
struct DFA_state *s;
struct DFA_tran *tran;
int prev_no, i, c, no;
for (no=0; no < dfas->no; ++no)
{
s = dfas->sortarray[no];
assert (s->no == no);
printf ("DFA state %d", no);
if (s->rule_no)
printf ("#%d", s->rule_no);
if (s->rule_nno)
printf (" #%d", s->rule_nno);
putchar (':');
pr_Set (parse_info->poset, s->set);
prev_no = -1;
for (i=s->tran_no, tran=s->trans; --i >= 0; tran++)
{
if (prev_no != tran->to)
{
if (prev_no != -1)
printf ("]\n");
printf (" goto %d on [", tran->to);
prev_no = tran->to;
}
for (c = tran->ch[0]; c <= tran->ch[1]; c++)
printf ("%s", str_char(c));
}
if (prev_no != -1)
printf ("]\n");
putchar ('\n');
}
}
static void pr_verbose (struct DFA_parse *parse_info, struct DFA_states *dfas)
{
long i, j;
int k;
printf ("%d/%d tree nodes used, %d bytes each\n",
parse_info->use_Tnode, parse_info->max_Tnode, sizeof (struct Tnode));
k = inf_BSetHandle (parse_info->charset, &i, &j);
printf ("%ld/%ld character sets, %d bytes each\n",
i/k, j/k, k*sizeof(BSetWord));
k = inf_SetType (parse_info->poset, &i, &j);
printf ("%ld/%ld poset items, %d bytes each\n", i, j, k);
printf ("%d DFA states\n", dfas->no);
}
static void pr_followpos (struct DFA_parse *parse_info)
{
struct Tnode **posar = parse_info->posar;
int i;
printf ("\nfollowsets:\n");
for (i=1; i <= parse_info->position; i++)
{
printf ("%3d:", i);
pr_Set (parse_info->poset, parse_info->followpos[i]);
putchar ('\t');
if (posar[i]->u.ch[0] < 0)
printf ("#%d", -posar[i]->u.ch[0]);
else if (posar[i]->u.ch[1] > posar[i]->u.ch[0])
{
putchar ('[');
out_char (posar[i]->u.ch[0]);
if (posar[i]->u.ch[1] > posar[i]->u.ch[0]+1)
putchar ('-');
out_char (posar[i]->u.ch[1]);
putchar (']');
}
else
out_char (posar[i]->u.ch[0]);
putchar ('\n');
}
putchar ('\n');
}
void dfa_parse_cmap_clean (struct DFA *d)
{
struct DFA_parse *dfa = d->parse_info;
assert (dfa);
if (!dfa->charMap)
{
dfa->charMapSize = 7;
dfa->charMap = (int *)
imalloc (dfa->charMapSize * sizeof(*dfa->charMap));
}
dfa->charMap[0] = 0;
}
void dfa_parse_cmap_new (struct DFA *d, const int *cmap)
{
struct DFA_parse *dfa = d->parse_info;
const int *cp;
int size;
assert (dfa);
for (cp = cmap; *cp; cp += 2)
;
size = cp - cmap + 1;
if (size > dfa->charMapSize)
{
if (dfa->charMap)
ifree (dfa->charMap);
dfa->charMapSize = size;
dfa->charMap = (int *) imalloc (size * sizeof(*dfa->charMap));
}
memcpy (dfa->charMap, cmap, size * sizeof(*dfa->charMap));
}
void dfa_parse_cmap_del (struct DFA *d, int from)
{
struct DFA_parse *dfa = d->parse_info;
int *cc;
assert (dfa);
for (cc = dfa->charMap; *cc; cc += 2)
if (*cc == from)
{
while ((cc[0] = cc[2]))
{
cc[1] = cc[3];
cc += 2;
}
break;
}
}
void dfa_parse_cmap_add (struct DFA *d, int from, int to)
{
struct DFA_parse *dfa = d->parse_info;
int *cc;
int indx, size;
assert (dfa);
for (cc = dfa->charMap; *cc; cc += 2)
if (*cc == from)
{
cc[1] = to;
return ;
}
indx = cc - dfa->charMap;
size = dfa->charMapSize;
if (indx >= size)
{
int *cn = (int *) imalloc ((size+16) * sizeof(*dfa->charMap));
memcpy (cn, dfa->charMap, indx*sizeof(*dfa->charMap));
ifree (dfa->charMap);
dfa->charMap = cn;
dfa->charMapSize = size+16;
}
dfa->charMap[indx] = from;
dfa->charMap[indx+1] = to;
dfa->charMap[indx+2] = 0;
}
void dfa_parse_cmap_thompson (struct DFA *d)
{
static int thompson_chars[] =
{
'*', L_CLOS0,
'+', L_CLOS1,
'|', L_ALT,
'^', L_START,
'$', L_END,
'?', L_QUEST,
'.', L_ANY,
'(', L_LP,
')', L_RP,
' ', 0,
'\t', 0,
'\n', 0,
0
};
dfa_parse_cmap_new (d, thompson_chars);
}
static struct DFA_parse *dfa_parse_init (void)
{
struct DFA_parse *parse_info =
(struct DFA_parse *) imalloc (sizeof (struct DFA_parse));
parse_info->charset = mk_BSetHandle (255, 20);
parse_info->position = 0;
parse_info->rule = 0;
parse_info->root = NULL;
parse_info->anyset = mk_BSet (&parse_info->charset);
res_BSet (parse_info->charset, parse_info->anyset);
com_BSet (parse_info->charset, parse_info->anyset);
parse_info->use_Tnode = parse_info->max_Tnode = 0;
parse_info->start = parse_info->end = NULL;
parse_info->charMap = NULL;
parse_info->charMapSize = 0;
parse_info->cmap = NULL;
return parse_info;
}
static void rm_dfa_parse (struct DFA_parse **dfap)
{
struct DFA_parse *parse_info = *dfap;
assert (parse_info);
term_Tnode(parse_info);
rm_BSetHandle (&parse_info->charset);
ifree (parse_info->charMap);
ifree (parse_info);
*dfap = NULL;
}
static struct DFA_states *mk_dfas (struct DFA_parse *dfap, int poset_chunk)
{
struct DFA_states *dfas;
struct DFA_parse *parse_info = dfap;
assert (poset_chunk > 10);
assert (dfap);
parse_info->poset = mk_SetType (poset_chunk);
init_pos(parse_info);
init_followpos(parse_info);
assert (parse_info->root);
dfa_trav (parse_info, parse_info->root);
if (debug_dfa_followpos)
pr_followpos(parse_info);
init_DFA_states (&dfas, parse_info->poset, (int) (STATE_HASH));
mk_dfa_tran (parse_info, dfas);
if (debug_dfa_tran)
{
pr_tran (parse_info, dfas);
}
if (dfa_verbose)
pr_verbose (parse_info, dfas);
del_pos(parse_info);
del_followpos(parse_info);
rm_SetType (parse_info->poset);
return dfas;
}
struct DFA *dfa_init (void)
{
struct DFA *dfa;
dfa = (struct DFA *) imalloc (sizeof(*dfa));
dfa->parse_info = dfa_parse_init ();
dfa->state_info = NULL;
dfa->states = NULL;
dfa_parse_cmap_thompson (dfa);
return dfa;
}
void dfa_set_cmap (struct DFA *dfa, void *vp,
const char **(*cmap)(void *vp, const char **from, int len))
{
dfa->parse_info->cmap = cmap;
dfa->parse_info->cmap_data = vp;
}
int dfa_parse (struct DFA *dfa, const char **pattern)
{
struct Tnode *top;
struct DFA_parse *parse_info;
assert (dfa);
assert (dfa->parse_info);
parse_info = dfa->parse_info;
if (!parse_info->cmap)
{
res_BSet (parse_info->charset, parse_info->anyset);
add_BSet (parse_info->charset, parse_info->anyset, '\n');
com_BSet (parse_info->charset, parse_info->anyset);
}
do_parse (parse_info, pattern, &top);
if (parse_info->err_code)
return parse_info->err_code;
if ( !dfa->parse_info->root )
dfa->parse_info->root = top;
else
{
struct Tnode *n;
n = mk_Tnode (parse_info);
n->pos = OR;
n->u.p[0] = dfa->parse_info->root;
n->u.p[1] = top;
dfa->parse_info->root = n;
}
return 0;
}
void dfa_mkstate (struct DFA *dfa)
{
assert (dfa);
dfa->state_info = mk_dfas (dfa->parse_info, POSET_CHUNK);
dfa->no_states = dfa->state_info->no;
dfa->states = dfa->state_info->sortarray;
rm_dfa_parse (&dfa->parse_info);
}
void dfa_delete (struct DFA **dfap)
{
assert (dfap);
assert (*dfap);
if ((*dfap)->parse_info)
rm_dfa_parse (&(*dfap)->parse_info);
if ((*dfap)->state_info)
rm_DFA_states (&(*dfap)->state_info);
ifree (*dfap);
*dfap = NULL;
}
syntax highlighted by Code2HTML, v. 0.9.1