/* ntp_scanner.c
*
* The source code for a simple lexical analyzer.
*
* Written By: Sachin Kamboj
* University of Delaware
* Newark, DE 19711
* Copyright (c) 2006
*/
#ifdef HAVE_CONFIG_H
# include <config.h>
#endif
#include <stdio.h>
#include <ctype.h>
#include <stdlib.h>
#include <errno.h>
#include <string.h>
#include "ntp_config.h"
#include "ntpsim.h"
#include "ntp_scanner.h"
#include "ntp_parser.h"
/* Define a structure to hold the FSA for the keywords.
* The structure is actually a trie
*/
struct state {
char ch; /* Input character associated with the state */
struct state *next_state; /* Next state to advance to on reading ch */
struct state *next_char; /* Pointer to next character associated with
the state */
int token; /* Token to be returned on successful parse */
int expect_string; /* A boolean flag, which when set, indicates
that the next token should be a string */
};
/* SCANNER GLOBAL VARIABLES
* ------------------------
*/
#define MAX_LEXEME 1024+1 /* The maximum size of a lexeme */
char yytext[MAX_LEXEME]; /* Buffer for storing the input text/lexeme */
struct state *key_scanner; /* A FSA for recognizing keywords */
extern int input_from_file;
/* CONSTANTS
* ---------
*/
#define NON_ACCEPTING 0 /* A constant that depicts a non-accepting state */
#define MAX_LEXEME 1024+1 /* The maximum size of a lexeme */
#define NO_OF_SPECIAL_CHARS 8
/* SCANNER GLOBAL VARIABLES
* ------------------------
*/
char special_char[] = /* This list of special characters */
{ '{', '}', '(', ')', ',', ';','|','=' };
/* FUNCTIONS
* ---------
*/
int get_next_char(void);
/* Define a function to create the states of the scanner. This function
* is used by the create_keyword_scanner function below.
*
* This function takes a suffix of a keyword, the token to be returned on
* recognizing the complete keyword, and any pre-existing state that exists
* for some other keyword that has the same prefix as the current one.
*/
struct state *create_states(char *keyword,
int token,
int expect_string,
struct state *pre_state)
{
struct state *my_state;
struct state *return_state = pre_state;
struct state *prev_char = NULL;
struct state *curr_char = pre_state;
/* Find the correct position to insert the state.
* All states should be in alphabetical order
*/
while (curr_char && (keyword[0] < curr_char->ch)) {
prev_char = curr_char;
curr_char = curr_char->next_char;
}
/* Check if a previously seen keyword has the same prefix as the
* current keyword. If yes, simply use the state for that keyword
* as my_state
*/
if (curr_char && (keyword[0] == curr_char->ch))
my_state = curr_char;
else {
my_state = (struct state *) malloc(sizeof(struct state));
my_state->ch = keyword[0]; /* Store the first character
of the keyword */
my_state->next_state = NULL;
my_state->next_char = curr_char;
my_state->token = NON_ACCEPTING; /* Not an accepting state */
my_state->expect_string = NO_ARG;
if (prev_char)
prev_char->next_char = my_state;
else
return_state = my_state;
}
/* Check if the next character is '\0'.
* If yes, we are done with the recognition and this is an accepting
* state.
* If not, we need to continue scanning
*/
if (keyword[1] == '\0') {
my_state->token = token;
my_state->expect_string = expect_string;
}
else {
my_state->next_state = create_states(&keyword[1],
token,
expect_string,
my_state->next_state);
}
return return_state;
}
/* Define a function that takes a list of (keyword, token) values and
* creates a keywords scanner out of it.
*/
struct state *create_keyword_scanner(struct key_tok *keyword_list)
{
struct state *scanner = NULL;
while (keyword_list->keyword != NULL) {
scanner = create_states(keyword_list->keyword,
keyword_list->token,
keyword_list->expect_string,
scanner);
++keyword_list;
}
return scanner;
}
/* Define a function to delete the keyword scanner, freeing all the allocated
* memory
*/
static void delete_keyword_scanner(struct state *my_key_scanner)
{
if (my_key_scanner) {
delete_keyword_scanner(my_key_scanner->next_char);
delete_keyword_scanner(my_key_scanner->next_state);
free(my_key_scanner);
}
}
/* Define a function to print the keyword scanner built using the
* above functions.
*/
void print_keyword_scanner(struct state *my_key_scanner, int pos)
{
static char lexeme[MAX_LEXEME];
struct state *curr_state = my_key_scanner;
while (curr_state != NULL) {
lexeme[pos] = curr_state->ch;
if (curr_state->token != NON_ACCEPTING) {
lexeme[pos + 1] = '\0';
printf("%s\n",lexeme);
}
if (curr_state->next_state != NULL)
print_keyword_scanner(curr_state->next_state, pos + 1);
curr_state = curr_state->next_char;
}
}
/* FILE INTERFACE
* --------------
* We define a couple of wrapper functions around the standard C fgetc
* and ungetc functions in order to include positional bookkeeping
*/
struct FILE_INFO *F_OPEN(const char *path, const char *mode)
{
struct FILE_INFO *my_info = (struct FILE_INFO *)
malloc(sizeof(struct FILE_INFO));
if (my_info == NULL)
return NULL;
my_info->line_no = 0;
my_info->col_no = 0;
my_info->fname = path;
if ((my_info->fd = fopen(path,mode)) == NULL) {
free(my_info);
return NULL;
}
return my_info;
}
int FGETC(struct FILE_INFO *stream)
{
int ch = fgetc(stream->fd);
++stream->col_no;
if (ch == '\n') {
stream->prev_line_col_no = stream->col_no;
++stream->line_no;
stream->col_no = 1;
}
return ch;
}
/* BUGS: 1. Function will fail on more than one line of pushback
* 2. No error checking is done to see if ungetc fails
* SK: I don't think its worth fixing these bugs for our purposes ;-)
*/
int UNGETC(int ch, struct FILE_INFO *stream)
{
if (ch == '\n') {
stream->col_no = stream->prev_line_col_no;
stream->prev_line_col_no = -1;
--stream->line_no;
}
--stream->col_no;
return ungetc(ch, stream->fd);
}
int FCLOSE(struct FILE_INFO *stream)
{
int ret_val = fclose(stream->fd);
if (!ret_val)
free(stream);
return ret_val;
}
/* STREAM INTERFACE
* ----------------
* Provide a wrapper for the stream functions so that the
* stream can either read from a file or from a character
* array.
* NOTE: This is not very efficient for reading from character
* arrays, but needed to allow remote configuration where the
* configuration command is provided through ntpq
*
* The behavior of there two functions is determined by the
* input_from_file flag.
*/
int get_next_char(void)
{
if (input_from_file)
return FGETC(ip_file);
else {
if (remote_config.buffer[remote_config.pos] == '\0')
return EOF;
else
return(remote_config.buffer[remote_config.pos++]);
}
}
void push_back_char(int ch)
{
if (input_from_file)
UNGETC(ch, ip_file);
else
remote_config.pos--;
}
/* STATE MACHINES
* --------------
*/
/* Keywords */
static int is_keyword(char *lexeme, int *expect_string)
{
struct state *curr_state = key_scanner;
int token = NON_ACCEPTING;
int i;
for (i = 0;lexeme[i];++i) {
while (curr_state && (lexeme[i] != curr_state->ch))
curr_state = curr_state->next_char;
if (curr_state && (lexeme[i] == curr_state->ch)) {
*expect_string = curr_state->expect_string;
token = curr_state->token;
curr_state = curr_state->next_state;
}
else {
*expect_string = NO_ARG;
token = NON_ACCEPTING;
break;
}
}
return token;
}
/* Integer */
static int is_integer(char *lexeme)
{
int i = 0;
/* Allow a leading minus sign */
if (lexeme[i] == '-')
++i;
/* Check that all the remaining characters are digits */
for (;lexeme[i]; ++i) {
if (!isdigit(lexeme[i]))
return 0;
}
return 1;
}
/* Double */
static int is_double(char *lexeme)
{
int i;
/* Initialize the state machine
int int_part = 1;
int frac_part = 0;
*/
int no_digits = 0; /* Number of digits read */
i = 0;
/* Check for an optional '+' or '-' */
if (lexeme[i] == '+' || lexeme[i] == '-')
++i;
/* Read the integer part */
for (;lexeme[i] && isdigit(lexeme[i]); ++i)
++no_digits;
/* Check for the required decimal point */
if (lexeme[i] == '.')
++i;
else
return 0;
/* Check for any digits after the decimal point */
for (;lexeme[i] && isdigit(lexeme[i]); ++i)
++no_digits;
/* The number of digits in both the decimal part and the fraction part
must not be zero at this point */
if (no_digits == 0)
return 0;
/* Check if we are done */
if (lexeme[i] == '\0')
return 1;
/* There is still more output, read the Exponent Part */
if (lexeme[i] == 'e' || lexeme[i] == 'E') {
++i;
}
else
return 0;
/* Read an optional Sign */
if (lexeme[i] == '+' || lexeme[i] == '-')
++i;
/* Now read the exponent part */
for (;lexeme[i] && isdigit(lexeme[i]); ++i)
++no_digits;
/* Check if we are done */
if (lexeme[i] == '\0')
return 1;
else
return 0;
}
/* IPv4 Address */
static int is_ipv4_address(char *lexeme)
{
int i;
int octet_no = 1;
int digits_read = 0;
for (i = 0;lexeme[i];++i) {
if (isdigit(lexeme[i]))
++digits_read;
else if (lexeme[i] == '.') {
++octet_no;
if (digits_read < 1 || digits_read > 3)
return 0;
digits_read = 0;
}
else
return 0;
}
if (octet_no == 4 && digits_read >= 1 && digits_read <= 3)
return 1;
else
return 0;
}
/* IPv6 Address
* See RFC 3513, Section 2.2 for details
* Note: full error checking is not being done
*/
static int is_ipv6_address(char *lexeme)
{
int i;
int quad_no = 1;
int digits_read = 0;
int group_seen = 0;
for (i = 0;lexeme[i];++i) {
if (isxdigit(lexeme[i]))
++digits_read;
else if (lexeme[i] == ':') {
++quad_no;
if (digits_read == 0 && !group_seen)
group_seen = 1;
else if (digits_read > 4 ||
(digits_read == 0 && group_seen))
return 0;
digits_read = 0;
}
else if (lexeme[i] == '.') {
/* A IPv4 address has been mixed with an IPv6 Address
* Ensure that less than 6 quads have been read and that
* the IPv4 address is valid
*/
while (i >= 0 && lexeme[i-1] != ':')
--i;
return ((quad_no <= 6) && is_ipv4_address(&lexeme[i]));
}
else
return 0;
}
/* Make sure that we don't end with a single ':' */
if (lexeme[i-1] == ':' && lexeme[i-2] != ':')
return 0;
return ((quad_no <= 7 && digits_read <= 4) ? 1 : 0);
}
/* Host Name */
/* static int is_host_name (char *lexeme) */
/* { */
/* int i; */
/* for (i = 0;lexeme[i];++i) */
/* if (!isalnum(lexeme[i])) { */
/* Check for two consequtive periods which are not allowed */
/* if (lexeme[i] == '.' && lexeme[i + 1] != '.') */
/* ; */
/* else */
/* return 0; */
/* } */
/* return 1; */
/* } */
/* Define a function to test whether a character is a special character */
static int is_special(char ch)
{
int i;
for (i = 0;i < NO_OF_SPECIAL_CHARS;++i)
if (ch == special_char[i])
return 1;
return 0;
}
static int is_EOC(char ch)
{
if ((old_config_style && (ch == '\n')) ||
(!old_config_style && (ch == ';')))
return 1;
return 0;
}
/* Define a function that does the actual scanning
* Bison expects this function to be called yylex and for it to take no
* input and return an int
*/
int yylex()
{
int i, instring = 0;
int token; /* The return value/the recognized token */
int ch;
static int expect_string = NO_ARG;
do {
/* Ignore whitespace at the beginning */
while ((ch = get_next_char()) != EOF &&
isspace(ch) &&
!is_EOC(ch))
; /* Null Statement */
if (ch == EOF) {
if (input_from_file == 0)
return 0;
if (!(curr_include_level > 0))
return 0;
else {
ip_file = fp[--curr_include_level];
return T_EOC;
}
} else if (is_EOC(ch)) {
expect_string = NO_ARG; /* Reset expect_string */
return T_EOC;
}
/* Check if the next character is a special character. If yes, return
* the special character
*/
else if (is_special(ch))
return ch;
else
push_back_char(ch);
/* Read in the lexeme */
for (i = 0;(i < MAX_LEXEME) &&
(yytext[i] = get_next_char()) != EOF; ++i) {
/* Break on reading in whitespace or a special character */
if (isspace(yytext[i]) || is_special(yytext[i]) || is_EOC(ch)
|| yytext[i] == '"')
break;
/* Read the rest of the line on reading a start of comment
character */
if (yytext[i] == '#') {
while ((yytext[i] = get_next_char()) != EOF &&
yytext[i] != '\n')
; /* Null Statement */
break;
}
}
/* Pick up all of the string inside between " marks, to end of line.
* If we make it to EOL without a terminating " assume it for them.
*/
if (yytext[i] == '"') {
instring = 1;
while ((yytext[i] = get_next_char()) != EOF &&
yytext[i] != '"' && yytext[i] != '\n') {
i++;
}
if (yytext[i] == '"')
yytext[i] = ' ';
}
/* If the last character read was an EOF, pushback a newline
* character. This is to prevent a parse error when there is
* no newline at the end of a file
*/
if (yytext[i] == EOF)
push_back_char('\n');
/* Pushback the last character read that is not a part of this lexeme */
push_back_char(yytext[i]);
yytext[i] = '\0';
} while (i == 0);
#ifdef DEBUG
if (debug >= 3)
printf ("yylex: Just Read: %s\n", yytext);
#endif
/* Now return the desired token */
if ((expect_string == NO_ARG) && (!instring) &&
(token = is_keyword(yytext, &expect_string)))
return token;
else if (is_integer(yytext) && (!instring) ) {
errno = 0;
if ((yylval.Integer = strtol(yytext,(char **) NULL, 10)) == 0
&& ((errno == EINVAL) || (errno == ERANGE))) {
fprintf(stderr,"Integer cannot be represented: %s\n",
yytext);
exit(1);
}
else
return T_Integer;
}
else if (is_double(yytext) && (!instring)) {
errno = 0;
if ((yylval.Double = atof(yytext)) == 0 && errno == ERANGE) {
fprintf(stderr, "Double too large to represent: %s\n",
yytext);
exit(1);
}
else
return T_Double;
}
else if (is_ipv4_address(yytext) && (!instring)) {
if (expect_string == SINGLE_ARG)
expect_string = NO_ARG;
errno = 0;
if ((yylval.String = strdup(yytext)) == NULL &&
errno == ENOMEM) {
fprintf(stderr, "Could not allocate memory for: %s\n",
yytext);
exit(1);
}
else
return T_IPv4_address;
}
else if (is_ipv6_address(yytext) && (!instring)) {
if (expect_string == SINGLE_ARG)
expect_string = NO_ARG;
errno = 0;
if ((yylval.String = strdup(yytext)) == NULL &&
errno == ENOMEM) {
fprintf(stderr, "Could not allocate memory for: %s\n",
yytext);
exit(1);
}
else
return T_IPv6_address;
}
else { /* Default: Everything is a string */
instring = 0;
if (expect_string == SINGLE_ARG)
expect_string = NO_ARG;
errno = 0;
if ((yylval.String = strdup(yytext)) == NULL &&
errno == ENOMEM) {
fprintf(stderr, "Could not allocate memory for: %s\n",
yytext);
exit(1);
}
else
return T_String;
}
return 1;
}
syntax highlighted by Code2HTML, v. 0.9.1