#!/usr/bin/env python
# -*- coding: latin-1; -*-
#
# PgWorksheet - PostgreSQL Front End
# http://pgworksheet.projects.postgresql.org/
#
# Copyright © 2004-2005 Henri Michelon & CML http://www.e-cml.org/
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details (read LICENSE.txt).
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
#
# $Id: Lexical.py,v 1.6 2005/10/25 17:31:24 hmichelon Exp $
#
# http://www.postgresql.org/docs/8.0/static/sql-syntax.html

# basic characters sets
SPACES = [ ' ', '\t', '\n' ]
DIGITS = [ '0', '1', '2', '3', '4', '5', '6', '7', '8', '9' ]
NUMERIC = DIGITS + [ 'e', '.', '+', '-' ]
OPERATOR_CHARS = [ '+', '-', '*', '/', '<', '>', '=', '~', '!', \
                   '@', '#', '%', '^', '&', '|', '`', '?' ]
SPECIAL_CHARS = [ '(', ')', '[', ']', ',', ';', ':', '*', '.' ]
OPERATORS = OPERATOR_CHARS + SPECIAL_CHARS
# not the first character of an identifier
NOT_IDENT_START = SPECIAL_CHARS + OPERATOR_CHARS + DIGITS + [ '$' ]
# not a character of an identifier
NOT_IDENT_CHAR = SPECIAL_CHARS + OPERATOR_CHARS + SPACES + [ "'" ]
# not a character of a dollar quoted string
NOT_DOLLAR_QUOTED = [ '$' ] + SPACES
# letters that may introduce a bit-string constant; PostgreSQL uses B for
# binary and X for hexadecimal, H is still accepted for backward compatibility
BIT_STRING_PREFIXES = [ 'B', 'b', 'X', 'x', 'H', 'h' ]


class Token:
  """A lexical token.

  token      -- the token kind ('string', 'identifier', 'operator', ...)
  start_iter -- iterator positioned on the first character of the token
  end_iter   -- iterator positioned just after the last character
  value      -- optional text attached to the token (upper-cased identifier,
                operator character, psql command, dollar-quote tag)
  """

  def __init__(self, token, start_iter, end_iter, value=None):
    self.token = token
    self.start_iter = start_iter
    self.end_iter = end_iter
    self.value = value

  def __repr__(self):
    return 'Token(%r, value=%r)' % (self.token, self.value)


class Eob(Exception):
  """End of Buffer Exception"""
  # Must derive from Exception: Python 3 refuses to raise anything else.


class Lexical:
  """Simplified lexical analyser"""

  def analyse(self, buffer, start, end):
    """Tokenize the text between the iterators start and end.

    The iterators must provide copy(), is_end(), get_char(),
    forward_char(), backward_char() and compare().  Both are copied, the
    caller's iterators are not moved.  Returns the list of Token found.
    """
    self.buffer = buffer
    self.current = start.copy()
    self.tokens = []
    try:
      self.lexical_analyser(end.copy())
    except Eob:
      # reaching the end of the buffer is the normal way to finish
      pass
    return self.tokens


  def _emit(self, kind, start, end, value=None):
    """Appends a new token to the result list"""
    self.tokens.append(Token(kind, start, end, value))


  def next_char(self):
    """Returns the next character to analyse and moves past it.

    Raises Eob when the current position is the end of the buffer.
    """
    if (self.current.is_end()):
      raise Eob()
    c = self.current.get_char()
    self.current.forward_char()
    return c


  def skip_spaces(self, c):
    """Skips everything that looks like a space/tab/etc...

    Returns the first character that is not in SPACES."""
    while (c in SPACES):
      c = self.next_char()
    return c


  def string(self):
    """Single quoted strings.

    Called just after the opening quote was read.  Returns the first
    character following the string."""
    start = self.current.copy()
    start.backward_char()
    prev = None
    try:
      c = self.next_char()
      while (True):
        if ((c == "'") and (prev != '\\')):
          # a single quote in the string...
          c = self.next_char()
          if (c != "'"):
            break
        prev = c
        c = self.next_char()
    except Eob:
      # unterminated string : everything up to the end of the buffer
      self._emit('string', start, self.current.copy())
      raise
    end = self.current.copy()
    end.backward_char()
    self._emit('string', start, end)
    return c


  def dollar_string(self):
    """Dollar-quoted strings.

    Called just after a '$' was read.  Emits a 'dollarquote' token for the
    opening tag, a 'string' token for the content and a 'dollarquote'
    token for the closing tag.  A '$' that does not start a tag is emitted
    as an 'identifier'.  Returns the first character following what was
    consumed."""
    # first bound
    start = self.current.copy()
    start.backward_char()
    string_tag = ''
    try:
      c = self.next_char()
      while (c not in NOT_DOLLAR_QUOTED):
        string_tag = string_tag + c
        c = self.next_char()
    except Eob:
      self._emit('identifier', start, self.current.copy(), string_tag.upper())
      raise
    if (c != '$'):
      end = self.current.copy()
      end.backward_char()
      self._emit('identifier', start, end, string_tag.upper())
      return c
    # the opening tag includes both '$'
    self._emit('dollarquote', start, self.current.copy(), string_tag.upper())

    # string content
    content_start = self.current.copy()
    try:
      c = self.next_char()
    except Eob:
      # kept from the original : an empty 'identifier' token is emitted
      # when the buffer ends right after the opening tag
      self._emit('identifier', content_start, self.current.copy(),
                 string_tag.upper())
      raise
    try:
      while (True):
        if (c != '$'):
          c = self.next_char()
          continue
        # candidate closing tag, remember where its '$' is
        closing_start = self.current.copy()
        closing_start.backward_char()
        c = self.next_char()
        closing_tag = ''
        while (c not in NOT_DOLLAR_QUOTED):
          closing_tag = closing_tag + c
          c = self.next_char()
        # a closing tag needs the same name AND its final '$'; otherwise
        # c is examined again by the loop (it may start the real tag)
        if ((c == '$') and (closing_tag == string_tag)):
          break
    except Eob:
      # unterminated : the content runs up to the end of the buffer
      self._emit('string', content_start, self.current.copy())
      raise
    self._emit('string', content_start, closing_start)
    self._emit('dollarquote', closing_start, self.current.copy(),
               closing_tag.upper())
    # the final '$' belongs to the closing tag, hand over what follows it
    return self.next_char()


  def bit_string_constant(self, start):
    """Binary and Hexadecimal numeric constants using strings.

    start is the leading letter already read by the caller (the parameter
    name is historical).  When the letter is not followed by a quote it is
    handled as the beginning of an identifier.  Returns the first
    character following the constant or the identifier."""
    c = self.next_char()
    if (c != "'"):
      return self.identifier(c, start)
    # the token begins on the leading letter, two characters back
    begin = self.current.copy()
    begin.backward_char()
    begin.backward_char()
    try:
      c = self.next_char()
      while (c != "'"):
        c = self.next_char()
    except Eob:
      self._emit('numeric_constant', begin, self.current.copy())
      raise
    self._emit('numeric_constant', begin, self.current.copy())
    return self.next_char()


  def identifier(self, c, ident = ''):
    """An identifier, keyword, type name, etc...

    c is the character just read, ident the characters of the identifier
    that were read before c (if any).  The token value is the upper-cased
    identifier.  Returns the first character following the identifier."""
    start = self.current.copy()
    # go back over c and over the characters already in ident
    for i in range(0, len(ident) + 1):
      start.backward_char()
    try:
      while (c not in NOT_IDENT_CHAR):
        ident = ident + c
        c = self.next_char()
    except Eob:
      self._emit('identifier', start, self.current.copy(), ident.upper())
      raise
    end = self.current.copy()
    end.backward_char()
    self._emit('identifier', start, end, ident.upper())
    return c


  def numeric(self, c):
    """A numeric constant.

    c is the character just read.  Returns the first character that is
    not in NUMERIC."""
    start = self.current.copy()
    start.backward_char()
    try:
      while (c in NUMERIC):
        c = self.next_char()
    except Eob:
      self._emit('numeric_constant', start, self.current.copy())
      raise
    end = self.current.copy()
    end.backward_char()
    self._emit('numeric_constant', start, end)
    return c


  def simple_comment(self):
    """One line comment using --

    Called just after the two dashes were read.  The token includes the
    end of line character."""
    start = self.current.copy()
    start.backward_char()
    start.backward_char()
    try:
      c = self.next_char()
      while (c != '\n'):
        c = self.next_char()
    except Eob:
      self._emit('comment', start, self.current.copy())
      raise
    self._emit('comment', start, self.current.copy())


  def comment(self):
    """Multi lines comments using /* */

    Called just after the opening '/*' was read, nested comments are
    counted.  Returns the first character following the comment."""
    start = self.current.copy()
    start.backward_char()
    start.backward_char()
    prev = None
    nested = 0
    try:
      c = self.next_char()
      while (True):
        if (c == '*'):
          c = self.next_char()
          if (prev == '/'):
            # '/*' : one more level
            nested = nested + 1
            continue
          if (c == '/'):
            # '*/' : end of the comment or of one nested level
            if (nested == 0):
              c = self.next_char()
              break
            else:
              nested = nested - 1
          else:
            prev = c
            continue
        prev = c
        c = self.next_char()
    except Eob:
      self._emit('comment', start, self.current.copy())
      raise
    end = self.current.copy()
    end.backward_char()
    self._emit('comment', start, end)
    return c


  def psql(self):
    """A PgSQL Command

    Called just after the backslash was read.  The command runs up to the
    end of the line or to a ';', the token value is the command text
    including the leading backslash."""
    start = self.current.copy()
    start.backward_char()
    cmd = '\\'
    try:
      c = self.next_char()
      while (c != '\n') and (c != ';'):
        cmd = cmd + c
        c = self.next_char()
    except Eob:
      self._emit('psql', start, self.current.copy(), cmd)
      raise
    self._emit('psql', start, self.current.copy(), cmd)


  def lexical_analyser(self, fin):
    """A simplified lexical analyser

    Reads characters until the current position is past fin and fills
    self.tokens.  The end of the buffer is signalled by Eob, which is not
    caught here."""
    c = self.next_char()
    while (self.current.compare(fin) <= 0):
      c = self.skip_spaces(c)
      # Multi lines comments
      if (c == '/'):
        c = self.next_char()
        if (c == '*'):
          c = self.comment()
          continue
        else:
          # not a comment : the character will be read again below
          self.current.backward_char()
      # One line comments
      elif (c == '-'):
        c = self.next_char()
        if (c == '-'):
          self.simple_comment()
        else:
          self.current.backward_char()
      # psql commands
      elif (c == '\\'):
        self.psql()
      # numeric
      elif (c in DIGITS):
        c = self.numeric(c)
        continue
      # bit strings
      elif (c in BIT_STRING_PREFIXES):
        c = self.bit_string_constant(c)
        continue
      # strings
      elif (c == "'"):
        c = self.string()
        continue
      # dollar-quoted strings
      elif (c == '$'):
        c = self.dollar_string()
        continue
      # numeric
      elif (c == '.'):
        c = self.next_char()
        if (c in DIGITS):
          # step back so that numeric() starts its token on the dot
          self.current.backward_char()
          c = self.numeric(self.current.get_char())
          continue
        else:
          # a lone dot : do not lose the character that follows it
          self.current.backward_char()
      # quoted identifiers
      elif (c == '"'):
        c = self.next_char()
        while (c != '"'):
          c = self.next_char()
      # operators
      elif (c in OPERATORS):
        start = self.current.copy()
        start.backward_char()
        self._emit('operator', start, self.current.copy(), c)
      # everything else
      elif (c not in NOT_IDENT_START):
        c = self.identifier(c)
        continue
      c = self.next_char()