#! /usr/bin/env python
# encoding: utf-8
# Thomas Nagy, 2006 (ita)

"""Waf preprocessor for finding dependencies.

Because of the way includes work, the preprocessing is done in two steps:
- filter the comments and output the preprocessing lines (class filter)
- interpret the preprocessing lines, jumping on the headers during the
  process (class cparse)

Known limitations of the expression evaluator: no operator precedence
(strict left-to-right evaluation), no parentheses, decimal integers only,
and function-like macro calls are dropped instead of being expanded.
"""

import os
import string
import sys

import Params
from Params import debug, error, warning

strict_quotes = 0
"Keep <> for system includes (do not search for those includes)"

# cache of the filtered preprocessor lines, indexed by file path
parse_cache = {}

# characters allowed in an identifier; ascii_letters is used because C
# identifiers do not depend on the current locale
alpha = string.ascii_letters + '_' + string.digits

# states of a conditional block (see cparse.state)
accepted = 'a'
ignored = 'i'
undefined = 'u'
skipped = 's'

# token types produced by cparse.get_body
num = 'number'
op = 'operator'
ident = 'ident'
stri = 'string'
chr = 'char'  # NOTE: shadows the builtin, kept because the name is public

# third character of a trigraph ("??x") -> replacement character
trigs = {
    '=': '#',
    '-': '~',
    '/': '\\',
    '!': '|',
    '\'': '^',
    '(': '[',
    ')': ']',
    '<': '{',
    '>': '}',
}

# State machines used by parse_token: each entry maps a character to the
# index of the next state; the key '$$' holds the token recognised when the
# machine stops in that state.
punctuators_table = [
    {'!': 43, '#': 45, '%': 22, '&': 30, ')': 50, '(': 49, '+': 11, '*': 18,
     '-': 14, ',': 56, '/': 20, '.': 38, ';': 55, ':': 41, '=': 28, '<': 1,
     '?': 54, '>': 7, '[': 47, ']': 48, '^': 36, '{': 51, '}': 52, '|': 33,
     '~': 53},
    {'=': 6, ':': 5, '%': 4, '<': 2, '$$': '<'},
    {'$$': '<<', '=': 3},
    {'$$': '<<='},
    {'$$': '<%'},
    {'$$': '<:'},
    {'$$': '<='},
    {'$$': '>', '=': 10, '>': 8},
    {'$$': '>>', '=': 9},
    {'$$': '>>='},
    {'$$': '>='},
    {'$$': '+', '+': 12, '=': 13},
    {'$$': '++'},
    {'$$': '+='},
    {'=': 17, '-': 15, '$$': '-', '>': 16},
    {'$$': '--'},
    {'$$': '->'},
    {'$$': '-='},
    {'$$': '*', '=': 19},
    {'$$': '*='},
    {'$$': '/', '=': 21},
    {'$$': '/='},
    {'$$': '%', ':': 23, '=': 26, '>': 27},
    {'$$': '%:', '%': 24},
    {':': 25},
    {'$$': '%:%:'},
    {'$$': '%='},
    {'$$': '%>'},
    {'$$': '=', '=': 29},
    {'$$': '=='},
    {'$$': '&', '=': 32, '&': 31},
    {'$$': '&&'},
    {'$$': '&='},
    {'$$': '|', '=': 35, '|': 34},
    {'$$': '||'},
    {'$$': '|='},
    {'$$': '^', '=': 37},
    {'$$': '^='},
    {'$$': '.', '.': 39},
    {'.': 40},
    {'$$': '...'},
    {'$$': ':', '>': 42},
    {'$$': ':>'},
    {'$$': '!', '=': 44},
    {'$$': '!='},
    {'#': 46, '$$': '#'},
    {'$$': '##'},
    {'$$': '['},
    {'$$': ']'},
    {'$$': '('},
    {'$$': ')'},
    {'$$': '{'},
    {'$$': '}'},
    {'$$': '~'},
    {'$$': '?'},
    {'$$': ';'},
    {'$$': ','},
]

preproc_table = [
    {'e': 16, 'd': 26, 'i': 1, 'p': 37, 'u': 32, 'w': 46},
    {'f': 8, 'n': 2},
    {'c': 3},
    {'l': 4},
    {'u': 5},
    {'d': 6},
    {'e': 7},
    {'$$': 'include'},
    {'$$': 'if', 'd': 9, 'n': 12},
    {'e': 10},
    {'f': 11},
    {'$$': 'ifdef'},
    {'d': 13},
    {'e': 14},
    {'f': 15},
    {'$$': 'ifndef'},
    {'r': 53, 'l': 17, 'n': 22},
    {'i': 20, 's': 18},
    {'e': 19},
    {'$$': 'else'},
    {'f': 21},
    {'$$': 'elif'},
    {'d': 23},
    {'i': 24},
    {'f': 25},
    {'$$': 'endif'},
    {'e': 27},
    {'b': 43, 'f': 28},
    {'i': 29},
    {'n': 30},
    {'e': 31},
    {'$$': 'define'},
    {'n': 33},
    {'d': 34},
    {'e': 35},
    {'f': 36},
    {'$$': 'undef'},
    {'r': 38},
    {'a': 39},
    {'g': 40},
    {'m': 41},
    {'a': 42},
    {'$$': 'pragma'},
    {'u': 44},
    {'g': 45},
    {'$$': 'debug'},
    {'a': 47},
    {'r': 48},
    {'n': 49},
    {'i': 50},
    {'n': 51},
    {'g': 52},
    {'$$': 'warning'},
    {'r': 54},
    {'o': 55},
    {'r': 56},
    {'$$': 'error'},
]


def parse_token(stuff, table):
    """Read the longest token recognised by the state machine `table`.

    `stuff` must provide next(), back(n) and good() (see cparse) and must
    have at least one character left.

    Returns the token string, or 0 when nothing is recognised. When the very
    first character cannot start a token, the read position is left
    unchanged; otherwise it is left on the first character that is not part
    of the token.
    """
    c = stuff.next()
    stuff.back(1)
    if c not in table[0]:
        return 0

    pos = 0
    while stuff.good():
        c = stuff.next()
        state = table[pos]
        if c in state:
            pos = state[c]
        else:
            stuff.back(1)
            break
    # a state without '$$' means the input stopped in the middle of a token
    # (lexer error); this also covers the end of the input
    return table[pos].get('$$', 0)


def get_punctuator_token(stuff):
    """Read a C punctuator from `stuff`, return it or 0."""
    return parse_token(stuff, punctuators_table)


def get_preprocessor_token(stuff):
    """Read a preprocessor directive name from `stuff`, return it or 0."""
    return parse_token(stuff, preproc_table)


def _skip_macro_call(lst, start):
    """Return the index following the ')' matching the '(' at lst[start]."""
    depth = 0
    i = start
    n = len(lst)
    while i < n:
        tok = lst[i]
        if tok[0] == op:
            if tok[1] == '(':
                depth += 1
            elif tok[1] == ')':
                depth -= 1
                if depth == 0:
                    return i + 1
        i += 1
    raise ValueError("unterminated macro call in %s" % str(lst))


def subst(lst, defs):
    """Substitute the macros in a token list.

    lst:  list of [type, value] tokens, as returned by cparse.get_body
    defs: dict mapping a macro name to its body (a token list)

    'defined X' and 'defined(X)' are replaced by the number token '1' or
    '0'. Identifiers found in `defs` are replaced by their body (the body is
    not substituted again). Function-like macro calls, i.e. a known macro
    followed by '(', are not expanded: the call is dropped up to the
    matching ')'.

    Returns a new token list. Raises ValueError on a malformed 'defined'
    expression or on an unterminated macro call.
    """
    out = []
    n = len(lst)
    i = 0
    while i < n:
        tok = lst[i]
        tok_type = tok[0]
        tok_val = tok[1]

        if tok_type != ident:
            out.append(tok)
            i += 1
            continue

        if i + 1 >= n:
            # last token: plain substitution only
            if tok_val in defs:
                out.extend(defs[tok_val])
            else:
                out.append(tok)
            break

        next_type = lst[i + 1][0]
        next_val = lst[i + 1][1]

        if tok_val == 'defined':
            if next_type == ident:
                if next_val in defs:
                    out.append([num, '1'])
                else:
                    out.append([num, '0'])
                i += 2
                continue
            if next_type == op and next_val == '(':
                if n - i < 4:
                    raise ValueError("expected 4 tokens defined(ident)")
                if lst[i + 2][0] != ident:
                    raise ValueError("expected defined(ident)")
                if lst[i + 2][1] in defs:
                    out.append([num, '1'])
                else:
                    out.append([num, '0'])
                i += 4
                continue

        if tok_val in defs:
            if next_type == op and next_val == '(':
                # macro function: not expanded, the whole call is ignored
                i = _skip_macro_call(lst, i + 1)
            else:
                out.extend(defs[tok_val])
                i += 1
            continue

        out.append(tok)
        i += 1
    return out


def comp(lst):
    """Reduce a substituted token list to a single [type, value] token.

    The evaluation is strictly left to right: there is no operator
    precedence and parentheses are not supported. Number values may be
    strings (from the lexer) or ints (from a previous computation).
    Binary operators that are not supported evaluate to 0.

    Returns [stri, ''] for an empty list. Raises ValueError when the
    expression cannot be computed (ZeroDivisionError may also be raised by a
    division or a modulo).
    """
    if not lst:
        return [stri, '']
    if len(lst) == 1:
        return lst[0]

    a1_type = lst[0][0]
    a1 = lst[0][1]
    a2_type = lst[1][0]
    a2 = lst[1][1]

    # stringification
    if a1_type == ident:
        if a2 == '#':
            return comp([[stri, a1]] + lst[2:])
    if a1 == '#':
        if a2_type == ident:
            return comp([[stri, a2]] + lst[2:])

    # unary operators: they apply to the next number only, the remaining
    # tokens must still be evaluated (e.g. "! 0 && 0")
    if a1_type == op:
        if a2_type == num:
            if a1 == '-':
                return comp([[num, -int(a2)]] + lst[2:])
            if a1 == '!':
                if int(a2) == 0:
                    val = 1
                else:
                    val = 0
                return comp([[num, val]] + lst[2:])
            raise ValueError("cannot compute %s (1)" % str(lst))
        raise ValueError("cannot compute %s (2)" % str(lst))

    # adjacent strings are concatenated
    if a1_type == stri:
        if a2_type == stri:
            return comp([[stri, a1 + a2]] + lst[2:])

    # binary operators need a third token
    if len(lst) < 3:
        raise ValueError("cannot compute %s (3)" % str(lst))
    a3_type = lst[2][0]
    a3 = lst[2][1]

    if a1_type == num and a3_type == num and a2_type == op:
        a1 = int(a1)
        a3 = int(a3)

        if a2 == '+': val = a1 + a3
        elif a2 == '-': val = a1 - a3
        elif a2 == '*': val = a1 * a3
        # NOTE: floor division, differs from C for negative operands
        elif a2 == '/': val = a1 // a3
        elif a2 == '%': val = a1 % a3
        elif a2 == '|': val = a1 | a3
        elif a2 == '&': val = a1 & a3
        elif a2 == '^': val = a1 ^ a3
        elif a2 == '<<': val = a1 << a3
        elif a2 == '>>': val = a1 >> a3
        else:
            # logical and comparison operators give 0 or 1
            if a2 == '||': res = a1 or a3
            elif a2 == '&&': res = a1 and a3
            elif a2 == '==': res = a1 == a3
            elif a2 == '!=': res = a1 != a3
            elif a2 == '<': res = a1 < a3
            elif a2 == '>': res = a1 > a3
            elif a2 == '<=': res = a1 <= a3
            elif a2 == '>=': res = a1 >= a3
            else: res = 0  # unsupported operator
            if res: val = 1
            else: val = 0
        return comp([[num, val]] + lst[3:])

    raise ValueError("could not parse the macro %s " % str(lst))


class filter:
    """First pass: strip the comments and the code from a C/C++ file.

    Only the preprocessor lines are kept, without the leading '#'; they are
    stored in self.lines. A line which is not terminated by a newline at the
    end of the file is left in self.buf.
    """

    def __init__(self):
        self.fn = ''      # name of the file being read
        self.i = 0        # read position in self.txt
        self.max = 0      # length of self.txt
        self.txt = ""     # contents of the file
        self.buf = []     # characters of the preprocessor line being read
        self.lines = []   # preprocessor lines found so far

    def next(self):
        """Return the next character and move the read position.

        Trigraphs are replaced, backslash-newline sequences are removed and
        '\\r\\n' is returned as '\\n'. At the end of the text '\\n' is
        returned and the position is not moved, so that callers never run
        past the end of the buffer.
        """
        txt = self.txt
        while 1:
            i = self.i
            if i >= self.max:
                return '\n'
            ret = txt[i]

            # trigraphs can be filtered straight away
            if ret == '?':
                if txt[i + 1:i + 2] == '?':
                    car = trigs.get(txt[i + 2:i + 3])
                    if car is not None:
                        self.i = i + 3
                        return car
            # unterminated lines can be eliminated too
            elif ret == '\\':
                if txt[i + 1:i + 2] == '\n':
                    self.i = i + 2
                    continue
                if txt[i + 1:i + 3] == '\r\n':
                    self.i = i + 3
                    continue
            elif ret == '\r':
                if txt[i + 1:i + 2] == '\n':
                    self.i = i + 2
                    return '\n'

            self.i = i + 1
            return ret

    def good(self):
        """Return true while there are characters left to read."""
        return self.i < self.max

    def initialize(self, filename):
        """Load the contents of `filename` (IOError if it cannot be read)."""
        self.fn = filename
        f = open(filename, "r")
        try:
            self.txt = f.read()
        finally:
            f.close()
        self.i = 0
        self.max = len(self.txt)

    def start(self, filename):
        """Read `filename` and collect its preprocessor lines."""
        self.initialize(filename)
        while self.good():
            c = self.next()
            if c == ' ' or c == '\t' or c == '\n':
                continue
            elif c == '#':
                self.preprocess()
            elif c == '%':
                # '%:' is the digraph for '#'
                pos = self.i
                if self.next() == ':':
                    self.preprocess()
                else:
                    # not a digraph: the character read belongs to the line
                    self.i = pos
                    self.eat_line()
            elif c == '/':
                self._skip_comment()
            elif c == '"':
                self.skip_string()
                self.eat_line()
            elif c == '\'':
                self.skip_char()
                self.eat_line()

    def _skip_comment(self):
        """Skip a comment, the leading '/' having just been read.

        Returns '*' when a block comment was skipped, '/' when a line
        comment was skipped (the end of the line is consumed too) and ''
        when there is no comment; in that last case the character following
        the '/' is left unread.
        """
        pos = self.i
        c = self.next()
        if c == '*':
            self.get_c_comment()
            return '*'
        if c == '/':
            self.get_cc_comment()
            return '/'
        self.i = pos
        return ''

    def get_cc_comment(self):
        """Skip a // comment, including the newline which terminates it."""
        while self.good():
            if self.next() == '\n':
                break

    def get_c_comment(self):
        """Skip a block comment, up to and including the closing */."""
        prev_star = 0
        while self.good():
            c = self.next()
            if c == '*':
                prev_star = 1
            elif c == '/' and prev_star:
                break
            else:
                prev_star = 0

    def skip_char(self, store=0):
        """Skip a character literal, the opening quote having been read.

        The literal ends at the closing quote; backslash escapes of any
        length are accepted. The scan never goes past the end of the line
        (the newline is left unread), so a stray apostrophe cannot swallow
        the following lines. If `store` is true the characters skipped are
        appended to self.buf.
        """
        escaped = 0
        closed = 0
        while self.good():
            pos = self.i
            c = self.next()
            if c == '\n':
                self.i = pos
                break
            if store: self.buf.append(c)
            if escaped:
                escaped = 0
            elif c == '\\':
                escaped = 1
            elif c == '\'':
                closed = 1
                break
        if not closed: print("uh-oh, invalid character")

    def skip_string(self, store=0):
        """Skip a string literal, the opening quote having been read.

        The string ends at the first quote which is not escaped by a
        backslash. If `store` is true the characters skipped (including the
        closing quote) are appended to self.buf.
        """
        escaped = 0
        while self.good():
            c = self.next()
            if store: self.buf.append(c)
            if escaped:
                escaped = 0
            elif c == '\\':
                escaped = 1
            elif c == '"':
                break

    def eat_line(self):
        """Skip the rest of the current line of code."""
        while self.good():
            c = self.next()
            if c == '\n':
                break
            elif c == '"':
                self.skip_string()
            elif c == '\'':
                self.skip_char()
            elif c == '/':
                # a line comment consumes the end of the line
                if self._skip_comment() == '/':
                    break

    def _end_line(self):
        """Store the preprocessor line accumulated in self.buf."""
        self.lines.append("".join(self.buf))
        self.buf = []

    def preprocess(self):
        """Read a preprocessor line, the '#' having just been read.

        Comments are removed; strings and character literals are copied
        verbatim. The line is appended to self.lines when its end is
        reached; at the end of the file the text is left in self.buf.
        """
        # skip whitespaces like "#  define"
        while self.good():
            car = self.txt[self.i]
            if car == ' ' or car == '\t': self.i += 1
            else: break

        while self.good():
            c = self.next()
            if c == '\n':
                self._end_line()
                break
            elif c == '"':
                self.buf.append(c)
                self.skip_string(store=1)
            elif c == '\'':
                self.buf.append(c)
                self.skip_char(store=1)
            elif c == '/':
                kind = self._skip_comment()
                if kind == '/':
                    # the comment consumed the newline: the line is over
                    self._end_line()
                    break
                elif not kind:
                    self.buf.append('/')  # simple punctuator '/'
            else:
                self.buf.append(c)


class cparse:
    """Second pass: interpret the preprocessor lines to find the includes.

    The names of the headers found are stored in self.deps. With waf nodes
    (nodepaths) the nodes found go to self.m_nodes and the names which
    could not be resolved to self.m_names; with plain directories
    (strpaths) the paths found go to self.deps_paths.
    """

    def __init__(self, nodepaths=None, strpaths=None, defines=None):
        """
        nodepaths: list of waf nodes to search for headers
        strpaths:  list of directories to search when nodepaths is empty
        defines:   dict of the initial macros (name -> token list); when
                   given, it is updated in place by #define and #undef
        """
        if nodepaths is None: nodepaths = []
        if strpaths is None: strpaths = []
        # a fresh dict per instance: the definitions found in one file must
        # not leak into the next scan
        if defines is None: defines = {}

        self.lines = []   # preprocessor lines left to process
        self.i = 0        # read position in self.txt
        self.txt = ''     # line being processed
        self.max = 0      # length of self.txt
        self.buf = []

        self.defs = defines
        # stack of the conditional blocks, innermost first
        self.state = []

        self.env = None  # needed for the variant when searching for files

        # include paths
        self.strpaths = strpaths
        self.pathcontents = {}  # directory -> cached os.listdir result

        self.deps = []
        self.deps_paths = []

        # waf uses
        self.m_nodepaths = nodepaths
        self.m_nodes = []
        self.m_names = []

    def tryfind(self, filename):
        """Look for the header `filename` and queue its lines if found."""
        if self.m_nodepaths:
            found = 0
            for n in self.m_nodepaths:
                found = n.find_source(filename, create=0)
                if found:
                    self.m_nodes.append(found)
                    self.addlines(found.abspath(self.env))
                    break
            if not found:
                if filename not in self.m_names:
                    self.m_names.append(filename)
        else:
            for p in self.strpaths:
                if p not in self.pathcontents:
                    try:
                        self.pathcontents[p] = os.listdir(p)
                    except OSError:
                        # missing or unreadable include directory
                        self.pathcontents[p] = []
                if filename in self.pathcontents[p]:
                    np = os.path.join(p, filename)
                    self.addlines(np)
                    self.deps_paths.append(np)
                    # the first directory which contains the file wins
                    break

    def addlines(self, filepath):
        """Put the preprocessor lines of `filepath` in front of self.lines.

        The filtered lines are cached in parse_cache. IOError is raised when
        the file cannot be read.
        """
        if filepath in parse_cache:
            self.lines = parse_cache[filepath] + self.lines
            return

        try:
            stuff = filter()
            stuff.start(filepath)
            # last line of a file which does not end with a newline
            if stuff.buf: stuff.lines.append("".join(stuff.buf))
            parse_cache[filepath] = stuff.lines  # memorize the lines filtered
            self.lines = stuff.lines + self.lines
        except IOError:
            raise
        except Exception:
            if Params.g_verbose > 0: warning("parsing %s failed" % filepath)
            raise

    def _load_line(self, line):
        """Make `line` the text read by next()."""
        self.txt = line
        self.i = 0
        self.max = len(line)

    def start2(self, node, env):
        """Scan the waf node `node`; lines which cannot be parsed are
        reported through debug()/warning() and skipped."""
        debug("scanning %s (in %s)" % (node.m_name, node.m_parent.m_name), 'preproc')

        self.env = env
        # NOTE(review): the result was never used; the call is kept in case
        # it has side effects
        node.variant(env)

        self.addlines(node.abspath(env))
        if env['DEFLINES']:
            self.lines = env['DEFLINES'] + self.lines

        while self.lines:
            line = self.lines.pop(0)
            if not line: continue
            self._load_line(line)
            try:
                self.process_line()
            except Exception:
                debug("line parsing failed >%s<" % line, 'preproc')
                if Params.g_verbose: warning("line parsing failed >%s<" % line)

    # debug only
    def start(self, filename):
        """Scan the file `filename`; parsing errors are re-raised."""
        self.addlines(filename)
        while self.lines:
            line = self.lines.pop(0)
            if not line: continue
            self._load_line(line)
            try:
                self.process_line()
            except Exception:
                print("warning: line parsing failed >%s<" % line)
                raise

    def back(self, c):
        """Move the read position back by `c` characters."""
        self.i -= c

    def next(self):
        """Return the next character of the line (IndexError at the end)."""
        car = self.txt[self.i]
        self.i += 1
        return car

    def good(self):
        """Return true while there are characters left in the line."""
        return self.i < self.max

    def skip_spaces(self):
        """Move the read position past the spaces and the tabs."""
        while self.good():
            c = self.txt[self.i]
            if c == ' ' or c == '\t':
                self.i += 1
            else:
                break

    def isok(self):
        """Return 1 if the current line is in a live block, None if one of
        the enclosing conditional blocks is skipped or ignored."""
        if not self.state: return 1
        for tok in self.state:
            if tok == skipped or tok == ignored: return None
        return 1

    def _add_dep(self, name):
        """Record the header `name`; each header is scanned only once."""
        if name not in self.deps:
            self.deps.append(name)
            self.tryfind(name)

    def process_line(self):
        """Interpret the preprocessor line stored in self.txt."""
        token = get_preprocessor_token(self)
        if not token: return

        if token == 'endif':
            # a stray #endif is ignored
            if self.state: self.state.pop(0)
        elif token[0] == 'i' and token != 'include':
            # if, ifdef, ifndef: open a new block
            self.state = [undefined] + self.state

        # skip lines when in a dead block
        # wait for the endif
        if token not in ('else', 'elif'):
            if not self.isok():
                return

        debug("line is %s state is %s" % (self.txt, self.state), 'preproc')

        if token == 'if':
            ret = self.comp(self.get_body())
            if ret: self.state[0] = accepted
            else: self.state[0] = ignored
        elif token == 'ifdef':
            name = self.get_name()
            if name in self.defs: self.state[0] = accepted
            else: self.state[0] = ignored
        elif token == 'ifndef':
            name = self.get_name()
            if name in self.defs: self.state[0] = ignored
            else: self.state[0] = accepted
        elif token == 'include':
            (kind, body) = self.get_include()
            if self.isok():
                debug("include found %s (%s) " % (body, kind), 'preproc')
                if kind == '"':
                    self._add_dep(body)
                elif kind == '<':
                    if not strict_quotes:
                        self._add_dep(body)
                else:
                    # computed include: #include MACRO
                    res = self.comp(body)
                    if res: self._add_dep(res)
        elif token == 'elif':
            if not self.state:
                pass  # stray #elif
            elif self.state[0] == accepted:
                self.state[0] = skipped
            elif self.state[0] == ignored:
                # when the condition is false a following #elif or #else
                # will treat the case
                if self.comp(self.get_body()):
                    self.state[0] = accepted
        elif token == 'else':
            if not self.state:
                pass  # stray #else
            elif self.state[0] == accepted: self.state[0] = skipped
            elif self.state[0] == ignored: self.state[0] = accepted
        elif token == 'define':
            name = self.get_name()
            # the parameters must be consumed to reach the body
            # TODO handle macros: the parameters are not used yet
            self.get_args()
            # function-like macros are recorded too, so that #ifdef and
            # defined() see them
            self.defs[name] = self.get_body()
        elif token == 'undef':
            name = self.get_name()
            if name and name in self.defs:
                del self.defs[name]

    def get_include(self):
        """Read the argument of #include.

        Returns ('"', name) or ('<', name) for the usual forms, or
        ('', tokens) when the header is given by a macro.
        """
        self.skip_spaces()
        if not self.good():
            return ('', [])
        delimiter = self.next()
        if delimiter == '"':
            closing = '"'
        elif delimiter == '<':
            closing = '>'
        else:
            self.i -= 1
            return ('', self.get_body())

        buf = []
        while self.good():
            c = self.next()
            if c == closing: break
            buf.append(c)
        return (delimiter, "".join(buf))

    def get_name(self):
        """Read the next word: it ends at a space, a tab or a '('."""
        ret = []
        self.skip_spaces()
        while self.good():
            c = self.next()
            if c != ' ' and c != '\t' and c != '(':
                ret.append(c)
            else:
                self.i -= 1
                break
        return "".join(ret)

    def get_args(self):
        """Read the parameter list of a function-like macro.

        The '(' must follow the macro name immediately: with a space in
        between the macro is object-like and the parenthesis belongs to the
        body. Returns the list of the parameter names ('...' for a variadic
        parameter), or None if there is no parameter list.
        """
        if not self.good(): return None

        c = self.next()
        if c != '(':
            self.i -= 1
            return None

        ret = []
        buf = []
        while self.good():
            c = self.next()
            if c == ' ' or c == '\t':
                continue
            elif c == ',':
                ret.append("".join(buf))
                buf = []
            elif c == '.':
                if self.txt[self.i:self.i + 2] == '..':
                    buf.append('...')
                    self.i += 2
            elif c == ')':
                break
            else:
                buf.append(c)
        # last parameter
        if buf: ret.append("".join(buf))
        return ret

    def get_body(self):
        """Split the rest of the line into a list of [type, value] tokens."""
        buf = []
        self.skip_spaces()
        while self.good():
            c = self.txt[self.i]

            if c == ' ' or c == '\t':
                self.i += 1
                continue
            elif c == '"':
                self.i += 1
                r = self.get_string()
                buf.append([stri, r])
            elif c == '\'':
                self.i += 1
                r = self.get_char()
                buf.append([chr, r])
            elif c in string.digits:
                res = self.get_number()
                buf.append([num, res])
            elif c in alpha:
                r = self.get_ident()
                buf.append([ident, r])
            else:
                start = self.i
                r = get_punctuator_token(self)
                if r:
                    buf.append([op, r])
                elif self.i == start:
                    # unknown character (e.g. '@'): skip it, or the loop
                    # would never end
                    self.i += 1
        return buf

    def get_char(self):
        """Read a character literal, the opening quote having been read.

        Returns the contents without the quotes; a missing closing quote is
        reported with error().
        """
        buf = []
        c = ''
        if self.good():
            c = self.next()
            buf.append(c)
            # skip one more character if there is a backslash '\''
            if c == '\\' and self.good():
                c = self.next()
                buf.append(c)
        closing = ''
        if self.good():
            closing = self.next()
        if closing != '\'': error("uh-oh, invalid character"+str(closing))

        return ''.join(buf)

    def get_string(self):
        """Read a string literal, the opening quote having been read.

        Returns the raw contents without the quotes (the escape sequences
        are not interpreted).
        """
        buf = []
        escaped = 0
        while self.good():
            c = self.next()
            if escaped:
                escaped = 0
            elif c == '\\':
                escaped = 1
            elif c == '"':
                break
            buf.append(c)
        return ''.join(buf)

    def get_number(self):
        """Read a sequence of decimal digits."""
        buf = []
        while self.good():
            c = self.next()
            if c in string.digits:
                buf.append(c)
            else:
                self.i -= 1
                break
        return ''.join(buf)

    def get_ident(self):
        """Read an identifier."""
        buf = []
        while self.good():
            c = self.next()
            if c in alpha:
                buf.append(c)
            else:
                self.i -= 1
                break
        return ''.join(buf)

    def comp(self, stuff):
        """Substitute the macros in the token list `stuff` and evaluate it.

        Returns an int for a numeric result, the token value otherwise, and
        0 when there is no result.
        """
        clean = subst(stuff, self.defs)
        res = comp(clean)
        if res:
            if res[0] == num: return int(res[1])
            return res[1]
        return 0


if __name__ == "__main__":
    if len(sys.argv) > 1: arg = sys.argv[1]
    else: arg = "file.c"

    paths = ['.']
    gruik = cparse(strpaths=paths)
    gruik.start(arg)
    print("we have found the following dependencies")
    print(gruik.deps)
    print(gruik.deps_paths)