COMPILER C
/* An attempt to describe a subset of C.

   This first part of the grammar holds the scanner specification
   (character sets, comments, ignored characters, tokens, pragmas) and the
   productions for top-level definitions, function definitions and the
   Statement dispatcher. */

CHARACTERS
  letter   = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz" .
  digit    = "0123456789" .
  hexdigit = digit + "ABCDEFabcdef" .
  tab      = CHR(9) .
  eol      = CHR(10) .
  chars    = CHR(32) .. CHR(255) - "'" .   /* body of a character literal */
  strings  = CHR(32) .. CHR(255) - '"'.    /* body of a string literal    */
  macros   = ANY - eol .                   /* rest of a preprocessor line */

/* Note: an earlier "library" token ( "<" files { files } ">" ) and its
   "files" character set were dropped.  No production referenced the token,
   include lines are already consumed whole by the PreProcessor pragma
   below, and the scanner would have matched text such as <b> in  a<b>c
   as a single token instead of two relational operators. */

COMMENTS FROM "/*" TO "*/"

IGNORE tab + eol + CHR(13)

TOKENS
  /* Underscore or letter, then any mix of underscores, letters, digits. */
  identifier = ( "_" | letter) { "_" | letter | digit } .
  /* Decimal digits, optional fraction, optional single U or L suffix. */
  number     = digit { digit } [ "." { digit }] [ "U" | "u" | "L" | "l" ] .
  /* 0x or 0X prefix, one or more hex digits, optional U or L suffix. */
  hexnumber  = "0" ( "x" | "X" ) hexdigit { hexdigit } [ "U" | "u" | "L" | "l" ] .
  /* Double-quoted text; an embedded double quote is not supported. */
  string     = '"' { strings } '"' .
  /* Single-quoted character, optionally preceded by one backslash. */
  char       = "'" [ "\" ] chars "'" .

PRAGMAS
  /* A "#" followed by everything up to the end of the line. */
  PreProcessor = "#" {macros} .

PRODUCTIONS

C = { Definition } EOF .

/* Variable and Type Definitions */

/* One top-level item: either a function (prototype or body) or a list of
   variables sharing the same storage class and base type. */
Definition
  = [ StorageClass ] Type { "*" } identifier
    ( FunctionDefinition | VarList ";" ) .

StorageClass = "auto" | "extern" | "register" | "static" .

/* Base types.  "long double", "unsigned short [int]" and
   "unsigned long int" are accepted in addition to the original forms;
   "long float" is kept so previously accepted input still parses. */
Type
  = "short" [ "int" ]
  | "long" [ "int" | "float" | "double" ]
  | "unsigned" [ "char" | "short" [ "int" ] | "int" | "long" [ "int" ] ]
  | "char" | "int" | "float" | "double" | "void" .

/* Array bounds of the first declarator (its name was already consumed by
   the caller), followed by any further comma-separated declarators. */
VarList = ArraySize { "," { "*" } identifier ArraySize } .

/* Zero or more bracketed dimensions; each bound may be omitted. */
ArraySize = { "[" [ ConstExpression ] "]" } .

/* Function Definitions */

/* A header followed by ";" is a prototype; otherwise a body follows. */
FunctionDefinition = FunctionHeader ( ";" | FunctionBody ) .

FunctionHeader = "(" [ FormalParamList ] ")" .

FunctionBody = CompoundStatement .

FormalParamList = FormalParameter { "," FormalParameter } .

/* The parameter name is optional so that prototypes may omit it. */
FormalParameter = Type { "*" } [ identifier ] ArraySize .

/* Statements */

/* Any number of case/default labels may precede a statement. */
Statement
  = { Label }
    ( AssignmentExpression | BreakStatement | CompoundStatement
    | ContinueStatement | DoStatement | ForStatement
    | IfStatement | NullStatement | ReturnStatement
    | SwitchStatement | WhileStatement ) .

Label = "case" ConstExpression ":" | "default" ":" .

/* There is no requirement that a switch statement be followed by a
   compound statement.  Actually labels may be even more general */

/* An expression used as a statement, terminated by a semicolon. */
AssignmentExpression = Expression ";" .
BreakStatement = "break" ";" .

/* A block: all local declarations first, then the statements. */
CompoundStatement = "{" { LocalDeclaration } { Statement } "}" .

ContinueStatement = "continue" ";" .

DoStatement = "do" Statement "while" "(" Expression ")" ";" .

/* Each of the three control expressions may be omitted. */
ForStatement
  = "for" "(" [ Expression ] ";" [ Expression ] ";" [ Expression ] ")"
    Statement .

IfStatement = "if" "(" Expression ")" Statement [ "else" Statement ] .

NullStatement = ";" .

ReturnStatement = "return" [ Expression ] ";" .
/* Expression usually in parentheses */

SwitchStatement = "switch" "(" Expression ")" Statement .

WhileStatement = "while" "(" Expression ")" Statement .

/* LocalDeclarations */

/* Like a top-level Definition, except that a function may only be
   declared here (header followed by ";"), never given a body. */
LocalDeclaration
  = [ StorageClass ] Type { "*" } identifier
    ( FunctionHeader | VarList ) ";" .

/* Expressions, based on Kernighan and Ritchie: "The C Programming Language".
   There does not seem to be a way to make this work in an LL(1) fashion,
   but this generates a "working" parser */

ConstExpression = Expression .

/* Assignment is parsed by recursing on the right-hand side. */
Expression = Conditional { AssignmentOperator Expression }.

Conditional = LogORExp .

/* Binary operator levels, from loosest to tightest binding. */
LogORExp    = LogANDExp { "||" LogANDExp } .
LogANDExp   = InclORExp { "&&" InclORExp }.
InclORExp   = ExclORExp { "|" ExclORExp } .
ExclORExp   = ANDExp { "^" ANDExp } .
ANDExp      = EqualExp { "&" EqualExp } .
EqualExp    = RelationExp { ( "==" | "!=" ) RelationExp } .
RelationExp = ShiftExp { ( "<" | ">" | "<=" | ">=" ) ShiftExp }.
ShiftExp    = AddExp { ( "<<" | ">>" ) AddExp } .
AddExp      = MultExp { ( "+" | "-" ) MultExp } .
MultExp     = CastExp { ( "*" | "/" | "%" ) CastExp } .

CastExp = UnaryExp .
/* we should really add | "(" identifier ")" CastExp . but this breaks it badly */

UnaryExp
  = PostFixExp
  | ( "++" | "--" ) UnaryExp
  | UnaryOperator CastExp .
/* we should really add | "sizeof" ( UnaryExp | "(" Type ")" ) . but this breaks it badly */

/* A primary followed by any sequence of subscripts, calls, member
   selections and postfix increments or decrements. */
PostFixExp
  = Primary
    { "[" Expression "]"
    | FunctionCall
    | "." identifier
    | "->" identifier
    | "++"
    | "--"
    } .

/* hexnumber is listed here because the token is declared in the TOKENS
   section; without this alternative the scanner would recognise a
   hexadecimal literal but no production could ever accept it. */
Primary
  = identifier
  | string
  | char
  | number
  | hexnumber
  | "(" Expression ")" .

FunctionCall = "(" [ ActualParameters ] ")" .
/* Argument list of a FunctionCall: one or more expressions separated by
   commas (the surrounding parentheses belong to FunctionCall). */
ActualParameters
  = Expression { "," Expression } .

/* Operators that may join a Conditional to the Expression on its right. */
AssignmentOperator
  = "="
  | "*="  | "/="  | "%="
  | "+="  | "-="
  | "&="  | "^="  | "|="
  | "<<=" | ">>=" .

/* Prefix operators that UnaryExp accepts in front of a CastExp. */
UnaryOperator
  = "+" | "-"
  | "*"
  | "!"
  | "&"
  | "~" .

END C.