#!/usr/local/bin/munger ; Copyright (c) 2005-2007 James Bailie ; All rights reserved. ; ; Redistribution and use in source form, with or without modification, is ; permitted provided that the following conditions are met: ; ; * Redistributions of source code must retain the above copyright ; notice, this list of conditions and the following disclaimer. ; * The name of James Bailie may not be used to endorse or promote ; products derived from this software without specific prior written permission. ; ; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS "AS IS" ; AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE ; IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ; ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE ; LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR ; CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF ; SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS ; INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN ; CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ; ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE ; POSSIBILITY OF SUCH DAMAGE. ; ------------------------------------------------------------------------- ; This is a Compiler to C for a restricted, properly tail-recursive ; Munger-like language, with first-class continuations, but no lists, ; first-class symbols, runtime error-checking, nor local side-effects. ; It is derived from Marc Feeley's 90 Minute Scheme to C compiler, ; presented at the October 2004 Montreal Lisp And Scheme Users' Group: ; http://www.iro.umontreal.ca/~boucherd/mslug/meetings/20041020/minutes-en.html ; This version is a straight-forward translation to Munger, with a slightly ; modified code generator, and the toy runtime engine replaced with a ; stand-alone runtime engine. ; ------------------------------------------------------------------------- ; Causes interpreter to exit upon encountering an error, instead of returning ; to toplevel. (fatal) (setq mm_version "1.27") ; Makes GC happen less frequently to speed things up. (gc_freq 60000) ; Path to the installed MM library code. (setq lib_path "/usr/local/share/munger-4.143/") ;---------------------------------------------------------------------- ; ; Functions to create and query the abstractions we use to represent ; programs and environments. ; ;---------------------------------------------------------------------- ; make_node creates records to represent nodes in the abstract syntax tree ; which will be constructed to represent the program being compiled, and to ; represent bindings in environments. Each node is a typed structure ; containing multiple fields. The function accepts a type specifier ; followed by multiple two-element lists grouping field names with their ; values. The values may not be modifed subsequently. (setq VARIABLE 1) (setq REFERENCE 2) (setq LAMBDA 3) (setq SEQUENCE 4) (setq SETQ 6) (setq IF 7) (setq APPLICATION 8) (setq PRIMITIVE 9) (setq STRING 10) (setq NUMBER 11) (setq MACRO 12) (setq TYPE 0) (setq ID 1) (setq EXPANDER 2) (setq VID 1) (setq CID 2) (setq SUBEXPR 1) (setq VAR 2) (setq VALUE 2) (setq OPERATION 2) (setq PARAMS 2) (defun make_node (type (fields)) (extend 'node (record 3)) (setfield node 0 type) (while fields (setfield node (caar fields) (cadr (car fields))) (setq fields (cdr fields))) node) ; new_var creates a new local variable object. Local variables are renamed ; so that each is uniquely named. The id field contains the original name ; of the variable, while the cid field contains the new name. (let ((idx -1)) (defun new_var (id) (make_node VARIABLE (list VID id) (list CID (intern (stringify id "_" (inc idx))))))) ; new_global_var creates a new global variable object. Globals have the ; same two fields as locals, but since it is not necessary to rename global ; variables, they are both set to the name of the global. Checking for ; this identity is our means of discriminating a global variable object ; from a local variable object. (defun new_global_var (id) (make_node VARIABLE (list VID id) (list CID id))) ; Predicate to determine if a variable object represents a global variable. (defun global_var_p (var) (eq (getfield var VID) (getfield var CID))) ; lookup_env looks up a specified symbol in a specified environment. ; Environments are lists of variable objects. The corresponding variable ; object or the empty list is returned. (defun lookup_env (id env) (catch (while env (when (eq (getfield (car env) VID) id) (throw (car env))) (setq env (cdr env))))) ; ------------------------------------------------------------------------------ ; ; Functions which expand the list-structure of the program into a form we ; can more easily manipulate. Each node of the abstract syntax tree is a ; structure containing an expression and its classification. ; Subexpressions become subnodes. By converting the program an AST, we are ; restructuring and annotating it according to the basic expressions we ; will need to subsequently recognize and modify. ; ; ------------------------------------------------------------------------------ ; Code manipulating the compile-time environment (cte). ; lookup_id attempts to lookup a variable in the local, then global ; environments. Looking up a non-existent variable creates a new global ; variable object. (defun lookup_id (id cte) (or (lookup_env id cte) (lookup global_cte id) (hash global_cte id (new_global_var id)))) ; The initial toplevel cte binds symbols to "macro" objects to define the ; intrinsic functions. Each macro has an expander function which checks ; for the correct number of arguments, and then builds the AST for the ; expression. The expander is applied by expand_form_expr, below. (defun make_initial_cte () (append ; 2 argument intrinsics. (mapcar (lambda (id) (make_node MACRO (list ID id) (list EXPANDER (lambda (e cte) (if (eq (length (cdr e)) 2) (make_node PRIMITIVE (list SUBEXPR (expand_list_expr (cdr e) cte)) (list OPERATION id)) (die "Error: " id " requires 2 arguments: " e)))))) '(rename symlink pipe match lookup dissociate matches unshift push index eq = < <= > >= + - * % / setenv expand_tabs getfield strcmp)) (append ; 1 argument intrinsics. (mapcar (lambda (id) (make_node MACRO (list ID id) (list EXPANDER (lambda (e cte) (if (eq (length (cdr e)) 1) (make_node PRIMITIVE (list SUBEXPR (expand_list_expr (cdr e) cte)) (list OPERATION id)) (die "Error: " id " requires 1 argument: " e)))))) '(resume char chop chomp display display_error length clear abs digitize stringify getenv remove directory date random code pop used sort_numbers sort_strings topidx explode stringp stackp numberp readchars tablep regexp exec_stack shift join_stack system regcomp exit keys values basename dirname record stat)) (append ; no argument intrinsics. (mapcar (lambda (id) (make_node MACRO (list ID id) (list EXPANDER (lambda (e cte) (if (not (length (cdr e))) (make_node PRIMITIVE (list SUBEXPR (expand_list_expr (cdr e) cte)) (list OPERATION id)) (die "Error: " id " requires no arguments: " e)))))) '(table next previous current time fork rewind getline newline newline_error)) (append ; 3 argument intrinsics. (mapcar (lambda (id) (make_node MACRO (list ID id) (list EXPANDER (lambda (e cte) (if (eq (length (cdr e)) 3) (make_node PRIMITIVE (list SUBEXPR (expand_list_expr (cdr e) cte)) (list OPERATION id)) (die "Error: " id " requires 3 arguments: " e)))))) '(substack setfield redirect store substring associate)) (list (make_node MACRO (list ID 'append) (list EXPANDER (lambda (e cte) (if (not (extend 'len (length (cdr e)))) (die "Error: append requires 1 or more arguments: " e) (make_node PRIMITIVE (list SUBEXPR (expand_list_expr (append (cdr e) (list len)) cte)) (list OPERATION 'append)))))) (make_node MACRO (list ID 'assign) (list EXPANDER (lambda (e cte) (if (< (length (cdr e)) 2) (die "Error: assign requires 2 or more arguments: " e) (expand_expr (cons 'progn (mapcar (lambda (x) (qquote (push ,(cadr e) ,x))) (cddr e))) cte))))) (make_node MACRO (list ID 'stack) (list EXPANDER (lambda (e cte) (cond ((not (extend 'len (length (cdr e)))) (expand_expr (list 'stack 0) cte)) ((eq len 1) (make_node PRIMITIVE (list SUBEXPR (expand_list_expr (cdr e) cte)) (list OPERATION 'stack))) (1 (die "Error: stack requires 0 or 1 argument: " e)))))) (make_node MACRO (list ID 'substitute) (list EXPANDER (lambda (e cte) (cond ((eq (extend 'len (length (cdr e))) 3) (expand_expr (append e (list 1)) cte)) ((eq len 4) (make_node PRIMITIVE (list SUBEXPR (expand_list_expr (cdr e) cte)) (list OPERATION 'substitute))) (1 (die "Error: substitute requires 3 or 4 arguments: " e)))))) (make_node MACRO (list ID 'with_input_process) (list EXPANDER (lambda (e cte) (if (>= (extend 'len (length (cdr e))) 2) (expand_expr (append (list 'unless (list 'stringp (list 'pipe 0 (cadr e)))) (append (cddr e) (list '(resume 0)))) cte) (die "Error: with_input_process requires 2 or more arguments: " e))))) (make_node MACRO (list ID 'with_output_process) (list EXPANDER (lambda (e cte) (if (>= (extend 'len (length (cdr e))) 2) (expand_expr (append (list 'unless (list 'stringp (list 'pipe 1 (cadr e)))) (append (cddr e) (list '(resume 1)))) cte) (die "Error: with_output_process requires 2 or more arguments: " e))))) (make_node MACRO (list ID 'with_input_file) (list EXPANDER (lambda (e cte) (if (>= (length (cdr e)) 2) (expand_expr (list 'if (list 'not (list 'stringp (list 'redirect 0 (cadr e) 0))) (cons 'progn (append (cddr e) (list '(resume 0))))) cte) (die "Error: with_input_file requires 2 or more arguments: " e))))) (make_node MACRO (list ID 'with_output_file) (list EXPANDER (lambda (e cte) (if (>= (length (cdr e)) 2) (expand_expr (list 'if (list 'not (list 'stringp (list 'redirect 1 (cadr e) 0))) (cons 'progn (append (cddr e) (list '(resume 1))))) cte) (die "Error: with_output_file requires 2 or more arguments: " e))))) (make_node MACRO (list ID 'with_output_file_appending) (list EXPANDER (lambda (e cte) (if (>= (length (cdr e)) 2) (expand_expr (list 'if (list 'not (list 'stringp (list 'redirect 1 (cadr e) 1))) (cons 'progn (append (cddr e) (list '(resume 1))))) cte) (die "Error: with_output_file_appending requires 2 or more arguments: " e))))) (make_node MACRO (list ID 'exec) (list EXPANDER (lambda (e cte) (if (extend 'len (length (cdr e))) (make_node PRIMITIVE (list SUBEXPR (expand_list_expr (append (cdr e) (list len)) cte)) (list OPERATION 'exec)) (die "Error: exec requires at least 1 argument: " e))))) (make_node MACRO (list ID 'concat) (list EXPANDER (lambda (e cte) (if (>= (extend 'len (length (cdr e))) 2) (make_node PRIMITIVE (list SUBEXPR (expand_list_expr (append (cdr e) (list len)) cte)) (list OPERATION 'concat)) (die "Error: concat requires at least 2 arguments: " e))))) (make_node MACRO (list ID 'setq) (list EXPANDER (lambda (e cte) (if (eq (length (cdr e)) 2) (make_node SETQ (list SUBEXPR (expand_list_expr (cddr e) cte)) (list VAR (lookup_id (cadr e) cte))) (die "Error: setq requires 2 arguments: " e))))) (make_node MACRO (list ID 'println) (list EXPANDER (lambda (e cte) (if (length (cdr e)) (expand_expr (cons 'progn (append (mapcar (lambda (x) (list 'display x)) (cdr e)) '((newline)))) cte) (die "Error: println requires at least 1 argument: " e))))) (make_node MACRO (list ID 'print) (list EXPANDER (lambda (e cte) (if (length (cdr e)) (expand_expr (cons 'progn (mapcar (lambda (x) (list 'display x)) (cdr e))) cte) (die "Error: print requires at least 1 argument: " e))))) (make_node MACRO (list ID 'die) (list EXPANDER (lambda (e cte) (expand_expr (if (cdr e) (cons 'progn (append (mapcar (lambda (x) (list 'display_error x)) (cdr e)) '((newline_error) (exit 1)))) '(exit 1)) cte)))) (make_node MACRO (list ID 'warn) (list EXPANDER (lambda (e cte) (if (length (cdr e)) (expand_expr (cons 'progn (append (mapcar (lambda (x) (list 'display_error x)) (cdr e)) '((newline_error)))) cte) (die "Error: warn requires at least 1 argument: " e))))) (make_node MACRO (list ID 'not) (list EXPANDER (lambda (e cte) (if (eq (length (cdr e)) 1) (expand_expr (list 'if (cadr e) 0 1) cte) (die "Error: not requires 1 argument: " e))))) (make_node MACRO (list ID 'cond) (list EXPANDER (lambda (e cte) (if (length (cdr e)) (expand_expr (list 'if (car (cadr e)) (cons 'progn (cdr (cadr e))) (if (cddr e) (cons 'cond (cddr e)) 0)) cte) (die "Error: cond requires at least 1 argument: " e))))) (make_node MACRO (list ID 'if) (list EXPANDER (lambda (e cte) (cond ((eq (extend 'len (length (cdr e))) 2) (make_node IF (list SUBEXPR (expand_list_expr (list (cadr e) (car (cddr e)) 0) cte)))) ((>= len 3) (make_node IF (list SUBEXPR (expand_list_expr (list (cadr e) (car (cddr e)) (cons 'progn (cdddr e))) cte)))) (1 (die "Error: if requires at least 2 arguments: " e)))))) (make_node MACRO (list ID 'when) (list EXPANDER (lambda (e cte) (if (>= (length (cdr e)) 2) (expand_expr (list 'if (cadr e) (cons 'progn (cddr e))) cte) (die "Error: when requires at least 2 arguments: " e))))) (make_node MACRO (list ID 'unless) (list EXPANDER (lambda (e cte) (if (>= (length (cdr e)) 2) (expand_expr (list 'if (list 'not (cadr e)) (cons 'progn (cddr e))) cte) (die "Error: unless requires at least 2 arguments: " e))))) (make_node MACRO (list ID 'lambda) (list EXPANDER (lambda (e cte) (if (>= (length (cdr e)) 2) (progn (extend 'params (mapcar new_var (cadr e))) (extend 'new_cte (append params cte)) (make_node LAMBDA (list SUBEXPR (list (expand_expr (cons 'progn (cddr e)) new_cte))) (list PARAMS params))) (die "Error: malformed function: " e))))) (make_node MACRO (list ID 'progn) (list EXPANDER (lambda (e cte) (if (length (cdr e)) (make_node SEQUENCE (list SUBEXPR (expand_list_expr (cdr e) cte))) (die "Error: empty progn: " e))))) (make_node MACRO (list ID 'let) (list EXPANDER (lambda (e cte) (if (>= (length (cdr e)) 2) (expand_expr (cons (cons 'lambda (cons (mapcar car (cadr e)) (cddr e))) (mapcar cadr (cadr e))) cte) (die "Error: malformed let: " e))))) (make_node MACRO (list ID 'letn) (list EXPANDER (lambda (e cte) (if (>= (length (cdr e)) 2) (expand_expr (append (list 'let (list (caar (cdr e)))) (if (cdr (cadr e)) (list (append (list 'letn (cdr (cadr e))) (cddr e))) (cddr e))) cte) (die "Error: malformed letn: " e))))) (make_node MACRO (list ID 'join) (list EXPANDER (lambda (e cte) (if (>= (extend 'len (length (cdr e))) 2) (make_node PRIMITIVE (list SUBEXPR (expand_list_expr (append (cdr e) (list len)) cte)) (list OPERATION 'join)) (die "Error: join requires 2 or more arguments: " e))))) (make_node MACRO (list ID 'split) (list EXPANDER (lambda (e cte) (cond ((eq (extend 'len (length (cdr e))) 3) (make_node PRIMITIVE (list SUBEXPR (expand_list_expr (cdr e) cte)) (list OPERATION 'split))) ((eq len 2) (expand_expr (list 'split (cadr e) (car (cddr e)) -1) cte)) (1 (die "Error: split requires 2 or 3 arguments: " e)))))) (make_node MACRO (list ID 'or) (list EXPANDER (lambda (e cte) (cond ((not (extend 'len (length (cdr e)))) (die "Error: empty or: " e)) ((eq len 1) (expand_expr (cadr e) cte)) (1 (expand_expr (qquote ((lambda (x y) (if x x (y))) ,(cadr e) (lambda () ,(cons 'or (cddr e))))) cte)))))) (make_node MACRO (list ID 'and) (list EXPANDER (lambda (e cte) (cond ((not (extend 'len (length (cdr e)))) (die "Error: empty and: " e)) ((eq len 1) (expand_expr (cadr e) cte)) (1 (expand_expr (qquote ((lambda (x y) (if x (y) x)) ,(cadr e) (lambda () ,(cons 'and (cddr e))))) cte)))))))))))) (setq global_cte (table)) (foreach (lambda (x) (hash global_cte (getfield x ID) x)) (make_initial_cte)) ; Functions to expand an expression into its equivalent AST structure. (defun expand_expr (e cte) (cond ((fixnump e) (make_node NUMBER (list VALUE e))) ((stringp e) (make_node STRING (list VALUE e))) ((symbolp e) (make_node REFERENCE (list VAR (lookup_id e cte)))) ((pairp e) (tailcall expand_form_expr e cte)) (1 (die "Error: syntax error: " e)))) (defun expand_list_expr (e cte) (mapcar (lambda (e) (expand_expr e cte)) e)) (let ((func_obj 0) (func 0)) (defun expand_form_expr (e cte) (setq func_obj (car e)) (setq func (and (symbolp func_obj) (lookup_id func_obj cte))) (if (and func (eq (getfield func TYPE) MACRO)) ((getfield func EXPANDER) e cte) (make_node APPLICATION (list SUBEXPR (expand_list_expr e cte)))))) ; ------------------------------------------------------------------------------------------ ; ; Loads file specified on command line and converts s-expressions therein to AST. ; ; ------------------------------------------------------------------------------------------ (defun parse_file () (open) (extend 'result (read 0 (current))) (cond ((eq result -2) (die "Error: You do not have permission to read " (basename (current)) ".")) ((stringp result) (die "Error: " result))) (extend 'rx (regcomp "^#include[\b\t]*\"(.+)\"")) (extend 'rx2 (regcomp "%%LIBRARY_PATH%%")) (extend 'f (find 1 1 0 rx 1)) (extend 'l (car f)) (extend 'm "") (while l (setq m (substitute rx2 lib_path (cadr (matches rx (retrieve l))))) (if (<= (read l m) 0) (die "Error: cannot include " m) (delete l) (setq f (find 1 l 0 rx 1)) (setq l (car f)))) ; Wrap the entire file in a lambda-expression to create a toplevel node ; for the AST. This also prevents evaluation of the program. (insert 1 (stringify "(setq __toplevel__ (lambda ()" (char 10)) -1) (insert (lastline) (stringify "))" (char 10)) 1) ; Write the wrapped code to a temporary file and load it. (extend 'name (join "." (basename (current)) "tmp")) (extend 'result (write 1 (lastline) name 1)) (close) (when (stringp result) (die "Error: couln't write " name ": " result)) (load name) (unlink name) ; Expand the program (expand_expr (cons 'progn (cddr (extract __toplevel__))) ())) ; --------------------------------------------------------------------------- ; ; Utility functions used to extract a list of the free variables in an ; expression. ; ; --------------------------------------------------------------------------- (defun keep (f lst) (cond ((nullp lst) ()) ((f (car lst)) (cons (car lst) (keep f (cdr lst)))) (1 (tailcall keep f (cdr lst))))) (defun difference (set1 set2) (cond ((nullp set1) ()) ((member (car set1) set2) (tailcall difference (cdr set1) set2)) (1 (cons (car set1) (difference (cdr set1) set2))))) (defun union (set1 set2) (cond ((nullp set1) set2) ((member (car set1) set2) (tailcall union (cdr set1) set2)) (1 (cons (car set1) (union (cdr set1) set2))))) (defun fold (f base lst) (if (nullp lst) base (tailcall fold f (f base (car lst)) (cdr lst)))) (defun union_multi (sets) (tailcall fold union () sets)) (defun free (ast) (extend 'type (getfield ast TYPE)) (cond ((eq type REFERENCE) (list (getfield ast VAR))) ((eq type SETQ) (tailcall union (free (car (getfield ast SUBEXPR))) (list (getfield ast VAR)))) ((eq type LAMBDA) (tailcall difference (free (car (getfield ast SUBEXPR))) (getfield ast PARAMS))) (1 (tailcall union_multi (mapcar free (getfield ast SUBEXPR)))))) ; --------------------------------------------------------------------------- ; ; CPS conversion. Continuations are made explicit, making it trivial to ; implement call_cc, and every function call becomes a tailcall. ; ; --------------------------------------------------------------------------- (defun cps (ast cont) (extend 'type (getfield ast TYPE)) (cond ((or (eq type STRING) (eq type NUMBER) (eq type REFERENCE)) (make_node APPLICATION (list SUBEXPR (list cont ast)))) ((eq type SETQ) (cps_list (getfield ast SUBEXPR) (lambda (value) (make_node APPLICATION (list SUBEXPR (list cont (make_node SETQ (list SUBEXPR value) (list VAR (getfield ast VAR))))))))) ((eq type IF) (extend 'xform (lambda (cont) (cps_list (list (car (getfield ast SUBEXPR))) (lambda (test) (make_node type (list SUBEXPR (append test (mapcar (lambda (x) (cps x cont)) (cdr (getfield ast SUBEXPR)))))))))) ; Avoid creating an unnecessary administrative closure if ; our continuation is a variable reference, and avoid ; passing a lambda-expression as a continuation and so ; having it propagated unnecessarily throughout our code. (if (eq (getfield cont TYPE) REFERENCE) (xform cont) (let ((k (new_var 'k))) (make_node APPLICATION (list SUBEXPR (list (make_node LAMBDA (list SUBEXPR (list (xform (make_node REFERENCE (list VAR k))))) (list PARAMS (list k))) cont)))))) ((eq type PRIMITIVE) (cps_list (getfield ast SUBEXPR) (lambda (args) (make_node APPLICATION (list SUBEXPR (list cont (make_node PRIMITIVE (list SUBEXPR args) (list OPERATION (getfield ast OPERATION))))))))) ((eq type APPLICATION) (extend 'func (car (getfield ast SUBEXPR))) (if (eq (getfield func TYPE) LAMBDA) ; Lambda-expressions in function position have static ; continuations, so they do not need to receive their ; continuation dynamically, at run-time. The static ; continuation is passed directly to cps_sequence which ; cps-converts the function's body. (cps_list (cdr (getfield ast SUBEXPR)) (lambda (values) (make_node APPLICATION (list SUBEXPR (cons (make_node LAMBDA (list SUBEXPR (list (cps_sequence (getfield func SUBEXPR) cont))) (list PARAMS (getfield func PARAMS))) values))))) ; Other functions can be invoked in different contexts, ; and so will be converted to receive their continuation ; as their first argument in the "function" clause of this ; cond, so we add another argument to the arguments of ; their application here. (cps_list (getfield ast SUBEXPR) (lambda (args) (make_node APPLICATION (list SUBEXPR (cons (car args) (cons cont (cdr args))))))))) ((eq type LAMBDA) (extend 'k (new_var 'k)) (make_node APPLICATION (list SUBEXPR (list cont (make_node LAMBDA (list SUBEXPR (list (cps_sequence (getfield ast SUBEXPR) (make_node REFERENCE (list VAR k))))) (list PARAMS (cons k (getfield ast PARAMS)))))))) ((eq type SEQUENCE) (cps_sequence (getfield ast SUBEXPR) cont)))) (defun cps_list (asts inner) (extend 'body (lambda (x) (cps_list (cdr asts) (lambda (new_asts) (inner (cons x new_asts)))))) (cond ((nullp asts) (inner ())) ((or (eq (extend 'type (getfield (car asts) TYPE)) NUMBER) (eq type STRING) (eq type REFERENCE)) (body (car asts))) (1 (let ((r (new_var 'r))) (cps (car asts) (make_node LAMBDA (list SUBEXPR (list (body (make_node REFERENCE (list VAR r))))) (list PARAMS (list r)))))))) (defun cps_sequence (asts cont) (if (nullp (cdr asts)) (cps (car asts) cont) (cps (car asts) (make_node LAMBDA (list SUBEXPR (list (cps_sequence (cdr asts) cont))) (list PARAMS (list (new_var 'r))))))) (defun cps_convert (ast) ; The cps function is passed the primordial continuation, which calls halt ; with the result of the final evaluation. (extend 'ast_cps (cps ast (let ((r (new_var 'r))) (make_node LAMBDA (list SUBEXPR (list (make_node PRIMITIVE (list SUBEXPR (list (make_node REFERENCE (list VAR r)))) (list OPERATION 'halt)))) (list PARAMS (list r)))))) (if (lookup_env 'call_cc (free ast)) ; The definition for call_cc is added if it is needed. (make_node APPLICATION (list SUBEXPR (list (make_node LAMBDA (list SUBEXPR (list ast_cps)) (list PARAMS (list (new_var '__)))) (expand_expr '(setq call_cc (lambda (k f) (f k (lambda (__ result) (k result))))) ())))) ast_cps)) ; --------------------------------------------------------------------------- ; ; Closure conversion. The VALUES of free variables get bundled together ; with the closures that access them, becoming hyper-statically closed. ; ; --------------------------------------------------------------------------- (defun position (x lst) (catch (extend 'i 0) (while lst (when (eq x (car lst)) (throw i)) (setq lst (cdr lst)) (inc i)))) (defun cc (ast self free_vars) (extend 'type (getfield ast TYPE)) (cond ((or (eq type STRING) (eq type NUMBER)) ast) ((eq type REFERENCE) (extend 'i (position (getfield ast VAR) free_vars)) ; References to free variables get converted into ; CLOSURE_REF objects, consisting of a reference ; to a closure object, and an offset into the object. (if (fixnump i) (make_node PRIMITIVE (list SUBEXPR (list (make_node REFERENCE (list VAR self)) (make_node NUMBER (list VALUE (+ i 1))))) (list OPERATION 'CLOSURE_REF)) ast)) ((eq type SETQ) (make_node SETQ (list SUBEXPR (mapcar (lambda (e) (cc e self free_vars)) (getfield ast SUBEXPR))) (list VAR (getfield ast VAR)))) ((eq type IF) (make_node IF (list SUBEXPR (mapcar (lambda (e) (cc e self free_vars)) (getfield ast SUBEXPR))))) ((eq type PRIMITIVE) (make_node PRIMITIVE (list SUBEXPR (mapcar (lambda (e) (cc e self free_vars)) (getfield ast SUBEXPR))) (list OPERATION (getfield ast OPERATION)))) ; This clause is only triggered if call_cc has been included in the ; program. Otherwise, no sequences will remain after CPS conversion. ((eq type SEQUENCE) (make_node SEQUENCE (list SUBEXPR (mapcar (lambda (e) (cc e self free_vars)) (getfield ast SUBEXPR))))) ((eq type APPLICATION) (extend 'func (car (getfield ast SUBEXPR))) (extend 'args (mapcar (lambda (e) (cc e self free_vars)) (cdr (getfield ast SUBEXPR)))) ; Lambda-expressions in function position do not get closed ; because they cannot be invoked in contexts outside of the ; one in which they are defined, therefore their free ; variables are always available via the stack when they are ; being executed. They will be found in the activation records ; of any enclosing lambda-expressions or CLOSURE, or they will ; be CLOSURE_REFs to an enclosing CLOSURE's closed variables. (if (eq LAMBDA (getfield func TYPE)) (make_node APPLICATION (list SUBEXPR (cons (make_node LAMBDA (list SUBEXPR (list (cc (car (getfield func SUBEXPR)) self free_vars))) (list PARAMS (getfield func PARAMS))) args))) ; Otherwise func will be an expression evaluating to a ; pointer to a closure object on the heap. We make a ; CLOSURE_REF to the first item in the object, which will ; contain a pointer to the function definition which was ; "evaluated" to form the closure, and we prepend the ; pointer to func's arguments to form the self argument. (extend 'f (cc func self free_vars)) (make_node APPLICATION (list SUBEXPR (cons (make_node PRIMITIVE (list SUBEXPR (list f (make_node NUMBER (list VALUE 0)))) (list OPERATION 'CLOSURE_REF)) (cons f args)))))) ((eq type LAMBDA) (extend 'new_free_vars (keep (lambda (v) (not (global_var_p v))) (free ast))) (extend 'new_self (new_var 'self)) ; Lambda-expressions which can be invoked in contexts ; other than the one in which they were defined need to ; close over any free variables they reference. Their ; parameter lists are prepended with a self variable, for ; the use of internal CLOSURE_REFs. (make_node PRIMITIVE (list SUBEXPR (cons (make_node LAMBDA (list SUBEXPR (list (cc (car (getfield ast SUBEXPR)) new_self new_free_vars))) (list PARAMS (cons new_self (getfield ast PARAMS)))) (mapcar (lambda (v) (cc (make_node REFERENCE (list VAR v)) self free_vars)) new_free_vars))) (list OPERATION 'CLOSURE))))) ; This toplevel wrapper lambda expression becomes function number 0, ; which is the "main" function of the lisp program. (defun closure_convert (ast) (make_node LAMBDA (list SUBEXPR (list (cc ast 0 ()))) (list PARAMS ()))) ; --------------------------------------------------------------------------- ; ; Code generation. ; ; --------------------------------------------------------------------------- (setq to_do ()) (setq global_vars ()) (let ((count -1)) (defun add_lambda (l) (inc count) (setq to_do (cons (list count l) to_do)) count)) (defun cg_list (asts vars stack_env cont) (if (nullp asts) (tailcall cont "" stack_env) (extend 'x (code_gen (car asts) stack_env)) (tailcall cg_list (cdr asts) (cdr vars) (cons (car vars) stack_env) (lambda (code new_stack_env) (cont (list x code) new_stack_env))))) (defun range (from to) (extend 'l ()) (when (>= to from) (for (n to from) (setq l (cons n l)))) l) (defun cg_args (args stack_env) (tailcall cg_list args (range 1 (length args)) stack_env (lambda (code stack_env) code))) (defun access_var (var stack_env) (if (global_var_p var) (list " GLOBAL( " (position var global_vars) " ) /* " (getfield var VID) " */" ) (if (fixnump (position var stack_env)) (list " LOCAL( " (- (- (length stack_env) (position var stack_env)) 1) " ) /* " (getfield var CID) " */ ") (die "Variable not found: " (getfield var VID) ":" (getfield var CID))))) ; Table mapping symbols to intrinsics. (setq intrinsics (table)) (foreach (lambda (i) (hash intrinsics (car i) (cadr i))) '( ; Fixnums (< " LT;") (> " GT;") (>= " GTE;") (<= " LTE;") (= " EQ;") (+ " ADD;") (- " SUB;") (/ " DIV;") (* " MUL;") (% " MOD;") (numberp " NUMBERP;") (eq " EQ;") (abs " ABS;") ; Records (record " MAKE_RECORD;") (getfield " GETFIELD;") (setfield " SETFIELD;") ; System (display " DISPLAY;") (display_error " DISPLAY_ERROR;") (newline " NEWLINE;") (newline_error " NEWLINE_ERROR;") (halt " HALT;") (exit " EXIT;") (fork " FORK;") (exec " EXEC;") (exec_stack " EXEC_STACK;") (system " SYSTEM;") (setenv " SETENV;") (getenv " GETENV;") (getline " GETLINE;") (readchars " READCHARS;") (date " DATE;") (time " TIME;") (random " RANDOM;") ; Files (basename " BASENAME;") (dirname " DIRNAME;") (remove " REMOVE;") (rename " RENAME;") (symlink " SYMLINK;") (directory " DIRECTORY;") (stat " STAT;") (redirect " REDIRECT;") (pipe " PIPE;") (resume " RESUME;") ; Command-line args (current " CURRENT;") (next " NEXT;") (previous " PREVIOUS;") (rewind " REWIND;") ; Regexps (regexp " REGEXP;") (regcomp " REG_COMP;") (match " REG_MATCH;") (matches " REG_MATCHES;") (substitute " REG_SUBST;") ; Tables (tablep " TABLEP;") (table " TABLE;") (associate " ASSOCIATE;") (dissociate " DISSOCIATE;") (lookup " LOOKUP;") (keys " KEYS;") (values " VALUES;") ; Stacks (stackp " STACKP;") (index " INDEX;") (used " USED;") (push " STACK_PUSH;") (pop " STACK_POP;") (unshift " UNSHIFT;") (shift " SHIFT;") (store " STORE;") (stack " STACK;") (topidx " TOPIDX;") (sort_strings " SORT_STRINGS;") (sort_numbers " SORT_NUMBERS;") (clear " CLEAR;") (substack " SUBSTACK;") (append " APPEND;") ; Strings (stringp " STRINGP;") (substring " SUBSTRING;") (concat " CONCAT;") (explode " EXPLODE;") (digitize " DIGITIZE;") (stringify " STRINGIFY;") (code " CODE;") (chomp " CHOMP;") (chop " CHOP;") (char " CHAR;") (length " LENGTH;") (join " JOIN;") (join_stack " JOIN_STACK;") (strcmp " STRCMP;") (expand_tabs " EXPAND_TABS;") (split " SPLIT;"))) (setq str_rx (regcomp "\"")) (setq escape_rx (regcomp "\\\\")) (defun code_gen (ast stack_env) (extend 'type (getfield ast TYPE)) (cond ((eq type NUMBER) (list " PUSH( MAKE_NUMBER( " (getfield ast VALUE) " ) );" (char 10))) ((eq type STRING) (list " PUSH( MAKE_STRING( \"" (substitute str_rx "\\\\\"" (substitute escape_rx "\\\\\\\\" (getfield ast VALUE) 0) 0) "\", " (length (getfield ast VALUE)) " ) );" (char 10))) ((eq type REFERENCE) (list " PUSH(" (access_var (getfield ast VAR) stack_env) " );" (char 10))) ((eq type SETQ) (list (code_gen (car (getfield ast SUBEXPR)) stack_env) (access_var (getfield ast VAR) stack_env) " = TOS;" (char 10))) ((eq type IF) (extend 'x (mapcar (lambda (c) (code_gen c stack_env)) (getfield ast SUBEXPR))) (list (car x) " BEGIN_IF" (char 10) (cadr x) (if (cddr x) (list " ELSE" (char 10) (car (cddr x))) "") " END_IF" (char 10))) ; Only used if call_cc is included in the program. No ; sequences will remain after CPS conversion of the rest of the ; program. Since call_cc contains only the invocation of a ; continuation, we do not need to include a DROP instruction to ; discard the results of evaluating preceding items in the ; sequence. ((eq type SEQUENCE) (mapcar (lambda (ast) (code_gen ast stack_env)) (getfield ast SUBEXPR))) ((eq type PRIMITIVE) (extend 'op (getfield ast OPERATION)) (extend 'args (getfield ast SUBEXPR)) ; The values of the free variables a closure accesses are ; copied into a heap-allocated closure record, along with ; its function pointer, every time the closure is evaluated. ; A pointer to this record is pushed onto the stack to ; represent the evaluated closure. The first item in the ; closure record is the function pointer. (cond ((eq op 'CLOSURE) (list (cg_args (cdr args) stack_env) " MAKE_CLOSURE( " (add_lambda (car args)) ", " (length (cdr args)) " );" (char 10))) ; The first element of the CLOSURE_REF will be a ; function pointer to the closure record associated ; with this closure, while the second argument will ; be an offset into to the closure record of the ; desired item. ((eq op 'CLOSURE_REF) (list (code_gen (car args) stack_env) " TOS = CLOSURE_REF( " (getfield (cadr args) VALUE) " );" (char 10))) ; The intrinsics. (1 (list (cg_args args stack_env) (unless (lookup intrinsics op) (die "Error: unknown intrinsic: " op) ) (char 10))))) ((eq type APPLICATION) (extend 'args (cdr (getfield ast SUBEXPR))) (extend 'fn (car (getfield ast SUBEXPR))) (if (eq (getfield fn TYPE) LAMBDA) ; Since lambda-expressions in function position cannot be ; called in other contexts, we compile them inline. (tailcall cg_list args (getfield fn PARAMS) stack_env (lambda (thecode new_stack_env) (list thecode (code_gen (car (getfield fn SUBEXPR)) new_stack_env)))) ; Otherwise we must jump to the new function. We don't generate ; code here for fn, because that expression will also be the first ; (self) argument to the function, and we don't want to execute ; that expression twice. (extend 'n (length args)) (cg_list args (range 1 n) stack_env (lambda (thecode new_stack_env) (list thecode " RESET_STACK;" (char 10) (mapcar (lambda (n) (list " PUSH( LOCAL( " (+ n (length stack_env)) " ) );" (char 10))) (range 0 (- n 1))) " JUMP;" (char 10)))))))) (defun print_code (thecode) (when thecode (if (pairp (car thecode)) (print_code (car thecode)) (print (car thecode))) (tailcall print_code (cdr thecode)))) (let ((x ()) (ast ()) (prototypes ()) (result ())) (defun generate_code (ast) (setq global_vars (free ast)) (add_lambda ast) (with_output_file "functions.c" (print "#include \"minimunger.h\"") (newline) (print "#include \"functions.h\"") (newline) (newline) (while to_do (setq x (caar to_do)) (setq ast (car (cdar to_do))) (setq to_do (cdr to_do)) (setq prototypes (cons (stringify "void FUNCTION_" x "();" (char 10)) prototypes)) (print_code (list "void FUNCTION_" x "()" (char 10) "{" (char 10) (code_gen (car (getfield ast SUBEXPR)) (reverse (getfield ast PARAMS))) "}" (char 10) (char 10))))) (with_output_file "functions.h" (print "int num_globals = " (length global_vars) ";") (newline) (newline) (mapcar print prototypes)))) ; --------------------------------------------------------------------------- ; ; Conversion from AST back to list structure. Not used by the compiler. ; Included for debugging purposes. ; ; --------------------------------------------------------------------------- (defun source (ast) (extend 'type (getfield ast TYPE)) (cond ((eq type STRING) (getfield ast VALUE)) ((eq type NUMBER) (getfield ast VALUE)) ((eq type REFERENCE) (getfield (getfield ast VAR) CID)) ((eq type SETQ) (list 'setq (getfield (getfield ast VAR) CID) (source (car (getfield ast SUBEXPR))))) ((eq type IF) (cons 'if (mapcar source (getfield ast SUBEXPR)))) ((eq type PRIMITIVE) (cons (getfield ast OPERATION) (mapcar source (getfield ast SUBEXPR)))) ((eq type APPLICATION) (if (not (eq LAMBDA (getfield (car (getfield ast SUBEXPR)) TYPE))) (mapcar source (getfield ast SUBEXPR)) (extend 'func (car (getfield ast SUBEXPR))) (extend 'params (mapcar (lambda (n) (getfield n CID)) (getfield func PARAMS))) (extend 'args (cdr (getfield ast SUBEXPR))) (extend 'init ()) (while params (setq init (append init (list (list (car params) (source (car args)))))) (setq params (cdr params)) (setq args (cdr args))) (list 'let init (source (car (getfield (car (getfield ast SUBEXPR)) SUBEXPR)))))) ((eq type LAMBDA) (list 'lambda (mapcar (lambda (n) (getfield n CID)) (getfield ast PARAMS)) (source (car (getfield ast SUBEXPR))))) ((eq type SEQUENCE) (cons 'progn (mapcar source (getfield ast SUBEXPR)))))) ; ------------------------------------------------------------------------------ ; ; Toplevel code. ; ; ------------------------------------------------------------------------------ (next) (unless (next) (die "Error: filename argument missing.")) (when (eq (current) "__VERSION__") (println mm_version) (exit 0)) (unless (eq 1 (exists (current))) (die "Error: " (current) " does not exist or is not regular file.")) (print "Compilation of " (basename (current)) " started " (date)) (newline) (print "Creating AST...") (newline) (setq c (parse_file)) (print "Finished.") (newline) (print "CPS Conversion in progress...") (newline) (setq c (cps_convert c)) (print "Finished.") (newline) (print "Closure Conversion in progress...") (newline) (setq c (closure_convert c)) (print "Finished.") (newline) (print "Code Generation in progress...") (newline) (generate_code c) (print "Finished.") (newline) (print "Compilation finished " (date)) (newline) (quit)