From af95bd9dec673ef3a76d706cfc1e512973ad358b Mon Sep 17 00:00:00 2001
From: sjs
Date: Wed, 13 May 2009 20:56:46 -0700
Subject: [PATCH] Initial commit of a compiler in Ruby.

---
 Makefile     |   8 +++
 compiler.rb  | 166 +++++++++++++++++++++++++++++++++++++++++++++++++++
 cradle.rb    | 119 ++++++++++++++++++++++++++++++++++++
 epilogue.asm |   3 +
 prologue.asm |   3 +
 test.rb      |  39 ++++++++++++
 6 files changed, 338 insertions(+)
 create mode 100644 Makefile
 create mode 100644 compiler.rb
 create mode 100644 cradle.rb
 create mode 100644 epilogue.asm
 create mode 100644 prologue.asm
 create mode 100644 test.rb

diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..e97ce0e
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,8 @@
+build:
+#	ruby test.rb '5-5+3-2-1'
+	ruby test.rb '5*3-5*2+3-9/3-1*1-8/2'
+	nasm -f elf -g -o test.o test.asm
+	ld -o test test.o
+#	$? indicates success as per unix convention
+	./test
+
diff --git a/compiler.rb b/compiler.rb
new file mode 100644
index 0000000..390f4c6
--- /dev/null
+++ b/compiler.rb
@@ -0,0 +1,166 @@
+# A compiler as described by Jack Crenshaw in his famous book "Let's
+# Build a Compiler".  At least in the beginning, this code will
+# closely reflect the Pascal code written by Jack.  Over time it may
+# become more idiomatic, however this is an academic exercise.
+#
+# sjs
+# may 2009
+
+class ParseError < StandardError; end
+
+class Compiler
+  def initialize(input=STDIN, output=STDOUT)
+    @look = ''                  # next lookahead char
+    @input = input              # stream to read from
+    @output = output            # stream to write to
+
+    # seed the lexer
+    get_char
+  end
+
+  # Read the next character from the input stream
+  def get_char
+    @look = @input.getc
+    @look = @look.chr if @look
+  end
+
+  # Report error and halt
+  def abort(msg)
+    raise ParseError, msg
+  end
+
+  # Report what was expected
+  def expected(what)
+    if eof?
+      raise ParseError, "Premature end of file, expected: #{what}."
+    else
+      raise ParseError, "Expected: #{what}, got: #{@look}."
+    end
+  end
+
+  # Match a specific input character
+  def match(char)
+    if @look == char
+      get_char
+    else
+      expected("'#{char}'")
+    end
+  end
+
+  # Recognize an alphabetical character (nil, i.e. end of input, is not one)
+  def is_alpha(char)
+    !char.nil? && ('A'..'Z') === char.upcase
+  end
+
+  # Recognize a decimal digit
+  def is_digit(char)
+    ('0'..'9') === char
+  end
+
+  # Get an identifier
+  def get_name
+    expected('identifier') unless is_alpha(@look)
+    c = @look
+    get_char
+    return c
+  end
+
+  # Get a number
+  def get_num
+    expected('integer') unless is_digit(@look)
+    c = @look
+    get_char
+    return c
+  end
+
+  # Print a tab followed by a string and a newline
+  def emit(s)
+    @output.puts("\t#{s}")
+  end
+
+  # Parse and translate a single factor.  Result is in eax.
+  def factor
+    emit("mov eax, #{get_num}")
+  end
+
+  # Parse and translate a single term.  Result is in eax.
+  def term
+    factor                      # Result in eax.
+    while ['*', '/'].include?(@look)
+      # Stash the 1st factor on the stack.  This is expected by
+      # multiply & divide.  Because they leave their results in eax
+      # associativity works.  Each interim result is pushed on the
+      # stack here.
+      emit("push eax")
+
+      case @look
+      when '*' then multiply
+      when '/' then divide
+      else
+        expected('Multiplication or division operator (* or /)')
+      end
+      emit("add esp, 4")        # Remove the 1st factor from the stack.
+    end
+  end
+
+  # Parse and translate a mathematical expression of terms.  Result is
+  # in eax.
+  def expression
+    term                        # Result is in eax.
+
+    while ['+', '-'].include?(@look)
+      # Stash the 1st term on the stack.  This is expected by add &
+      # subtract.  Because they leave their results in eax
+      # associativity works.  Each interim result is pushed on the
+      # stack here.
+      emit("push eax")
+
+      case @look
+      when '+' then add
+      when '-' then subtract
+      else
+        expected('Addition or subtraction operator (+ or -)')
+      end
+      emit("add esp, 4")        # Remove 1st term (a) from the stack.
+    end
+  end
+
+  # Parse an addition operator and the 2nd term (b).  The result is
+  # left in eax.  The 1st term (a) is expected on the stack.
+  def add
+    match('+')
+    term                        # Result is in eax.
+    emit("add eax, [esp]")      # Add a to b.
+  end
+
+  # Parse a subtraction operator and the 2nd term (b).  The result is
+  # left in eax.  The 1st term (a) is expected on the stack.
+  def subtract
+    match('-')
+    term                        # Result is in eax.
+    emit("sub eax, [esp]")      # Subtract a from b (this is backwards).
+    emit("neg eax")             # Fix things up.  -(b-a) == a-b
+  end
+
+  # Parse a multiplication operator and the 2nd factor (b).  The result
+  # is left in eax.  The 1st factor (a) is expected on the stack.
+  def multiply
+    match('*')
+    factor                      # Result is in eax.
+    emit("imul dword [esp]")    # Multiply a by b.
+  end
+
+  # Parse a division operator and the divisor (b).  The result is
+  # left in eax.  The dividend (a) is expected on the stack.
+  def divide
+    match('/')
+    factor                      # Result is in eax.
+    emit("xchg eax, [esp]")     # Swap divisor (b) and dividend (a) into place.
+    emit("cdq")                 # Sign-extend eax into edx:eax, as idiv expects.
+    emit("idiv dword [esp]")    # Divide a (edx:eax) by b ([esp]).
+  end
+
+  def eof?
+    @input.eof? && @look.nil?
+  end
+end
diff --git a/cradle.rb b/cradle.rb
new file mode 100644
index 0000000..8c8dbb1
--- /dev/null
+++ b/cradle.rb
@@ -0,0 +1,119 @@
+# A compiler skeleton, or cradle, as described by Jack Crenshaw in his
+# famous book "Let's Build a Compiler".  At least in the beginning,
+# this code will closely reflect the Pascal code written by Jack.
+# Over time it may become more idiomatic, however this is an academic
+# exercise.
+
+class ParseError < StandardError; end
+
+class Compiler
+  def initialize(input=STDIN)
+    @look = ''                  # next lookahead char
+    @input = input              # stream to read from
+
+    # seed the lexer
+    get_char
+  end
+
+  # Read the next character from the input stream
+  def get_char
+    @look = @input.getc
+    @look = @look.chr if @look
+  end
+
+  # Report error and halt
+  def abort(msg)
+    raise ParseError, msg
+  end
+
+  # Report what was expected
+  def expected(what)
+    if eof?
+      raise ParseError, "Premature end of file, expected: #{what}."
+    else
+      raise ParseError, "Expected: #{what}, got: #{@look}."
+    end
+  end
+
+  # Match a specific input character
+  def match(char)
+    if @look == char
+      get_char
+    else
+      expected("'#{char}'")
+    end
+  end
+
+  # Recognize an alphabetical character (nil, i.e. end of input, is not one)
+  def is_alpha(char)
+    !char.nil? && ('A'..'Z') === char.upcase
+  end
+
+  # Recognize a decimal digit
+  def is_digit(char)
+    ('0'..'9') === char
+  end
+
+  # Get an identifier
+  def get_name
+    expected('identifier') unless is_alpha(@look)
+    c = @look
+    get_char
+    return c
+  end
+
+  # Get a number
+  def get_num
+    expected('integer') unless is_digit(@look)
+    c = @look
+    get_char
+    return c
+  end
+
+  # Print a tab followed by a string and a newline
+  def emit(s)
+    puts "\t#{s}"
+  end
+
+  # Parse and translate a single mathematical term.  Result is in eax.
+  def term
+    emit("mov eax, #{get_num}")
+  end
+
+  # Parse an addition operator and the 2nd term.  The 1st term is
+  # expected in ebx, and is added to the 2nd term leaving the result
+  # in eax.
+  def add
+    match('+')
+    term                        # result in eax
+    emit("add eax, ebx")
+  end
+
+  # Parse a subtraction operator and the 2nd term (b).  The 1st term
+  # (a) is expected in ebx, and b is subtracted from a
+  # leaving the result in eax.
+  def subtract
+    match('-')
+    term                        # result in eax (b)
+    emit("sub eax, ebx")        # subtract a from b (this is backwards)
+    emit("neg eax")             # fix things up.  -(b-a) == a-b
+  end
+
+  # Parse and translate a mathematical expression of terms.  Result is
+  # in eax.
+  def expression
+    term                        # result is in eax
+    emit("mov ebx, eax")        # move 1st term to ebx (expected by
+                                # add & subtract)
+    case @look
+    when '+' then add
+    when '-' then subtract
+    else
+      expected('Addition or subtraction operator (+ or -)')
+    end
+  end
+
+  def eof?
+    @input.eof? && @look.nil?
+  end
+end
diff --git a/epilogue.asm b/epilogue.asm
new file mode 100644
index 0000000..15fa155
--- /dev/null
+++ b/epilogue.asm
@@ -0,0 +1,3 @@
+	mov ebx, eax
+	mov eax, 1
+	int 0x80
diff --git a/prologue.asm b/prologue.asm
new file mode 100644
index 0000000..db8840a
--- /dev/null
+++ b/prologue.asm
@@ -0,0 +1,3 @@
+GLOBAL _start
+SECTION .text
+_start:
diff --git a/test.rb b/test.rb
new file mode 100644
index 0000000..d361087
--- /dev/null
+++ b/test.rb
@@ -0,0 +1,39 @@
+require File.join(File.dirname(__FILE__), 'compiler')
+require 'stringio'
+
+MaxRetries = 1
+
+def error(msg) STDERR.puts(msg) end
+
+# Main program
+def main
+  retries = 0
+  input = StringIO.new(ARGV[0] || '5-5')
+  output = StringIO.new
+  parse = Compiler.new(input, output)
+  until parse.eof?
+    begin
+      parse.expression
+    rescue ParseError => e
+      error("[error] #{e.message}")
+      if retries < MaxRetries
+        retries += 1
+        error("Skipping token...")
+        parse.get_char
+        retry
+      else
+        error("Aborting!")
+        break
+      end
+    end
+  end
+  output.string
+end
+
+code = main
+File.open("test.asm", "w") do |f|
+  f.puts(File.readlines("prologue.asm"))
+  f.puts(code)
+  f.puts(File.readlines("epilogue.asm"))
+end
+