Initial commit of a compiler in Ruby.

This commit is contained in:
sjs 2009-05-13 20:56:46 -07:00
commit af95bd9dec
6 changed files with 338 additions and 0 deletions

8
Makefile Normal file
View file

@ -0,0 +1,8 @@
# Compile a sample arithmetic expression to test.asm, assemble and link it
# into ./test, then run the resulting program.
build:
# ruby test.rb '5-5+3-2-1'
# test.rb writes the generated program to test.asm.
ruby test.rb '5*3-5*2+3-9/3-1*1-8/2'
# Assemble to an ELF object (-f elf) with debug information (-g).
nasm -f elf -g -o test.o test.asm
ld -o test test.o
# $? indicates success as per unix convention
# ./test exits with the value of the compiled expression (see epilogue.asm);
# the sample expression above evaluates to 0, so make sees success.
./test

166
compiler.rb Normal file
View file

@ -0,0 +1,166 @@
# A compiler as described by Jack Crenshaw in his famous book "Let's
# Build a Compiler". At least in the beginning, this code will
# closely reflect the Pascal code written by Jack. Over time it may
# become more idiomatic, however this is an academic exercise.
#
# sjs
# may 2009
# Raised when the input does not match what the parser expects.
class ParseError < StandardError; end

# Single-pass recursive-descent compiler for arithmetic expressions made of
# single-digit integers and the operators + - * /. It reads characters from
# an input stream and writes x86 assembly text (one instruction per line)
# to an output stream. Every parsing method leaves its result in eax.
class Compiler
  # Create a compiler and prime the lookahead with the first character.
  #
  # input  - stream responding to getc and eof? (the source text)
  # output - stream responding to puts (receives the generated assembly)
  def initialize(input=STDIN, output=STDOUT)
    @look   = ''     # next lookahead char
    @input  = input  # stream to read from
    @output = output # stream to write to
    get_char         # seed the lexer
  end

  # Read the next character from the input stream into @look.
  # @look becomes nil once the input is exhausted.
  def get_char
    @look = @input.getc
    # getc yields an Integer on Ruby 1.8 and a String on 1.9+; chr turns
    # either into a one-character String.
    @look = @look.chr if @look
  end

  # Report error and halt. Raises ParseError with the given message.
  def abort(msg)
    raise ParseError, msg
  end

  # Report what was expected. Always raises ParseError; the message
  # distinguishes running out of input from seeing the wrong character.
  def expected(what)
    if eof?
      raise ParseError, "Premature end of file, expected: #{what}."
    else
      raise ParseError, "Expected: #{what}, got: #{@look}."
    end
  end

  # Match a specific input character and advance past it.
  # Raises ParseError when the lookahead is anything else.
  def match(char)
    if @look == char
      get_char
    else
      expected("'#{char}'")
    end
  end

  # Recognize an alphabetical character. Returns false for nil so that a
  # lookahead at end of input is rejected instead of raising NoMethodError.
  def is_alpha(char)
    return false if char.nil?

    ('A'..'Z') === char.upcase
  end

  # Recognize a decimal digit.
  def is_digit(char)
    ('0'..'9') === char
  end

  # Get an identifier (a single letter). Returns the letter and advances.
  # Raises ParseError when the lookahead is not a letter.
  def get_name
    expected('identifier') unless is_alpha(@look)
    name = @look
    get_char
    name
  end

  # Get a number (a single digit). Returns the digit as a String and
  # advances. Raises ParseError when the lookahead is not a digit.
  def get_num
    expected('integer') unless is_digit(@look)
    digit = @look
    get_char
    digit
  end

  # Print a tab followed by a string and a newline
  def emit(s)
    @output.puts("\t#{s}")
  end

  # Parse and translate a single factor. Result is in eax.
  def factor
    emit("mov eax, #{get_num}")
  end

  # Parse and translate a single term. Result is in eax.
  def term
    factor # Result in eax.
    while ['*', '/'].include?(@look)
      # Stash the 1st factor on the stack. This is expected by
      # multiply & divide. Because they leave their results in eax
      # associativity works. Each interim result is pushed on the
      # stack here.
      emit("push eax")
      # `when X then` is used because the colon form (`when X:`) was
      # removed from the language in Ruby 1.9.
      case @look
      when '*' then multiply
      when '/' then divide
      else
        expected('Multiplication or division operator (* or /)')
      end
      emit("add esp, 4") # Remove the 1st factor from the stack.
    end
  end

  # Parse and translate a mathematical expression of terms. Result is
  # in eax.
  def expression
    term # Result is in eax.
    while ['+', '-'].include?(@look)
      # Stash the 1st term on the stack. This is expected by add &
      # subtract. Because they leave their results in eax
      # associativity works. Each interim result is pushed on the
      # stack here.
      emit("push eax")
      case @look
      when '+' then add
      when '-' then subtract
      else
        expected('Addition or subtraction operator (+ or -)')
      end
      emit("add esp, 4") # Remove 1st term (a) from the stack.
    end
  end

  # Parse an addition operator and the 2nd term (b). The result is
  # left in eax. The 1st term (a) is expected on the stack.
  def add
    match('+')
    term                   # Result is in eax.
    emit("add eax, [esp]") # Add a to b.
  end

  # Parse a subtraction operator and the 2nd term (b). The result is
  # left in eax. The 1st term (a) is expected on the stack.
  def subtract
    match('-')
    term                   # Result is in eax.
    emit("sub eax, [esp]") # Subtract a from b (this is backwards).
    emit("neg eax")        # Fix things up. -(b-a) == a-b
  end

  # Parse a multiplication operator and the 2nd factor (b). The result
  # is left in eax. The 1st factor (a) is expected on the stack.
  def multiply
    match('*')
    factor                   # Result is in eax.
    emit("imul dword [esp]") # Multiply a by b.
  end

  # Parse a division operator and the divisor (b). The result is
  # left in eax. The dividend (a) is expected on the stack.
  def divide
    match('/')
    factor                   # Result is in eax.
    emit("xchg eax, [esp]")  # Swap the divisor and dividend into
                             # the correct places.
    # idiv divides the 64-bit value edx:eax, so eax must be sign-extended
    # into edx first. Without this, the remainder left in edx by an earlier
    # idiv corrupts chained divisions such as 7/2/3.
    emit("cdq")
    emit("idiv dword [esp]") # Divide a (edx:eax) by b ([esp]).
  end

  # True once the input stream is exhausted and no lookahead character
  # remains to be consumed.
  def eof?
    @input.eof? && @look.nil?
  end
end

119
cradle.rb Normal file
View file

@ -0,0 +1,119 @@
# A compiler skeleton, or cradle, as described by Jack Crenshaw in his
# famous book "Let's Build a Compiler". At least in the beginning,
# this code will closely reflect the Pascal code written by Jack.
# Over time it may become more idiomatic, however this is an academic
# exercise.
# Raised when the input does not match what the parser expects.
class ParseError < StandardError; end

# Compiler skeleton ("cradle"): reads single characters from an input
# stream and prints x86 assembly for one binary addition or subtraction
# of two single-digit integers to standard output.
class Compiler
  # Create a compiler and prime the lookahead with the first character.
  #
  # input - stream responding to getc and eof? (the source text)
  def initialize(input=STDIN)
    @look  = ''    # next lookahead char
    @input = input # stream to read from
    get_char       # seed the lexer
  end

  # Read the next character from the input stream into @look.
  # @look becomes nil once the input is exhausted.
  def get_char
    @look = @input.getc
    # getc yields an Integer on Ruby 1.8 and a String on 1.9+; chr turns
    # either into a one-character String.
    @look = @look.chr if @look
  end

  # Report error and halt. Raises ParseError with the given message.
  def abort(msg)
    raise ParseError, msg
  end

  # Report what was expected. Always raises ParseError; the message
  # distinguishes running out of input from seeing the wrong character.
  def expected(what)
    if eof?
      raise ParseError, "Premature end of file, expected: #{what}."
    else
      raise ParseError, "Expected: #{what}, got: #{@look}."
    end
  end

  # Match a specific input character and advance past it.
  # Raises ParseError when the lookahead is anything else.
  def match(char)
    if @look == char
      get_char
    else
      expected("'#{char}'")
    end
  end

  # Recognize an alphabetical character. Returns false for nil so that a
  # lookahead at end of input is rejected instead of raising NoMethodError.
  def is_alpha(char)
    return false if char.nil?

    ('A'..'Z') === char.upcase
  end

  # Recognize a decimal digit.
  def is_digit(char)
    ('0'..'9') === char
  end

  # Get an identifier (a single letter). Returns the letter and advances.
  # Raises ParseError when the lookahead is not a letter.
  def get_name
    expected('identifier') unless is_alpha(@look)
    name = @look
    get_char
    name
  end

  # Get a number (a single digit). Returns the digit as a String and
  # advances. Raises ParseError when the lookahead is not a digit.
  def get_num
    expected('integer') unless is_digit(@look)
    digit = @look
    get_char
    digit
  end

  # Print a tab followed by a string and a newline
  def emit(s)
    puts "\t#{s}"
  end

  # Parse and translate a single mathematical term. Result is in eax.
  def term
    emit("mov eax, #{get_num}")
  end

  # Parse an addition operator and the 2nd term. The 1st term is
  # expected in ebx, and is added to the 2nd term leaving the result
  # in eax.
  def add
    match('+')
    term # result in eax
    emit("add eax, ebx")
  end

  # Parse a subtraction operator and the 2nd term (b). The 1st term
  # (a) is expected in ebx, and b is subtracted from a leaving the
  # result in eax.
  def subtract
    match('-')
    term                 # result in eax (b)
    emit("sub eax, ebx") # subtract a from b (this is backwards)
    emit("neg eax")      # fix things up. -(b-a) == a-b
  end

  # Parse and translate a mathematical expression of terms. Result is
  # in eax. An operator is mandatory after the first term; anything
  # else raises ParseError.
  def expression
    term                 # result is in eax
    emit("mov ebx, eax") # move 1st term to ebx (expected by
                         # add & subtract)
    # `when X then` is used because the colon form (`when X:`) was
    # removed from the language in Ruby 1.9.
    case @look
    when '+' then add
    when '-' then subtract
    else
      expected('Addition or subtraction operator (+ or -)')
    end
  end

  # True once the input stream is exhausted and no lookahead character
  # remains to be consumed.
  def eof?
    @input.eof? && @look.nil?
  end
end

3
epilogue.asm Normal file
View file

@ -0,0 +1,3 @@
; Program epilogue: terminate the process, passing the value left in eax
; by the generated code as the exit status.
; NOTE(review): register usage follows the 32-bit Linux int 0x80 syscall
; convention (eax = syscall number, ebx = first argument) -- confirm target.
mov ebx, eax    ; exit status = result of the compiled expression
mov eax, 1      ; syscall number 1 (exit)
int 0x80        ; invoke the kernel

3
prologue.asm Normal file
View file

@ -0,0 +1,3 @@
; Program prologue: test.rb writes these lines ahead of the generated code.
GLOBAL _start   ; export the entry symbol so the linker can find it
SECTION .text   ; everything that follows is code
_start:         ; generated instructions are appended after this label

39
test.rb Normal file
View file

@ -0,0 +1,39 @@
require 'compiler'
require 'stringio'
# Number of times main may skip a character and retry after a ParseError
# before it gives up. The retry counter in main is never reset, so this is
# a total for the whole run.
MaxRetries = 1
# Write a diagnostic message, followed by a newline, to standard error.
def error(msg)
  STDERR.puts(msg)
end
# Main program.
#
# Compiles the expression given as the first command-line argument
# (defaulting to '5-5') and returns the generated assembly as a String.
# After a ParseError the offending character is skipped and parsing is
# retried, up to MaxRetries times in total; beyond that the loop stops
# and whatever was generated so far is returned.
def main
  source = StringIO.new(ARGV[0] || '5-5')
  sink = StringIO.new
  compiler = Compiler.new(source, sink)
  attempts = 0

  until compiler.eof?
    begin
      compiler.expression
    rescue ParseError => failure
      error("[error] #{failure.message}")
      unless attempts < MaxRetries
        error("Aborting!")
        break
      end
      attempts += 1
      error("Skipping token...")
      compiler.get_char # drop the character that caused the error
      retry
    end
  end

  sink.string
end
# Compile the expression, then write test.asm as prologue + generated
# code + epilogue, in that order.
generated = main
File.open("test.asm", "w") do |asm|
  asm.puts(File.readlines("prologue.asm"))
  asm.puts(generated)
  asm.puts(File.readlines("epilogue.asm"))
end