From 268c6f6c297fc98a5154470131ecd9fc30359d07 Mon Sep 17 00:00:00 2001 From: sjs Date: Wed, 13 May 2009 23:20:51 -0700 Subject: [PATCH] [NEW] Parse assignment statements. Added template and test code. --- Makefile | 4 +- compiler.rb | 181 +++++++++++++++++++++++++++++++++++++++++---------- template.asm | 12 ++++ test.code | 5 ++ test.rb | 55 ++++++++-------- 5 files changed, 191 insertions(+), 66 deletions(-) create mode 100644 template.asm create mode 100644 test.code diff --git a/Makefile b/Makefile index 8dbcf65..386d3c9 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,5 @@ build: -# ruby test.rb '5-5+3-2-1' -# ruby test.rb '5*3-5*2+3-9/3-1*1-8/2' - ruby test.rb '5*(3-5)*2+2-9/3-8/2-4*(5+5+5)' + ruby test.rb test.code nasm -f elf -g -o test.o test.asm ld -o test test.o # $? indicates success as per unix convention diff --git a/compiler.rb b/compiler.rb index ac02b3f..3953178 100644 --- a/compiler.rb +++ b/compiler.rb @@ -9,19 +9,31 @@ class ParseError < StandardError; end class Compiler - def initialize(input=STDIN, output=STDOUT) + attr_reader :data, :bss, :code + + def initialize(input=STDIN) @look = '' # next lookahead char @input = input # stream to read from - @output = output # stream to write to + @data = '' # data section + @bss = '' # bss section + @code = '' # code section # seed the lexer get_char end + def parse + statement until eof? + [@data, @bss, @code] + end + # Read the next character from the input stream def get_char - @look = @input.getc - @look = @look.chr if @look + @look = if @input.eof? + nil + else + @input.readbyte.chr + end end # Report error and halt @@ -34,7 +46,7 @@ class Compiler if eof? raise ParseError, "Premature end of file, expected: #{what}." else - raise ParseError, "Expected: #{what}, got: #{@look}." + raise ParseError, "Expected: #{what}, got: #{@look} (##{@look[0]})." end end @@ -48,18 +60,18 @@ class Compiler end # Recognize an alphabetical character - def is_alpha(char) + def alpha?(char) ('A'..'Z') === char.upcase end # Recognize a decimal digit - def is_digit(char) + def digit?(char) ('0'..'9') === char end # Get an identifier def get_name - expected('identifier') unless is_alpha(@look) + expected('identifier') unless alpha?(@look) c = @look get_char return c @@ -67,29 +79,60 @@ class Compiler # Get a number def get_num - expected('integer') unless is_digit(@look) + expected('integer') unless digit?(@look) c = @look get_char return c end - # Print a tab followed by a string and a newline + # Define a constant in the .data section. + def equ(name, value) + @data << "#{name}\tequ #{value}" + end + + # Define a variable with the given name and size (in dwords). + def var(name, dwords=1) + @bss << "#{name}: resd #{dwords}\n" + end + + # Emit a line of code wrapped between a tab and a newline. def emit(s) - @output.puts("\t#{s}") + @code << "\t#{s}\n" + end + + # Parse and translate an identifier or function call. + def identifier + name = get_name + + if @look == '(' + # function call + match('(') + match(')') + call(name) + else + # variable access + mov("eax", "dword [#{name}]") + end end # Parse and translate a single factor. Result is in eax. def factor - if @look == '(' + case + when @look == '(' match('(') expression match(')') + when alpha?(@look) + identifier + when digit?(@look) + mov("eax", get_num) else - emit("mov eax, #{get_num}") + expected("a number, identifier, or an expression wrapped in parens") end end - # Parse and translate a single term. Result is in eax. + # Parse and translate a single term (factor or mulop). Result is in + # eax. def term factor # Result in eax. while mulop? @@ -97,25 +140,25 @@ class Compiler # multiply & divide. Because they leave their results in eax # associativity works. Each interim result is pushed on the # stack here. - emit("push eax") + push("eax") - case @look - when '*': multiply - when '/': divide + if @look == '*' + multiply else - expected('Multiplication or division operator (* or /)') + divide end - emit("add esp, 4") # Remove the 1st factor from the stack. + + add("esp", 4) # Remove the 1st factor from the stack. end end - # Parse and translate a mathematical expression of terms. Result is + # Parse and translate a general expression of terms. Result is # in eax. def expression if addop? # Clear eax simulating a zero before unary plus and minus # operations. - emit("xor eax, eax") + xor("eax", "eax") else term # Result is in eax. end @@ -125,24 +168,49 @@ class Compiler # subtract. Because they leave their results in eax # associativity works. Each interim result is pushed on the # stack here. - emit("push eax") + push("eax") - case @look - when '+': add - when '-': subtract + if @look == '+' + add else - expected('Addition or subtraction operator (+ or -)') + subtract end - emit("add esp, 4") # Remove 1st term (a) from the stack. + + add("esp", 4) # Remove 1st term (a) from the stack. end end + # Parse an assignment statement. Value is in eax. + def assignment + name = get_name + match('=') + expression + var(name) + mov("dword [#{name}]", "eax") + end + + # Parse one or more newlines. + def newline + if @look == "\n" || @look == "\r" + get_char while @look == "\n" || @look == "\r" + else + expected('newline') + end + end + + # Parse an assignment expression followed by a newline. + def statement + assignment + newline + end + + # Parse an addition operator and the 2nd term (b). The result is # left in eax. The 1st term (a) is expected on the stack. def add match('+') term # Result is in eax. - emit("add eax, [esp]") # Add a to b. + add('eax', '[esp]') # Add a to b. end # Parse a subtraction operator and the 2nd term (b). The result is @@ -150,8 +218,8 @@ class Compiler def subtract match('-') term # Result is in eax. - emit("sub eax, [esp]") # Subtract a from b (this is backwards). - emit("neg eax") # Fix things up. -(b-a) == a-b + sub('eax', '[esp]') # Subtract a from b (this is backwards). + neg('eax') # Fix things up. -(b-a) == a-b end # Parse an addition operator and the 2nd term (b). The result is @@ -159,7 +227,7 @@ class Compiler def multiply match('*') factor # Result is in eax. - emit("imul dword [esp]") # Multiply a by b. + imul('dword [esp]') # Multiply a by b. end # Parse a division operator and the divisor (b). The result is @@ -167,11 +235,17 @@ class Compiler def divide match('/') factor # Result is in eax. - emit("xchg eax, [esp]") # Swap the divisor and dividend into + xchg('eax', '[esp]') # Swap the divisor and dividend into # the correct places. - emit("idiv dword [esp]") # Divide a (eax) by b ([esp]). + idiv('dword [esp]') # Divide a (eax) by b ([esp]). end + + +####### +private +####### + def eof? @input.eof? && @look.nil? end @@ -183,4 +257,43 @@ class Compiler def mulop? @look == '*' || @look == '/' end + + + # Some asm methods for convenience and arity checks. + + def mov(dest, src) + emit("mov #{dest}, #{src}") + end + + def add(dest, src) + emit("add #{dest}, #{src}") + end + + def sub(dest, src) + emit("sub #{dest}, #{src}") + end + + def imul(op) + emit("imul #{op}") + end + + def idiv(op) + emit("idiv #{op}") + end + + def push(reg) + emit("push #{reg}") + end + + def call(label) + emit("call #{label}") + end + + def neg(reg) + emit("neg #{reg}") + end + + def xchg(op1, op2) + emit("xchg #{op1}, #{op2}") + end end diff --git a/template.asm b/template.asm new file mode 100644 index 0000000..bd95f4b --- /dev/null +++ b/template.asm @@ -0,0 +1,12 @@ +GLOBAL _start +SECTION .data +{data} +SECTION .bss +{bss} +SECTION .text +_start: +{code} + ;; The result in eax is the exit code, move it to ebx. + mov ebx, eax + mov eax, 1 ; _exit syscall + int 0x80 ; call Linux diff --git a/test.code b/test.code new file mode 100644 index 0000000..55f0c67 --- /dev/null +++ b/test.code @@ -0,0 +1,5 @@ +a=9 +5*(3-5)*2+2-9/3-8/2-4*(5+5+5) +a-1 +x()+1 +-1 diff --git a/test.rb b/test.rb index d361087..43d59fa 100644 --- a/test.rb +++ b/test.rb @@ -1,39 +1,36 @@ require 'compiler' require 'stringio' -MaxRetries = 1 - def error(msg) STDERR.puts(msg) end -# Main program -def main - retries = 0 - input = StringIO.new(ARGV[0] || '5-5') - output = StringIO.new - parse = Compiler.new(input, output) - until parse.eof? - begin - parse.expression - rescue ParseError => e - error("[error] #{e.message}") - if retries < MaxRetries - retries += 1 - error("Skipping token...") - parse.get_char - retry - else - error("Aborting!") - break - end - end +def parse(input) + compiler = Compiler.new(input) + compiler.parse # tuple of [data, bss, code] + +rescue ParseError => e + error("[error] #{e.message}") + error("Aborting!") + exit(1) +end + +def interpolate(template, data) + data.inject(template) do |template, mapping| + token, replacement = *mapping + template.sub("{#{token}}", replacement) end - output.string end -code = main -File.open("test.asm", "w") do |f| - f.puts(File.readlines("prologue.asm")) - f.puts(code) - f.puts(File.readlines("epilogue.asm")) +def main(arg) + input = if File.readable?(arg) + File.open(arg) + else + # StringIO.new("5*(3-5)*2+2-9/3-8/2-4*(5+5+5)\n") + StringIO.new("a=9\n") + end + data, bss, code = *parse(input) + template = File.read("template.asm") + asm = interpolate(template, :data => data, :bss => bss, :code => code) + File.open("test.asm", "w") { |f| f.puts(asm) } end +main(ARGV[0].to_s)