From bd0408fa2ff9bee4403f146045f70650702c7bc6 Mon Sep 17 00:00:00 2001 From: sjs Date: Thu, 14 May 2009 01:12:48 -0700 Subject: [PATCH] [NEW] ignore most whitespace. Fixed division (use cdq before idiv). --- compiler.rb | 122 ++++++++++++++++++++++++++++++++-------------------- test.code | 14 ++++-- 2 files changed, 86 insertions(+), 50 deletions(-) diff --git a/compiler.rb b/compiler.rb index b64c38e..14ed9bb 100644 --- a/compiler.rb +++ b/compiler.rb @@ -17,18 +17,19 @@ class Compiler @data = '' # data section @bss = '' # bss section @code = '' # code section + @vars = {} # seed the lexer get_char + skip_whitespace end def parse - statement - expression; newline + statement until eof? [@data, @bss, @code] end - # Read the next character from the input stream + # Read the next character from the input stream. def get_char @look = if @input.eof? nil @@ -51,14 +52,7 @@ class Compiler end end - # Match a specific input character - def match(char) - if @look == char - get_char - else - expected("'#{char}'") - end - end + # Recognize an alphabetical character. def alpha?(char) @@ -75,28 +69,48 @@ class Compiler alpha?(char) || digit?(char) end - # Get an identifier. - def get_name - expected('identifier') unless alpha?(@look) + def whitespace?(char) + char == ' ' || char == '\t' + end + + + # Match a specific input character. + def match(char) + expected("'#{char}'") unless @look == char + get_char + skip_whitespace + end + + # Parse zero or more consecutive characters for which the test is + # true. + def many(test) token = '' - while alnum?(@look) + while test.call(@look) token << @look get_char end + skip_whitespace token end + # Get an identifier. + def get_name + expected('identifier') unless alpha?(@look) + many(method(:alnum?)) + end + # Get a number. def get_num expected('integer') unless digit?(@look) - value = '' - while digit?(@look) - value << @look - get_char - end - value + many(method(:digit?)) end + # Skip all leading whitespace. + def skip_whitespace + get_char while whitespace?(@look) + end + + # Define a constant in the .data section. def equ(name, value) @data << "#{name}\tequ #{value}" @@ -104,7 +118,12 @@ class Compiler # Define a variable with the given name and size (in dwords). def var(name, dwords=1) - @bss << "#{name}: resd #{dwords}\n" + unless @vars[name] + @bss << "#{name}: resd #{dwords}\n" + @vars[name] = name + # else + # raise ParseError, "identifier #{name} redefined" + end end # Emit a line of code wrapped between a tab and a newline. @@ -112,6 +131,8 @@ class Compiler @code << "\t#{s}\n" end + + # Parse and translate an identifier or function call. def identifier name = get_name @@ -120,10 +141,10 @@ class Compiler # function call match('(') match(')') - call(name) + x86_call(name) else # variable access - mov("eax", "dword [#{name}]") + x86_mov(:eax, "dword [#{name}]") end end @@ -137,7 +158,7 @@ class Compiler when alpha?(@look) identifier when digit?(@look) - mov("eax", get_num) + x86_mov(:eax, get_num) else expected("a number, identifier, or an expression wrapped in parens") end @@ -152,7 +173,7 @@ class Compiler # multiply & divide. Because they leave their results in eax # associativity works. Each interim result is pushed on the # stack here. - push("eax") + x86_push(:eax) if @look == '*' multiply @@ -160,7 +181,7 @@ class Compiler divide end - add("esp", 4) # Remove the 1st factor from the stack. + x86_add(:esp, 4) # Remove the 1st factor from the stack. end end @@ -170,7 +191,7 @@ class Compiler if addop? # Clear eax simulating a zero before unary plus and minus # operations. - xor("eax", "eax") + x86_xor(:eax, :eax) else term # Result is in eax. end @@ -180,7 +201,7 @@ class Compiler # subtract. Because they leave their results in eax # associativity works. Each interim result is pushed on the # stack here. - push("eax") + x86_push(:eax) if @look == '+' add @@ -188,7 +209,7 @@ class Compiler subtract end - add("esp", 4) # Remove 1st term (a) from the stack. + x86_add(:esp, 4) # Remove 1st term (a) from the stack. end end @@ -198,7 +219,7 @@ class Compiler match('=') expression var(name) - mov("dword [#{name}]", "eax") + x86_mov("dword [#{name}]", :eax) end # Parse one or more newlines. @@ -222,7 +243,7 @@ class Compiler def add match('+') term # Result is in eax. - add('eax', '[esp]') # Add a to b. + x86_add(:eax, '[esp]') # Add a to b. end # Parse a subtraction operator and the 2nd term (b). The result is @@ -230,8 +251,8 @@ class Compiler def subtract match('-') term # Result is in eax. - sub('eax', '[esp]') # Subtract a from b (this is backwards). - neg('eax') # Fix things up. -(b-a) == a-b + x86_sub(:eax, '[esp]') # Subtract a from b (this is backwards). + x86_neg(:eax) # Fix things up. -(b-a) == a-b end # Parse an addition operator and the 2nd term (b). The result is @@ -239,7 +260,7 @@ class Compiler def multiply match('*') factor # Result is in eax. - imul('dword [esp]') # Multiply a by b. + x86_imul('dword [esp]') # Multiply a by b. end # Parse a division operator and the divisor (b). The result is @@ -247,9 +268,14 @@ class Compiler def divide match('/') factor # Result is in eax. - xchg('eax', '[esp]') # Swap the divisor and dividend into + x86_xchg(:eax, '[esp]') # Swap the divisor and dividend into # the correct places. - idiv('dword [esp]') # Divide a (eax) by b ([esp]). + + # idiv uses edx:eax as the dividend so we need to ensure that edx + # is correctly sign-extended w.r.t. eax. + emit('cdq') # Sign-extend eax into edx (Convert Double to + # Quad). + x86_idiv('dword [esp]') # Divide a (eax) by b ([esp]). end @@ -273,39 +299,43 @@ private # Some asm methods for convenience and arity checks. - def mov(dest, src) + def x86_mov(dest, src) emit("mov #{dest}, #{src}") end - def add(dest, src) + def x86_add(dest, src) emit("add #{dest}, #{src}") end - def sub(dest, src) + def x86_sub(dest, src) emit("sub #{dest}, #{src}") end - def imul(op) + def x86_imul(op) emit("imul #{op}") end - def idiv(op) + def x86_idiv(op) emit("idiv #{op}") end - def push(reg) + def x86_push(reg) emit("push #{reg}") end - def call(label) + def x86_call(label) emit("call #{label}") end - def neg(reg) + def x86_neg(reg) emit("neg #{reg}") end - def xchg(op1, op2) + def x86_xchg(op1, op2) emit("xchg #{op1}, #{op2}") end + + def x86_xor(op1, op2) + emit("xor #{op1}, #{op2}") + end end diff --git a/test.code b/test.code index 55f0c67..2144e16 100644 --- a/test.code +++ b/test.code @@ -1,5 +1,11 @@ a=9 -5*(3-5)*2+2-9/3-8/2-4*(5+5+5) -a-1 -x()+1 --1 +aa=10 +somethinglong=65536 +b=5*(3-5) +c=a-1 +d=-1 +e=b*2+2 +f=e-27/9 +g=f-8/2 +h=g-4*(5+5+5) +i=h+85