From dc23e23b92dd99335db32678b4770bc6c334f78c Mon Sep 17 00:00:00 2001 From: sjs Date: Thu, 21 May 2009 13:39:20 -0700 Subject: [PATCH] [NEW] Boolean expressions. Print statement. --- Makefile | 16 ++- compiler.rb | 311 +++++++++++++++++++++++++++++++++++++++++++----- test.code | 21 ++-- test.rb | 2 +- test_lt.asm | 54 +++++++++ test_lt.code | 3 + test_print.asm | 199 +++++++++++++++++++++++++++++++ test_print.code | 8 ++ 8 files changed, 575 insertions(+), 39 deletions(-) create mode 100644 test_lt.asm create mode 100644 test_lt.code create mode 100644 test_print.asm create mode 100644 test_print.code diff --git a/Makefile b/Makefile index 37fc257..5b546d0 100644 --- a/Makefile +++ b/Makefile @@ -1,5 +1,19 @@ +lt: test.rb test_lt.code + ruby test.rb test_lt.code > test_lt.asm + nasm -f elf -g -o test_lt.o test_lt.asm + ld -o test_lt test_lt.o +# $? indicates success as per unix convention + ./test_lt + +print: test.rb test_print.code + ruby test.rb test_print.code > test_print.asm + nasm -f elf -g -o test_print.o test_print.asm + ld -o test_print test_print.o +# $? indicates success as per unix convention + ./test_print + build: test.rb test.code - ruby test.rb test.code + ruby test.rb test.code > test.asm nasm -f elf -g -o test.o test.asm ld -o test test.o # $? indicates success as per unix convention diff --git a/compiler.rb b/compiler.rb index f6b0739..19925a2 100644 --- a/compiler.rb +++ b/compiler.rb @@ -29,8 +29,9 @@ class Compiler # reserved words (... constant?) # - # if, else, end, while, until, repeat, for, do, break - @keywords = %w[i l e w u r f d b] + # if, else, end, while, until, repeat, do, for, break, true, false, print, + # not, and, or, add, subtract, multiply, divide, xor, bool tests + @keywords = %w[i l e w u r f d b t f p ! & | + - * / ^ = < > #] # seed the lexer get_char @@ -63,12 +64,12 @@ class Compiler def factor if @look == '(' match('(') - expression + boolean_expression match(')') elsif alpha?(@look) identifier # or call elsif digit?(@look) - x86_mov(:eax, get_num) + x86_mov(:eax, get_number) else expected(:'integer, identifier, function call, or parenthesized expression') end @@ -76,15 +77,10 @@ class Compiler # Parse a signed factor. def signed_factor - unary_op = if @look == '-' - match('-') - :neg - elsif @look == '+' - match('+') - :pos - end + sign = @look + match(sign) if sign == '-' || sign == '+' factor - x86_neg(:eax) if unary_op == :neg + x86_neg(:eax) if sign == '-' end # Parse and translate a single term (factor or mulop). Result is in @@ -172,12 +168,162 @@ class Compiler end + ####################### + # boolean expressions # + ####################### + + def boolean_expression + boolean_term + + while orop? + x86_push(:eax) + case @look + when '|': or_expr + when '^': xor_expr + end + x86_add(:esp, 4) + end + end + + def or_expr + match('|') + boolean_term + x86_or(:eax, '[esp]') + end + + def xor_expr + match('^') + boolean_term + x86_xor(:eax, '[esp]') + end + + def boolean_term + not_factor + + while andop? + x86_push(:eax) + # and_expr + match('&') + not_factor + x86_and(:eax, '[esp]') + x86_add(:esp, 4) + end + end + + def boolean_factor + if boolean?(@look) + if get_boolean + x86_mov(:eax, -1) + else + x86_xor(:eax, :eax) + end + else + relation + end + end + + def not_factor + if @look == '!' + match('!') + boolean_factor + make_boolean(:eax) # ensure it is -1 or 0... + x86_not(:eax) # so that not is also boolean not + else + boolean_factor + end + end + + # Convert any identifier to a boolean (-1 or 0). This is + # semantically equivalent to !!reg in C or Ruby. + def make_boolean(reg=:eax) + end_label = unique_label(:endmakebool) + x86_cmp(reg, 0) # if false do nothing + x86_jz(end_label) + x86_mov(reg, -1) # truthy, make it true + emit_label(end_label) + end + + def get_boolean + expected(:boolean) unless boolean?(@look) + value = @look == 't' + get_char + value + end + + def relation + expression + if relop? + x86_push(:eax) + case @look + when '=': eq_relation + when '#': neq_relation + when '>': gt_relation + when '<': lt_relation + # TODO ge, le (needs real tokens) + end + end + end + + def eq_relation + match('=') + expression + x86_pop(:ebx) + x86_sub(:eax, :ebx) + make_boolean + x86_not(:eax) + end + + def neq_relation + match('#') + expression + x86_pop(:ebx) + x86_sub(:eax, :ebx) + make_boolean + end + + def gt_relation + match('>') + gt_label = unique_label(:gt) + end_label = unique_label(:endgt) + expression + x86_pop(:ebx) + x86_cmp(:eax, :ebx) # b - a < 0 if a > b + x86_jl(gt_label) + x86_xor(:eax, :eax) + x86_jmp(end_label) + emit_label(gt_label) + x86_xor(:eax, :eax) + x86_not(:eax) + emit_label(end_label) + end + + def lt_relation + match('<') + lt_label = unique_label(:lt) + end_label = unique_label(:endlt) + expression + x86_pop(:ebx) + x86_cmp(:ebx, :eax) # a - b < 0 if a < b + x86_jl(lt_label) + x86_xor(:eax, :eax) + x86_jmp(end_label) + emit_label(lt_label) + x86_xor(:eax, :eax) + x86_not(:eax) + emit_label(end_label) + end + + + ###################################### + # statements and controls structures # + ###################################### + # Parse an assignment statement. Value is in eax. def assignment name = get_name match('=') - expression - var(name) + boolean_expression + defvar(name) unless var?(name) x86_mov("dword [#{name}]", :eax) end @@ -199,6 +345,9 @@ class Compiler do_stmt when 'b' break_stmt(label) + when 'p' + print_stmt + newline else assignment newline @@ -210,10 +359,10 @@ class Compiler # Parse an if-else statement. def if_else_stmt(label) match('i') - condition - skip_any_whitespace else_label = unique_label(:end_or_else) end_label = else_label # only generated if else clause present + condition + skip_any_whitespace x86_jz(else_label) block(label) if @look == 'l' @@ -278,12 +427,12 @@ class Compiler end_label = unique_label(:endfor) counter = "[#{get_name}]" match('=') - expression # initial value + boolean_expression # initial value x86_sub(:eax, 1) # pre-decrement because of the # following pre-increment x86_mov(counter, :eax) # stash the counter in memory - match('>'); match('>') - expression # final value + match('.'); match('.') + boolean_expression # final value skip_any_whitespace x86_push(:eax) # stash final value on stack final = '[esp]' @@ -307,7 +456,7 @@ class Compiler match('d') start_label = unique_label(:do) end_label = unique_label(:enddo) - expression + boolean_expression skip_any_whitespace x86_mov(:ecx, :eax) x86_push(:ecx) @@ -335,11 +484,54 @@ class Compiler # Evaluates any expression for now. There are no boolean operators. def condition - expression - x86_cmp(:eax, 0) # 0 is false, anything else is true + boolean_expression skip_whitespace + x86_cmp(:eax, 0) # 0 is false, anything else is true end + def print_stmt + match('p') + # define a lookup table of digits + unless var?('DIGITS') + defvar('DIGITS', 4) + x86_mov('dword [DIGITS]', 0x33323130) + x86_mov('dword [DIGITS+4]', 0x37363534) + x86_mov('dword [DIGITS+8]', 0x62613938) + x86_mov('dword [DIGITS+12]', 0x66656463) + end + # 3 dwords == 12 chars + defvar('HEX', 3) unless var?('HEX') + # TODO check sign and prepend '-' if negative + x86_mov('word [HEX]', 0x7830) # "0x" == [48, 120] + x86_mov('word [HEX+10]', 0xa) # newline + null terminator + boolean_expression + # convert eax to a hex string + x86_lea(:esi, '[DIGITS]') + x86_lea(:edi, '[HEX+9]') + # build the string backwards (right to left), byte by byte + x86_mov(:ecx, 4) + emit_label(loop_label=unique_label) + # low nybble of nth byte + x86_movzx(:ebx, :al) + x86_and(:bl, 0x0f) # isolate low nybble + x86_movzx(:edx, 'byte [esi+ebx]') + x86_mov('byte [edi]', :dl) + x86_dec(:edi) + # high nybble of nth byte + x86_movzx(:ebx, :al) + x86_and(:bl, 0xf0) # isolate high nybble + x86_shr(:bl, 4) + x86_mov(:dl, 'byte [esi+ebx]') + x86_mov('byte [edi]', :dl) + x86_dec(:edi) + x86_shr(:eax, 8) + x86_loop(loop_label) + x86_mov(:eax, 4) # SYS_write + x86_mov(:ebx, 1) # STDOUT + x86_lea(:ecx, '[HEX]') + x86_mov(:edx, 11) # excluding term, max # of chars to print + x86_int(0x80) + end ############ @@ -359,6 +551,17 @@ class Compiler @look == '*' || @look == '/' end + def relop? + @look == '=' || @look == '#' || @look == '<' || @look == '>' + end + + def orop? + @look == '|' || @look == '^' + end + + def andop? + @look == '&' + end # Read the next character from the input stream. def get_char @@ -403,6 +606,10 @@ class Compiler alpha?(char) || digit?(char) end + def boolean?(char) + char == 't' || char == 'f' + end + def whitespace?(char) char == ' ' || char == "\t" end @@ -451,7 +658,7 @@ class Compiler end # Parse a number. - def get_num + def get_number expected(:integer) unless digit?(@look) many(method(:digit?)) end @@ -473,15 +680,23 @@ class Compiler end # Define a variable with the given name and size (in dwords). - def var(name, dwords=1) - unless @vars[name] + def defvar(name, dwords=1) + unless var?(name) @bss << "#{name}: resd #{dwords}\n" @vars[name] = name - # else - # raise ParseError, "identifier #{name} redefined" + else + STDERR.puts "[warning] attempted to redefine #{name}" end end + def var?(name) + @vars[name] + end + + def var(name) + @vars[name] + end + # Emit a line of code wrapped between a tab and a newline. def emit(code, options={}) tab = options.has_key?(:tab) ? options[:tab] : "\t" @@ -506,7 +721,11 @@ class Compiler # Some asm methods for convenience and arity checks. def x86_mov(dest, src) - emit("mov #{dest}, #{src}") + emit("mov #{dest}, #{src.is_a?(Numeric) ? "0x#{src.to_s(16)}" : src}") + end + + def x86_movzx(dest, src) + emit("movzx #{dest}, #{src}") end def x86_add(dest, src) @@ -529,10 +748,18 @@ class Compiler emit("inc #{op}") end + def x86_dec(op) + emit("dec #{op}") + end + def x86_push(reg) emit("push #{reg}") end + def x86_pop(reg) + emit("pop #{reg}") + end + def x86_call(label) emit("call #{label}") end @@ -541,10 +768,22 @@ class Compiler emit("neg #{reg}") end + def x86_not(rm32) + emit("not #{rm32}") + end + def x86_xchg(op1, op2) emit("xchg #{op1}, #{op2}") end + def x86_and(op1, op2) + emit("and #{op1}, #{op2}") + end + + def x86_or(op1, op2) + emit("or #{op1}, #{op2}") + end + def x86_xor(op1, op2) emit("xor #{op1}, #{op2}") end @@ -561,11 +800,27 @@ class Compiler emit("jmp #{label}") end + def x86_jl(label) + emit("jl #{label}") + end + def x86_cmp(a, b) emit("cmp #{a}, #{b}") end + def x86_lea(a, b) + emit("lea #{a}, #{b}") + end + + def x86_shr(a, b) + emit("shr #{a}, #{b}") + end + def x86_loop(label) emit("loop #{label}") end + + def x86_int(num) + emit("int 0x#{num.to_s(16)}") + end end diff --git a/test.code b/test.code index 57b94da..966f05f 100644 --- a/test.code +++ b/test.code @@ -1,4 +1,4 @@ -a=9 +a=1 aa=10 somethinglong=65536 x=5*(3-5) @@ -19,7 +19,7 @@ i1x=3 i1c=4 ee -i 1 +i 1 > 2 x=3 i 1 c=4 e @@ -28,25 +28,28 @@ l e w 0 - a = a + 1 + w a < 10 + a = a + 1 + e e -u 1 +u a = 0 a = a - 1 - i 1 + i t b e e +cc = c r - cc = c * 2 - i 1 + cc = cc * 2 + i cc = 32 b e e s=0 -f x = 1 >> 5 +f x = 1 .. 5 s = s + x e @@ -54,4 +57,4 @@ d 10 a = a * a e -xitcode=a-a +xitcode=(1 < 1) diff --git a/test.rb b/test.rb index 2c539f1..5f36542 100644 --- a/test.rb +++ b/test.rb @@ -32,7 +32,7 @@ def main(arg) data, bss, code = *parse(input) template = File.read("template.asm") asm = interpolate(template, :data => data, :bss => bss, :code => code) - File.open("test.asm", "w") { |f| f.puts(asm) } + STDOUT.puts(asm) end main(ARGV[0].to_s) diff --git a/test_lt.asm b/test_lt.asm new file mode 100644 index 0000000..caf56a5 --- /dev/null +++ b/test_lt.asm @@ -0,0 +1,54 @@ +GLOBAL _start +SECTION .data + +SECTION .bss +z: resd 1 +x: resd 1 +y: resd 1 + +SECTION .text +_start: + mov eax, 1 + push eax + mov eax, 1 + pop ebx + cmp ebx, eax + jl L000001_lt_1 + xor eax, eax + jmp L000002_endlt_1 +L000001_lt_1: + xor eax, eax + not eax +L000002_endlt_1: + mov dword [z], eax + mov eax, 1 + push eax + mov eax, 3 + pop ebx + cmp ebx, eax + jl L000003_lt_2 + xor eax, eax + jmp L000004_endlt_2 +L000003_lt_2: + xor eax, eax + not eax +L000004_endlt_2: + mov dword [x], eax + mov eax, 3 + push eax + mov eax, 1 + pop ebx + cmp ebx, eax + jl L000005_lt_3 + xor eax, eax + jmp L000006_endlt_3 +L000005_lt_3: + xor eax, eax + not eax +L000006_endlt_3: + mov dword [y], eax + + ;; The result in eax is the exit code, move it to ebx. + mov ebx, eax + mov eax, 1 ; _exit syscall + int 0x80 ; call Linux diff --git a/test_lt.code b/test_lt.code new file mode 100644 index 0000000..7220ff6 --- /dev/null +++ b/test_lt.code @@ -0,0 +1,3 @@ +x = 1 < 3 +y = 3 < 1 +z = 1 < 1 diff --git a/test_print.asm b/test_print.asm new file mode 100644 index 0000000..2fb5a0a --- /dev/null +++ b/test_print.asm @@ -0,0 +1,199 @@ +GLOBAL _start +SECTION .data + +SECTION .bss +DIGITS: resd 4 +HEX: resd 3 +xitcode: resd 1 + +SECTION .text +_start: + mov dword [DIGITS], 0x33323130 + mov dword [DIGITS+4], 0x37363534 + mov dword [DIGITS+8], 0x62613938 + mov dword [DIGITS+12], 0x66656463 + mov word [HEX], 0x7830 + mov word [HEX+10], 0xa + mov eax, 0 + lea esi, [DIGITS] + lea edi, [HEX+9] + mov ecx, 0x4 +L000001: + movzx ebx, al + and bl, 15 + movzx edx, byte [esi+ebx] + mov byte [edi], dl + dec edi + movzx ebx, al + and bl, 240 + shr bl, 4 + mov dl, byte [esi+ebx] + mov byte [edi], dl + dec edi + shr eax, 8 + loop L000001 + mov eax, 0x4 + mov ebx, 0x1 + lea ecx, [HEX] + mov edx, 0xb + int 0x80 + mov word [HEX], 0x7830 + mov word [HEX+10], 0xa + mov eax, 1 + lea esi, [DIGITS] + lea edi, [HEX+9] + mov ecx, 0x4 +L000002: + movzx ebx, al + and bl, 15 + movzx edx, byte [esi+ebx] + mov byte [edi], dl + dec edi + movzx ebx, al + and bl, 240 + shr bl, 4 + mov dl, byte [esi+ebx] + mov byte [edi], dl + dec edi + shr eax, 8 + loop L000002 + mov eax, 0x4 + mov ebx, 0x1 + lea ecx, [HEX] + mov edx, 0xb + int 0x80 + mov word [HEX], 0x7830 + mov word [HEX+10], 0xa + mov eax, 1 + neg eax + lea esi, [DIGITS] + lea edi, [HEX+9] + mov ecx, 0x4 +L000003: + movzx ebx, al + and bl, 15 + movzx edx, byte [esi+ebx] + mov byte [edi], dl + dec edi + movzx ebx, al + and bl, 240 + shr bl, 4 + mov dl, byte [esi+ebx] + mov byte [edi], dl + dec edi + shr eax, 8 + loop L000003 + mov eax, 0x4 + mov ebx, 0x1 + lea ecx, [HEX] + mov edx, 0xb + int 0x80 + mov word [HEX], 0x7830 + mov word [HEX+10], 0xa + mov eax, 123 + lea esi, [DIGITS] + lea edi, [HEX+9] + mov ecx, 0x4 +L000004: + movzx ebx, al + and bl, 15 + movzx edx, byte [esi+ebx] + mov byte [edi], dl + dec edi + movzx ebx, al + and bl, 240 + shr bl, 4 + mov dl, byte [esi+ebx] + mov byte [edi], dl + dec edi + shr eax, 8 + loop L000004 + mov eax, 0x4 + mov ebx, 0x1 + lea ecx, [HEX] + mov edx, 0xb + int 0x80 + mov word [HEX], 0x7830 + mov word [HEX+10], 0xa + mov eax, 123 + neg eax + lea esi, [DIGITS] + lea edi, [HEX+9] + mov ecx, 0x4 +L000005: + movzx ebx, al + and bl, 15 + movzx edx, byte [esi+ebx] + mov byte [edi], dl + dec edi + movzx ebx, al + and bl, 240 + shr bl, 4 + mov dl, byte [esi+ebx] + mov byte [edi], dl + dec edi + shr eax, 8 + loop L000005 + mov eax, 0x4 + mov ebx, 0x1 + lea ecx, [HEX] + mov edx, 0xb + int 0x80 + mov word [HEX], 0x7830 + mov word [HEX+10], 0xa + mov eax, 4096 + lea esi, [DIGITS] + lea edi, [HEX+9] + mov ecx, 0x4 +L000006: + movzx ebx, al + and bl, 15 + movzx edx, byte [esi+ebx] + mov byte [edi], dl + dec edi + movzx ebx, al + and bl, 240 + shr bl, 4 + mov dl, byte [esi+ebx] + mov byte [edi], dl + dec edi + shr eax, 8 + loop L000006 + mov eax, 0x4 + mov ebx, 0x1 + lea ecx, [HEX] + mov edx, 0xb + int 0x80 + mov word [HEX], 0x7830 + mov word [HEX+10], 0xa + mov eax, 4096 + neg eax + lea esi, [DIGITS] + lea edi, [HEX+9] + mov ecx, 0x4 +L000007: + movzx ebx, al + and bl, 15 + movzx edx, byte [esi+ebx] + mov byte [edi], dl + dec edi + movzx ebx, al + and bl, 240 + shr bl, 4 + mov dl, byte [esi+ebx] + mov byte [edi], dl + dec edi + shr eax, 8 + loop L000007 + mov eax, 0x4 + mov ebx, 0x1 + lea ecx, [HEX] + mov edx, 0xb + int 0x80 + mov eax, 0 + mov dword [xitcode], eax + + ;; The result in eax is the exit code, move it to ebx. + mov ebx, eax + mov eax, 1 ; _exit syscall + int 0x80 ; call Linux diff --git a/test_print.code b/test_print.code new file mode 100644 index 0000000..94503de --- /dev/null +++ b/test_print.code @@ -0,0 +1,8 @@ +p 0 +p 1 +p -1 +p 123 +p -123 +p 4096 +p -4096 +xitcode=0