diff --git a/asm/asm.rb b/asm/asm.rb index 2a63511..4f4383d 100644 --- a/asm/asm.rb +++ b/asm/asm.rb @@ -6,4 +6,46 @@ # may 2009 module Assembler + + # Abstract class for common functionality between different code + # generators. Also defines somewhat of an interface that must be + # implemented to be useful. + class AssemblerBase + + def initialize(*args) + @vars = {} # Symbol table, maps names to locations in BSS. + @num_labels = 0 # Used to generate unique labels. + @num_labels_with_suffix = Hash.new(0) + + # Maps names to locations. + @labels = Hash.new {|h, key| raise "undefined label: #{key}"} + + end + + def block(*args, &blk) + instance_eval(&blk) + end + + def output + raise "#{self.class} is supposed to implement this method!" + end + + def var(name) + @vars[name] + end + alias_method :var?, :var + + # Generate a unique label. + def label(suffix=nil) + @num_labels += 1 + if suffix + @num_labels_with_suffix[suffix] += 1 + suffix = "_#{suffix}_#{@num_labels_with_suffix[suffix]}" + end + name = "L#{sprintf "%06d", @num_labels}#{suffix}" + return name + end + + end + end diff --git a/asm/binary.rb b/asm/binary.rb index cfbdcae..154c72e 100644 --- a/asm/binary.rb +++ b/asm/binary.rb @@ -5,13 +5,18 @@ # sjs # may 2009 +ROOT = __FILE__.sub(/\/asm\/binary\.rb$/, '') unless defined? ROOT +$LOAD_PATH << ROOT unless $LOAD_PATH.include?(ROOT) + +require 'asm/asm' + module Assembler # Define a method named `emit_byte` and one named `binary_size` and # include this module. Calling the assembler methods will output # x86 machine code ... hopefully. So far it's incomplete and # binaries just segfault. - class Binary + class Binary < AssemblerBase # This structure allows for x86 registers of all sizes. The # number of the register is the index of the array in which it was @@ -44,7 +49,72 @@ module Assembler MaxUnsigned = 2**MachineBits - 1 SignedRange = MinSigned..MaxSigned - + X86_exit = { + 'linux' => [0x89, 0xc3, # mov ebx, eax (exit code) + 0xb8, 1, 0, 0, 0, # mov eax, 1 + 0xcd, 0x80 # int 0x80 + ].pack('c*'), + + 'darwin' => [0x50, # push eax (exit code) + 0xb8, 1, 0, 0, 0, # mov eax, 1 + 0xcd, 0x80 # int 0x80 + ].pack('c*') + } + + def initialize(platform='linux', binformat='elf') + super + @binary = [] # Byte array of machine code. + @platform = platform + @binformat = binformat + init_sections + end + + def init_sections + case @platform + + when 'linux' + raise "unsupported" unless @binformat == 'elf' + @header_size = 0x100 # ELF, Linux + @text_offset = 0x08048000 + @header_size # Offset of text section in memory + + when 'darwin' + raise "unsupported" unless @binformat == 'macho' + @header_size = 0x100 # Mach-O, Darwin + @text_offset = 0x08048000 + @header_size # Offset of text section in memory + else + raise "unsupported platform: #{platform}" + end + @text_size = 0x02be00 # Size of text section. + @data_offset = @text_offset + @text_size # Offset of data section. + @data_size = 0x4e00 # Size of data section. + @bss_offset = @data_offset + @data_size # Offset of bss section. + @bss_size = 0 # Size of bss section. + end + + def output + @binary.pack('c*') + X86_exit[@platform] + end + + # Define a constant in the .data section. + def const(name, value) + raise "unimplemented!" + end + + # Define a variable with the given name and size (in dwords). + def defvar(name, dwords=1) + unless var?(name) + @vars[name] = @bss_size + @bss_size += dwords + else + STDERR.puts "[warning] attempted to redefine #{name}" + end + end + + def label(suffix=nil) + name = super + @labels[name] = bytes_written + return name + end # Count the bytes that were encoded in the given block. def asm @@ -56,7 +126,18 @@ module Assembler # return the number of bytes written bytes_written - instruction_offset end + + def emit_byte(byte) + @binary << byte + end + def bytes_written + @binary.size + end + + def emit_label(name=label) + @labels[name] = @binary.length + end def emit_dword(num) num_to_quad(num).each {|byte| emit_byte(byte)} @@ -119,7 +200,7 @@ module Assembler end def regnum(op) - num = register? + num = register?(op) raise "not a register: #{op.inspect}" unless num num end @@ -146,7 +227,7 @@ module Assembler # 3. mov r/m32, reg32 (0x89, mod r/m, maybe sib) # 3a. mov memoffset32, eax (0xa3, disp32) # 4. mov r/m32, immediate32 (0xc7, mod r/m, maybe sib, imm32) - def x86_mov(dest, src) + def mov(dest, src) dest = dest[6..-1] if dest.is_a?(String) && dest[0..5] == 'dword ' src = src[6..-1] if src.is_a?(String) && src[0..5] == 'dword ' @@ -199,35 +280,35 @@ module Assembler end - def x86_add(dest, src) + def add(dest, src) end - def x86_sub(dest, src) + def sub(dest, src) end - def x86_imul(op) + def imul(op) end - def x86_idiv(op) + def idiv(op) end - def x86_inc(op) + def inc(op) asm do if register?(op) emit_byte(0x40 + regnum(op)) elsif rm32?(op) emit_byte(0xff) - emit_modrm(...) +# emit_modrm(...) else raise "unsupported op #{op}, wanted r32 or r/m32" end end end - def x86_push(reg) + def push(reg) end - def x86_cmp(a, b) + def cmp(a, b) end diff --git a/asm/text.rb b/asm/text.rb index ebb543d..cb152dc 100644 --- a/asm/text.rb +++ b/asm/text.rb @@ -3,123 +3,166 @@ # sjs # may 2009 +ROOT = __FILE__.sub(/\/asm\/text\.rb$/, '') unless defined? ROOT +$LOAD_PATH << ROOT unless $LOAD_PATH.include?(ROOT) + +require 'asm/asm' + module Assembler - # Define a method named `emit` and include this module. Calling - # the assembler methods will output nasm-friendly x86 asm code, - # line by line. This is dead easy and we can trust nasm to - # compile correct machine code, which is tricky. - module Text + # Assembler methods output nasm-friendly x86 asm code, line by + # line. This is dead easy and we can trust nasm to compile + # correct machine code, which isn't trivial. + class Text < AssemblerBase - def self.included(other) - im = other.instance_methods - unless im.include?(:emit) - raise "#{self.name} requires the including class define the emit method" + def initialize(platform='linux') + super + @data = '' + @bss = '' + @code = '' + @templatefile = "#{ROOT}/template.#{platform}.asm" + raise "unsupported platform: #{platform}" unless File.readable?(@templatefile) + end + + # Define a constant in the .data section. + def const(name, value) + @data << "#{name}\tequ #{value}" + end + + # Define a variable with the given name and size (in dwords). + def defvar(name, dwords=1) + unless var?(name) + @bss << "#{name}: resd #{dwords}\n" + @vars[name] = name + else + STDERR.puts "[warning] attempted to redefine #{name}" end end - - def x86_mov(dest, src) - emit("mov #{dest}, #{src.is_a?(Numeric) ? "0x#{src.to_s(16)}" : src}") + # Emit a line of code wrapped between a tab and a newline. + def emit(code, options={}) + tab = options.has_key?(:tab) ? options[:tab] : "\t" + @code << "#{tab}#{code}\n" end - def x86_movzx(dest, src) + def label(suffix=nil) + name = super + @labels[name] = name + return name + end + + def output + File.read(@templatefile). + sub("{data}", @data). + sub("{bss}", @bss). + sub("{code}", @code) + end + + def emit_label(name=label) + emit("#{name}:", :tab => nil) + end + + def mov(dest, src) + emit("mov #{dest}, #{src}#{src.is_a?(Numeric) ? " ; 0x#{src.to_s(16)}" : ''}") + end + + def movzx(dest, src) emit("movzx #{dest}, #{src}") end - def x86_add(dest, src) + def add(dest, src) emit("add #{dest}, #{src}") end - def x86_sub(dest, src) + def sub(dest, src) emit("sub #{dest}, #{src}") end - def x86_imul(op) + def imul(op) emit("imul #{op}") end - def x86_idiv(op) + def idiv(op) emit("idiv #{op}") end - def x86_inc(op) + def inc(op) emit("inc #{op}") end - def x86_dec(op) + def dec(op) emit("dec #{op}") end - def x86_push(reg) + def push(reg) emit("push #{reg}") end - def x86_pop(reg) + def pop(reg) emit("pop #{reg}") end - def x86_call(label) + def call(label) emit("call #{label}") end - def x86_neg(reg) + def neg(reg) emit("neg #{reg}") end - def x86_not(rm32) + def not(rm32) emit("not #{rm32}") end - def x86_xchg(op1, op2) + def xchg(op1, op2) emit("xchg #{op1}, #{op2}") end - def x86_and(op1, op2) + def and_(op1, op2) emit("and #{op1}, #{op2}") end - def x86_or(op1, op2) + def or(op1, op2) emit("or #{op1}, #{op2}") end - def x86_xor(op1, op2) + def xor(op1, op2) emit("xor #{op1}, #{op2}") end - def x86_jz(label) + def jz(label) emit("jz #{label}") end - def x86_jnz(label) + def jnz(label) emit("jnz #{label}") end - def x86_jmp(label) + def jmp(label) emit("jmp #{label}") end - def x86_jl(label) + def jl(label) emit("jl #{label}") end - def x86_cmp(a, b) + def cmp(a, b) emit("cmp #{a}, #{b}") end - def x86_lea(a, b) + def lea(a, b) emit("lea #{a}, #{b}") end - def x86_shr(a, b) + def shr(a, b) emit("shr #{a}, #{b}") end - def x86_loop(label) + def loop_(label) emit("loop #{label}") end - def x86_int(num) + def int(num) emit("int 0x#{num.to_s(16)}") end diff --git a/build.rb b/build.rb index 810a991..c928917 100755 --- a/build.rb +++ b/build.rb @@ -1,14 +1,8 @@ #!/usr/bin/env ruby -ROOT = __FILE__.sub(/\/build\.rb$/, '') unless defined? ROOT - require 'compiler' - - -X86_exit = [0x89, 0xc3, # mov ebx, eax (exit code) - 0xb8, 1, 0, 0, 0, # mov eax, 1 - 0xcd, 0x80 # int 0x80 - ].pack('c*') +require 'asm/text' +require 'asm/binary' def main filename = ARGV[0].to_s @@ -23,39 +17,21 @@ def base(filename) filename.sub(/\.[^.]*$/, '') end -def interpolate(templatefile, data) - template = File.read(templatefile) - data.inject(template) do |template, mapping| - token, replacement = *mapping - template.sub("{#{token}}", replacement) - end -end # filename: input filename -# format: output format, nasm or binary +# asm: assembler to use # returns: output filename -def compile(filename, format='asm') +def compile(filename, asm, binformat='elf') - # compile to asm or binary - output = nil File.open(filename, 'r') do |input| - compiler = Compiler.new(input, format) - output = compiler.compile + compiler = Compiler.new(input, asm, binformat) + compiler.compile end - if format == 'asm' - mode = 'w' - data, bss, code = *output - output = interpolate("#{ROOT}/template.asm", - :data => data, :bss => bss, :code => code) - else - mode = 'wb' - output += X86_exit - end - outfile = "#{base(filename)}.#{format}" - File.open(outfile, mode) do |out| - if format == 'asm' - out.puts(output) - end + + ext = asm.class.name.split('::').last[0,3].downcase == 'bin' ? 'bin' : 'asm' + outfile = "#{base(filename)}.#{ext}" + File.open(outfile, 'wb') do |out| + out.puts(asm.output) end return outfile @@ -68,33 +44,47 @@ rescue ParseError => e end # assemble using nasm, return resulting filename. -def asm(filename) +def asm(filename, binformat='elf') f = base(filename) outfile = "#{f}.o" - output = `nasm -f elf -g -o #{outfile} #{filename}` + output = `nasm -f #{binformat} -g -o #{outfile} #{filename}` if $?.exitstatus != 0 - raise "nasm failed: #{$?.exitstatus}", output + puts output + raise "nasm failed: #{$?.exitstatus}" end return outfile end # link with ld, return resulting filename. -def link(filename) +def link(filename, platform='linux') f = base(filename) - output = `ld -o #{f} #{filename}` + cmd, args = *case platform + when 'darwin': ['gcc', '-arch i386'] + when 'linux': ['ld', ''] + else + raise "unsupported platform: #{platform}" + end + output = `#{cmd} #{args} -o #{f} #{filename}` if $?.exitstatus != 0 - raise "ld failed: #{$?.exitstatus}", output + puts output + raise "ld failed: #{$?.exitstatus}" end `chmod +x #{f}` return f end -def build(filename, format='asm') - if format == 'asm' - link( asm( compile(filename) ) ) - else # binary - link( compile(filename, format='bin') ) - end +# TODO Use a dependency injection framework for the assembler, and +# other parts as things become more modular. +def build(filename, platform='linux', format='asm', binformat='elf') + bin = if format == 'asm' + code = compile(filename, Assembler::Text.new(platform)) + obj = asm( code, binformat ) + link( obj, platform ) + else # binary + obj = compile(filename, Assembler::Binary.new(platform), binformat) + link( obj, platform ) + end + return bin end def run(filename) diff --git a/compiler.rb b/compiler.rb index 88cf403..272acd8 100644 --- a/compiler.rb +++ b/compiler.rb @@ -12,9 +12,6 @@ # require 'rubygems' # require 'unroller' -require 'asm' -require 'opcode' - class ParseError < StandardError attr_reader :caller, :context def initialize(caller, context=nil) @@ -24,43 +21,27 @@ class ParseError < StandardError end class Compiler - # This module uses our `emit_byte` method to output x86 machine code - # directly using the assembler library. - # include Assembler::Binary Keywords = %w[ if else end while until repeat for to do break print ] - attr_reader :data, :bss, :code + attr_reader :asm - def initialize(input, asm=Assembler::Text.new) + def initialize(input, asm, binformat='elf') # XXX for development only! @indent = 0 # for pretty printing + # The only binary format our assembler knows right now is ELF. + unless binformat == 'elf' + raise "Only ELF is supported. Unsupported binary format: #{binformat}." + end + @look = '' # Next lookahead char. @token = nil # Type of last read token. @value = nil # Value of last read token. @input = input # Stream to read from. - @data = '' # Data section. - @bss = '' # BSS section. - @code = '' # Code section. - @binary = [] # Byte array of machine code. - @vars = {} # Symbol table, maps names to locations in BSS. - @num_labels = 0 # Used to generate unique labels. - @num_labels_with_suffix = Hash.new(0) - - @header_size = 0x100 # ELF, Linux, x86 - @text_offset = 0x08048000 + @header_size # Offset of text section in memory (Linux, x86). - @text_size = 0x02be00 # Size of text section. - @data_offset = @text_offset + @text_size # Offset of data section. - @data_size = 0x4e00 # Size of data section. - @bss_offset = @data_offset + @data_size # Offset of bss section. - @bss_size = 0 # Size of bss section. - - # Labels for the assembler. Maps names to locations. - @labels = Hash.new {|h, key| raise "undefined label: #{key}"} @asm = asm @@ -68,10 +49,6 @@ class Compiler get_char end - def asm - @asm - end - def compile block expected(:'end of file') unless eof? @@ -107,10 +84,10 @@ class Compiler match('(') # TODO arg list match(')') - x86_call(name) + asm.call(name) else # variable access - x86_mov(:eax, "dword [#{name}]") + asm.mov(:eax, "dword [#{name}]") end end @@ -123,7 +100,7 @@ class Compiler elsif alpha?(@look) identifier # or call elsif digit?(@look) - x86_mov(:eax, get_number.to_i) + asm.mov(:eax, get_number.to_i) else expected(:'integer, identifier, function call, or parenthesized expression', :got => @look) end @@ -134,7 +111,7 @@ class Compiler sign = @look match(sign) if op?(:unary, sign) factor - x86_neg(:eax) if sign == '-' + asm.neg(:eax) if sign == '-' end # Parse and translate a single term (factor or mulop). Result is in @@ -172,7 +149,7 @@ class Compiler def add match('+') term # Result is in eax. - x86_add(:eax, '[esp]') # Add a to b. + asm.add(:eax, '[esp]') # Add a to b. end # Parse a subtraction operator and the 2nd term (b). The result is @@ -180,8 +157,8 @@ class Compiler def subtract match('-') term # Result, b, is in eax. - x86_neg(:eax) # Fake the subtraction. a - b == a + -b - x86_add(:eax, '[esp]') # Add a and -b. + asm.neg(:eax) # Fake the subtraction. a - b == a + -b + asm.add(:eax, '[esp]') # Add a and -b. end # Parse an addition operator and the 2nd term (b). The result is @@ -189,7 +166,7 @@ class Compiler def multiply match('*') signed_factor # Result is in eax. - x86_imul('dword [esp]') # Multiply a by b. + asm.imul('dword [esp]') # Multiply a by b. end # Parse a division operator and the divisor (b). The result is @@ -197,14 +174,14 @@ class Compiler def divide match('/') signed_factor # Result is in eax. - x86_xchg(:eax, '[esp]') # Swap the divisor and dividend into + asm.xchg(:eax, '[esp]') # Swap the divisor and dividend into # the correct places. # idiv uses edx:eax as the dividend so we need to ensure that edx # is correctly sign-extended w.r.t. eax. - emit('cdq') # Sign-extend eax into edx (Convert Double to - # Quad). - x86_idiv('dword [esp]') # Divide a (eax) by b ([esp]). + asm.cdq # Sign-extend eax into edx (Convert Double to + # Quad). + asm.idiv('dword [esp]') # Divide a (eax) by b ([esp]). end @@ -215,19 +192,19 @@ class Compiler def bitor_expr match('|') term - x86_or(:eax, '[esp]') + asm.or(:eax, '[esp]') end def bitand_expr match('&') signed_factor - x86_and(:eax, '[esp]') + asm.and_(:eax, '[esp]') end def xor_expr match('^') term - x86_xor(:eax, '[esp]') + asm.xor(:eax, '[esp]') end @@ -240,6 +217,7 @@ class Compiler while @look == '|' op '||' do boolean_term + # !!! this method has moved, IMPLEMENT THIS! emit("") end end @@ -250,6 +228,7 @@ class Compiler while @look == '&' op '&&' do not_factor + # !!! this method has moved, IMPLEMENT THIS! emit("") end end @@ -258,9 +237,9 @@ class Compiler def boolean_factor if boolean?(@look) if get_boolean == 'true' - x86_mov(:eax, -1) + asm.mov(:eax, -1) else - x86_xor(:eax, :eax) + asm.xor(:eax, :eax) end scan else @@ -273,7 +252,7 @@ class Compiler match('!') boolean_factor make_boolean(:eax) # ensure it is -1 or 0... - x86_not(:eax) # so that not is also boolean not + asm.not(:eax) # so that not is also boolean not else boolean_factor end @@ -282,11 +261,11 @@ class Compiler # Convert any identifier to a boolean (-1 or 0). This is # semantically equivalent to !!reg in C or Ruby. def make_boolean(reg=:eax) - end_label = unique_label(:endmakebool) - x86_cmp(reg, 0) # if false do nothing - x86_jz(end_label) - x86_mov(reg, -1) # truthy, make it true - emit_label(end_label) + end_label = asm.label(:endmakebool) + asm.cmp(reg, 0) # if false do nothing + asm.jz(end_label) + asm.mov(reg, -1) # truthy, make it true + asm.emit_label(end_label) end def relation @@ -314,14 +293,14 @@ class Compiler # and make_boolean will leave -1 (true) for us in eax. def neq_relation expression - x86_sub(:eax, '[esp]') + asm.sub(:eax, '[esp]') make_boolean end # Invert the != test for equal. def eq_relation neq_relation - x86_not(:eax) + asm.not(:eax) end # > and < are both implemented in terms of jl (jump if less than). @@ -337,20 +316,20 @@ class Compiler # Invert the sense of the test? invert = options[:invert] - true_label = unique_label(:cmp) - end_label = unique_label(:endcmp) - x86_cmp(a, b) - x86_jl(true_label) + true_label = asm.label(:cmp) + end_label = asm.label(:endcmp) + asm.cmp(a, b) + asm.jl(true_label) - x86_xor(:eax, :eax) # return false - x86_not(:eax) if invert # (or true if inverted) - x86_jmp(end_label) + asm.xor(:eax, :eax) # return false + asm.not(:eax) if invert # (or true if inverted) + asm.jmp(end_label) - emit_label(true_label) - x86_xor(:eax, :eax) # return true - x86_not(:eax) unless invert # (or false if inverted) + asm.emit_label(true_label) + asm.xor(:eax, :eax) # return true + asm.not(:eax) unless invert # (or false if inverted) - emit_label(end_label) + asm.emit_label(end_label) end # a: [esp] @@ -401,8 +380,8 @@ class Compiler name = @value match('=') boolean_expression - defvar(name) unless var?(name) - x86_mov("dword [#{name}]", :eax) + asm.defvar(name) unless asm.var?(name) + asm.mov("dword [#{name}]", :eax) end # Parse a code block. @@ -439,26 +418,26 @@ class Compiler # Parse an if-else statement. def if_else_stmt(label) - else_label = unique_label(:end_or_else) + else_label = asm.label(:end_or_else) end_label = else_label # only generated if else clause # present condition skip_any_whitespace - x86_jz(else_label) + asm.jz(else_label) @indent += 1 block(label) @indent -= 1 if @token == :keyword && @value == 'else' skip_any_whitespace - end_label = unique_label(:endif) # now we need the 2nd label - x86_jmp(end_label) - emit_label(else_label) + end_label = asm.label(:endif) # now we need the 2nd label + asm.jmp(end_label) + asm.emit_label(else_label) @indent += 1 block(label) @indent -= 1 end match_word('end') - emit_label(end_label) + asm.emit_label(end_label) end # Used to implement the Two-Label-Loops (while, until, repeat). @@ -467,9 +446,9 @@ class Compiler # block: Code to execute at the start of each iteration. (e.g. a # condition) def simple_loop(name) - start_label = unique_label(:"loop_#{name}") - end_label = unique_label(:"end_#{name}") - emit_label(start_label) + start_label = asm.label(:"loop_#{name}") + end_label = asm.label(:"end_#{name}") + asm.emit_label(start_label) yield(end_label) @@ -477,15 +456,15 @@ class Compiler block(end_label) @indent -= 1 match_word('end') - x86_jmp(start_label) - emit_label(end_label) + asm.jmp(start_label) + asm.emit_label(end_label) end def while_stmt simple_loop('while') do |end_label| condition skip_any_whitespace - x86_jz(end_label) + asm.jz(end_label) end end @@ -493,7 +472,7 @@ class Compiler simple_loop('until') do |end_label| condition skip_any_whitespace - x86_jnz(end_label) + asm.jnz(end_label) end end @@ -511,24 +490,24 @@ class Compiler counter = "[#{get_name}]" match('=') boolean_expression # initial value - x86_sub(:eax, 1) # pre-decrement because of the + asm.sub(:eax, 1) # pre-decrement because of the # following pre-increment - x86_mov(counter, :eax) # stash the counter in memory + asm.mov(counter, :eax) # stash the counter in memory match_word('to', :scan => true) boolean_expression # final value skip_any_whitespace - x86_push(:eax) # stash final value on stack + asm.push(:eax) # stash final value on stack final = '[esp]' simple_loop('for') do |end_label| - x86_mov(:ecx, counter) # get the counter - x86_add(:ecx, 1) # increment - x86_mov(counter, :ecx) # store the counter - x86_cmp(final, :ecx) # check if we're done - x86_jz(end_label) # if so jump to the end + asm.mov(:ecx, counter) # get the counter + asm.add(:ecx, 1) # increment + asm.mov(counter, :ecx) # store the counter + asm.cmp(final, :ecx) # check if we're done + asm.jz(end_label) # if so jump to the end end - x86_add(:esp, 4) # clean up the stack + asm.add(:esp, 4) # clean up the stack end # do 5 @@ -538,39 +517,38 @@ class Compiler boolean_expression skip_any_whitespace - x86_mov(:ecx, :eax) - x86_push(:ecx) + asm.mov(:ecx, :eax) - start_label = unique_label(:do) - end_label = unique_label(:enddo) - emit_label(start_label) + start_label = asm.label(:do) + end_label = asm.label(:enddo) + asm.emit_label(start_label) - x86_push(:ecx) + asm.push(:ecx) @indent += 1 block(end_label) @indent -= 1 - x86_pop(:ecx) + asm.pop(:ecx) match_word('end') - x86_loop(start_label) + asm.loop_(start_label) # Phony push! break needs to clean up the stack, but since we # don't know if there is a break at this point we fake a push and # always clean up the stack after. - x86_sub(:esp, 4) + asm.sub(:esp, 4) - emit_label(end_label) + asm.emit_label(end_label) # If there was a break we have to clean up the stack here. If # there was no break we clean up the phony push above. - x86_add(:esp, 4) + asm.add(:esp, 4) end def break_stmt(label) if label - x86_jmp(label) + asm.jmp(label) else expected(:'break to be somewhere useful', :got => :'a break outside a loop') @@ -581,51 +559,57 @@ class Compiler def condition boolean_expression skip_whitespace - x86_cmp(:eax, 0) # 0 is false, anything else is true + asm.cmp(:eax, 0) # 0 is false, anything else is true end # print eax in hex format def print_stmt - # define a lookup table of digits - unless var?('DIGITS') - defvar('DIGITS', 4) - x86_mov('dword [DIGITS]', 0x33323130) - x86_mov('dword [DIGITS+4]', 0x37363534) - x86_mov('dword [DIGITS+8]', 0x62613938) - x86_mov('dword [DIGITS+12]', 0x66656463) + asm.block do + # define a lookup table of digits + unless var?('DIGITS') + defvar('DIGITS', 4) + mov('dword [DIGITS]', 0x33323130) + mov('dword [DIGITS+4]', 0x37363534) + mov('dword [DIGITS+8]', 0x62613938) + mov('dword [DIGITS+12]', 0x66656463) + end + # 3 dwords == 12 chars + defvar('HEX', 3) unless var?('HEX') + # TODO check sign and prepend '-' if negative + mov('word [HEX]', 0x7830) # "0x" == [48, 120] + mov('word [HEX+10]', 0xa) # newline + null terminator end - # 3 dwords == 12 chars - defvar('HEX', 3) unless var?('HEX') - # TODO check sign and prepend '-' if negative - x86_mov('word [HEX]', 0x7830) # "0x" == [48, 120] - x86_mov('word [HEX+10]', 0xa) # newline + null terminator boolean_expression - # convert eax to a hex string - x86_lea(:esi, '[DIGITS]') - x86_lea(:edi, '[HEX+9]') - # build the string backwards (right to left), byte by byte - x86_mov(:ecx, 4) - emit_label(loop_label=unique_label) - # low nybble of nth byte - x86_movzx(:ebx, :al) - x86_and(:bl, 0x0f) # isolate low nybble - x86_movzx(:edx, 'byte [esi+ebx]') - x86_mov('byte [edi]', :dl) - x86_dec(:edi) - # high nybble of nth byte - x86_movzx(:ebx, :al) - x86_and(:bl, 0xf0) # isolate high nybble - x86_shr(:bl, 4) - x86_mov(:dl, 'byte [esi+ebx]') - x86_mov('byte [edi]', :dl) - x86_dec(:edi) - x86_shr(:eax, 8) - x86_loop(loop_label) - x86_mov(:eax, 4) # SYS_write - x86_mov(:ebx, 1) # STDOUT - x86_lea(:ecx, '[HEX]') - x86_mov(:edx, 11) # excluding term, max # of chars to print - x86_int(0x80) + asm.block do + # convert eax to a hex string + lea(:esi, '[DIGITS]') + lea(:edi, '[HEX+9]') + # build the string backwards (right to left), byte by byte + mov(:ecx, 4) + end + asm.emit_label(loop_label=asm.label) + asm.block do + # low nybble of nth byte + movzx(:ebx, :al) + and_(:bl, 0x0f) # isolate low nybble + movzx(:edx, 'byte [esi+ebx]') + mov('byte [edi]', :dl) + dec(:edi) + # high nybble of nth byte + movzx(:ebx, :al) + and_(:bl, 0xf0) # isolate high nybble + shr(:bl, 4) + mov(:dl, 'byte [esi+ebx]') + mov('byte [edi]', :dl) + dec(:edi) + shr(:eax, 8) + loop_(loop_label) + mov(:eax, 4) # SYS_write + mov(:ebx, 1) # STDOUT + lea(:ecx, '[HEX]') + mov(:edx, 11) # excluding term, max # of chars to print + int(0x80) + end end @@ -802,67 +786,7 @@ class Compiler get_char while any_whitespace?(@look) end - - # Define a constant in the .data section. - def equ(name, value) - @data << "#{name}\tequ #{value}" - end - - # Define a variable with the given name and size (in dwords). - def defvar(name, dwords=1) - unless var?(name) - @bss << "#{name}: resd #{dwords}\n" - @vars[name] = @bss_size - @bss_size += dwords - else - STDERR.puts "[warning] attempted to redefine #{name}" - end - end - - def var?(name) - @vars[name] - end - - def var(name) - @vars[name] - end - - # Emit a line of code wrapped between a tab and a newline. Required - # by Assembler::Text. - def emit(code, options={}) - tab = options.has_key?(:tab) ? options[:tab] : "\t" - @code << "#{tab}#{code}\n" - end - - # emit_byte and bytes_written are required by Assembler::Binary. - def emit_byte(byte) - @binary << byte - end - def bytes_written - @binary.size - end - - - def emit_label(name=unique_label) - emit("#{name}:", :tab => nil) - - @labels[name] = @binary.length - end - - def resolve_label(label) - @labels[label] - end - - # Generate a unique label. - def unique_label(suffix=nil) - @num_labels += 1 - if suffix - @num_labels_with_suffix[suffix] += 1 - suffix = "_#{suffix}_#{@num_labels_with_suffix[suffix]}" - end - "L#{sprintf "%06d", @num_labels}#{suffix}" - end - + def indent real_indent = if @value == 'else' || @value == 'end' @indent - 1 @@ -872,16 +796,10 @@ class Compiler ' ' * (real_indent * 4) end - # Pack the array into a byte string. - def binary - @binary.pack('c*') - end - - def pushing(reg) - x86_push(reg) + asm.push(reg) yield - x86_add(:esp, 4) + asm.add(:esp, 4) end def op(name) diff --git a/epilogue.asm b/epilogue.asm deleted file mode 100644 index 15fa155..0000000 --- a/epilogue.asm +++ /dev/null @@ -1,3 +0,0 @@ - mov ebx, eax - mov eax, 1 - int 0x80 diff --git a/min.darwin.asm b/min.darwin.asm new file mode 100644 index 0000000..4556521 --- /dev/null +++ b/min.darwin.asm @@ -0,0 +1,6 @@ +BITS 32 +SECTION .text +GLOBAL _main +_main: + mov eax, 0 + ret diff --git a/min.asm b/min.linux.asm similarity index 100% rename from min.asm rename to min.linux.asm diff --git a/min2.asm b/min2.asm deleted file mode 100644 index 8238ae5..0000000 --- a/min2.asm +++ /dev/null @@ -1,4 +0,0 @@ -BITS 32 -mov ebx,eax -mov eax,1 -int 0x80 diff --git a/opcode.rb b/opcode.rb deleted file mode 100644 index 772bd47..0000000 --- a/opcode.rb +++ /dev/null @@ -1,25 +0,0 @@ -class OpCode - Attrs = [:prefix, :op, :modrm, :sib, :extra] - attr_accessor *Attrs - - def initialize(attrs) - Attrs.each do |attr| - send("#{attr}=", attrs[attr]) - end - end - - def size - Attrs.inject(0) {|sum, attr| - iv = instance_variable_get("@#{attr}") - if iv.is_a?(Enumerable) - sum + iv.size - else - sum + 1 - end - } - end - - def binary - Attrs.map {|attr| send(attr)}.flatten.pack('c*') - end -end diff --git a/prologue.asm b/prologue.asm deleted file mode 100644 index 6f69371..0000000 --- a/prologue.asm +++ /dev/null @@ -1,4 +0,0 @@ -BITS 32 -GLOBAL _start -SECTION .text -_start: diff --git a/template.darwin.asm b/template.darwin.asm new file mode 100644 index 0000000..673f104 --- /dev/null +++ b/template.darwin.asm @@ -0,0 +1,11 @@ +BITS 32 +GLOBAL _main +SECTION .data +{data} +SECTION .bss +{bss} +SECTION .text +_main: +{code} + ;; The result in eax is the exit code, just return. + ret diff --git a/template.asm b/template.linux.asm similarity index 100% rename from template.asm rename to template.linux.asm diff --git a/test.rb b/test.rb deleted file mode 100644 index a004c9a..0000000 --- a/test.rb +++ /dev/null @@ -1,47 +0,0 @@ -require 'compiler' -require 'stringio' - -X86_exit = [0x89, 0xc3, # mov ebx, eax (exit code) - 0xb8, 1, 0, 0, 0, # mov eax, 1 - 0xcd, 0x80 # int 0x80 - ].pack('c*') - -def error(msg) STDERR.puts(msg) end - -def parse(input) - compiler = Compiler.new(input) - compiler.parse # tuple of [data, bss, code, binary] - -rescue ParseError => e - error("[error] #{e.message}") - error("[context] #{e.context}") - # error("Aborting!") - error(e.caller) - exit(1) -end - -def interpolate(template, data) - data.inject(template) do |template, mapping| - token, replacement = *mapping - template.sub("{#{token}}", replacement) - end -end - -def main(arg) - input = if File.readable?(arg) - File.open(arg) - else - # StringIO.new("5*(3-5)*2+2-9/3-8/2-4*(5+5+5)\n") - StringIO.new("abc=999\nabc-888\n") - end - data, bss, code, binary = *parse(input) - template = File.read("template.asm") - asm = interpolate(template, :data => data, :bss => bss, :code => code) - File.open("test.asm", "w") { |f| f.puts(asm) } - File.open("test.bin", "wb") { |f| - f.write(binary) - f.write(X86_exit) - } -end - -main(ARGV[0].to_s) diff --git a/test/Makefile b/test/Makefile index 4bbf149..bff7a29 100644 --- a/test/Makefile +++ b/test/Makefile @@ -1,50 +1,63 @@ +PLATFORM=$(shell uname -s) +BINFORMAT="bin" +ifeq ($(PLATFORM), Darwin) + BINFORMAT="macho" +endif +ifeq ($(PLATFORM), Linux) + BINFORMAT="elf" +endif +ifeq ($(BINFORMAT), bin) + @echo "WARNING: binary format is 'bin', this is probably not what you want!" + @echo " Your platform, " $(PLATFORM) ", is unsupported." +endif + all: lt gt ge le eq neq if while until repeat for do break print - @echo -n + @true lt: test.rb test_lt.code - @./test.rb lt + @./test.rb lt $(BINFORMAT) gt: test.rb test_gt.code - @./test.rb gt + @./test.rb gt $(BINFORMAT) ge: test.rb test_ge.code - @./test.rb ge + @./test.rb ge $(BINFORMAT) le: test.rb test_le.code - @./test.rb le + @./test.rb le $(BINFORMAT) eq: test.rb test_eq.code - @./test.rb eq + @./test.rb eq $(BINFORMAT) neq: test.rb test_neq.code - @./test.rb neq + @./test.rb neq $(BINFORMAT) if: test.rb test_if.code - @./test.rb if + @./test.rb if $(BINFORMAT) while: test.rb test_while.code - @./test.rb while + @./test.rb while $(BINFORMAT) until: test.rb test_until.code - @./test.rb until + @./test.rb until $(BINFORMAT) repeat: test.rb test_repeat.code - @./test.rb repeat + @./test.rb repeat $(BINFORMAT) for: test.rb test_for.code - @./test.rb for + @./test.rb for $(BINFORMAT) do: test.rb test_do.code - @./test.rb do + @./test.rb do $(BINFORMAT) break: test.rb test_break.code - @./test.rb break + @./test.rb break $(BINFORMAT) print: test.rb test_print.code - @./test.rb print + @./test.rb print $(BINFORMAT) big_test: test.rb big_test.code - @./test.rb big + @./test.rb big $(BINFORMAT) clean: @rm -f test*.asm test*.o diff --git a/test/test.rb b/test/test.rb index 16cf0ef..6227086 100755 --- a/test/test.rb +++ b/test/test.rb @@ -5,12 +5,21 @@ $LOAD_PATH << ROOT require 'build' +# usage: build.rb [binformat] +# +# ([format] will go before [binformat]) + def main func = ARGV[0].to_s + format = 'asm' # 'bin' only assembles one or two + # instructions right now, but support + # is in place + binformat = (ARGV[1] ? ARGV[1] : 'elf').downcase + platform = `uname -s`.chomp.downcase print "testing #{func} ... " - success = run( build("test_#{func}.code") ) + success = run( build("test_#{func}.code", platform, format, binformat) ) puts success == 0 ? "pass" : "FAIL! (#{success})" - exit(success) + exit(success.to_i) end main if $0 == __FILE__