[NEW] First hints of cross-platform support. Compiles to Mach-O on Darwin with nasm and gcc.

There is no binary assembler support for Darwin yet! I'm not sure when I will dive into the details
of generating a Mach-O binary from Ruby or C.

[MERGED] Binary assembler support.  It *should* work on ELF but it needs testing on Linux.
This commit is contained in:
Sami Samhuri 2009-05-25 16:26:21 -07:00
parent 76d4d2be3a
commit a4506bab10
16 changed files with 437 additions and 407 deletions

View file

@ -6,4 +6,46 @@
# may 2009
module Assembler

  # Abstract base class holding the state and helpers shared by the
  # concrete code generators: a symbol table, a label table and a
  # unique-label generator. Subclasses are expected to override
  # +output+.
  class AssemblerBase

    # Accepts (and ignores) any arguments so subclasses can call a bare
    # `super` from their own constructors.
    def initialize(*args)
      # Looking up a label that was never recorded is an error.
      @labels = Hash.new { |_table, missing| raise "undefined label: #{missing}" }
      # Per-suffix counters used when generating labels.
      @num_labels_with_suffix = Hash.new(0)
      # Global counter used when generating labels.
      @num_labels = 0
      # Symbol table, maps variable names to locations.
      @vars = {}
    end

    # Evaluate the given block with this assembler as `self`, so that
    # instructions can be written without an explicit receiver.
    def block(*args, &blk)
      instance_eval(&blk)
    end

    # Must be overridden; the base implementation always raises.
    def output
      raise "#{self.class} is supposed to implement this method!"
    end

    # Look up a variable's location; nil when it was never defined.
    def var(name)
      @vars[name]
    end
    alias_method :var?, :var

    # Generate a unique label of the form "L000001". When a suffix is
    # given the label also carries the suffix and a per-suffix counter,
    # e.g. "L000002_cmp_1".
    def label(suffix=nil)
      @num_labels += 1
      numbered = "L" + ("%06d" % @num_labels)
      return numbered unless suffix

      @num_labels_with_suffix[suffix] += 1
      "#{numbered}_#{suffix}_#{@num_labels_with_suffix[suffix]}"
    end
  end
end

View file

@ -5,13 +5,18 @@
# sjs
# may 2009
ROOT = __FILE__.sub(/\/asm\/binary\.rb$/, '') unless defined? ROOT
$LOAD_PATH << ROOT unless $LOAD_PATH.include?(ROOT)
require 'asm/asm'
module Assembler
# Define a method named `emit_byte` and one named `binary_size` and
# include this module. Calling the assembler methods will output
# x86 machine code ... hopefully. So far it's incomplete and
# binaries just segfault.
class Binary
class Binary < AssemblerBase
# This structure allows for x86 registers of all sizes. The
# number of the register is the index of the array in which it was
@ -44,7 +49,72 @@ module Assembler
MaxUnsigned = 2**MachineBits - 1
SignedRange = MinSigned..MaxSigned
X86_exit = {
'linux' => [0x89, 0xc3, # mov ebx, eax (exit code)
0xb8, 1, 0, 0, 0, # mov eax, 1
0xcd, 0x80 # int 0x80
].pack('c*'),
'darwin' => [0x50, # push eax (exit code)
0xb8, 1, 0, 0, 0, # mov eax, 1
0xcd, 0x80 # int 0x80
].pack('c*')
}
def initialize(platform='linux', binformat='elf')
super
@binary = [] # Byte array of machine code.
@platform = platform
@binformat = binformat
init_sections
end
# Compute the section layout for the configured platform and binary
# format. Sets @header_size, @text_offset, @text_size, @data_offset,
# @data_size, @bss_offset and @bss_size.
#
# Raises RuntimeError when the platform is unknown or when the binary
# format does not match the platform (ELF on linux, Mach-O on darwin).
def init_sections
  case @platform
  when 'linux'
    raise "unsupported" unless @binformat == 'elf'
    @header_size = 0x100                      # ELF, Linux
    @text_offset = 0x08048000 + @header_size  # Offset of text section in memory
  when 'darwin'
    raise "unsupported" unless @binformat == 'macho'
    @header_size = 0x100                      # Mach-O, Darwin
    @text_offset = 0x08048000 + @header_size  # Offset of text section in memory
  else
    # Use the instance variable: there is no local named `platform`
    # here, so interpolating it raised NameError instead of this message.
    raise "unsupported platform: #{@platform}"
  end
  @text_size = 0x02be00                       # Size of text section.
  @data_offset = @text_offset + @text_size    # Offset of data section.
  @data_size = 0x4e00                         # Size of data section.
  @bss_offset = @data_offset + @data_size     # Offset of bss section.
  @bss_size = 0                               # Size of bss section.
end
def output
@binary.pack('c*') + X86_exit[@platform]
end
# Define a constant in the .data section.
def const(name, value)
raise "unimplemented!"
end
# Reserve space for a variable with the given name and size (in
# dwords). The current bss size is recorded as the variable's location
# and the bss size then grows by `dwords`. Redefining an existing name
# only prints a warning to STDERR.
def defvar(name, dwords=1)
  if var?(name)
    STDERR.puts "[warning] attempted to redefine #{name}"
  else
    @vars[name] = @bss_size
    @bss_size += dwords
  end
end
def label(suffix=nil)
name = super
@labels[name] = bytes_written
return name
end
# Count the bytes that were encoded in the given block.
def asm
@ -57,6 +127,17 @@ module Assembler
bytes_written - instruction_offset
end
def emit_byte(byte)
@binary << byte
end
def bytes_written
@binary.size
end
def emit_label(name=label)
@labels[name] = @binary.length
end
def emit_dword(num)
num_to_quad(num).each {|byte| emit_byte(byte)}
@ -119,7 +200,7 @@ module Assembler
end
def regnum(op)
num = register?
num = register?(op)
raise "not a register: #{op.inspect}" unless num
num
end
@ -146,7 +227,7 @@ module Assembler
# 3. mov r/m32, reg32 (0x89, mod r/m, maybe sib)
# 3a. mov memoffset32, eax (0xa3, disp32)
# 4. mov r/m32, immediate32 (0xc7, mod r/m, maybe sib, imm32)
def x86_mov(dest, src)
def mov(dest, src)
dest = dest[6..-1] if dest.is_a?(String) && dest[0..5] == 'dword '
src = src[6..-1] if src.is_a?(String) && src[0..5] == 'dword '
@ -199,35 +280,35 @@ module Assembler
end
def x86_add(dest, src)
def add(dest, src)
end
def x86_sub(dest, src)
def sub(dest, src)
end
def x86_imul(op)
def imul(op)
end
def x86_idiv(op)
def idiv(op)
end
def x86_inc(op)
def inc(op)
asm do
if register?(op)
emit_byte(0x40 + regnum(op))
elsif rm32?(op)
emit_byte(0xff)
emit_modrm(...)
# emit_modrm(...)
else
raise "unsupported op #{op}, wanted r32 or r/m32"
end
end
end
def x86_push(reg)
def push(reg)
end
def x86_cmp(a, b)
def cmp(a, b)
end

View file

@ -3,123 +3,166 @@
# sjs
# may 2009
ROOT = __FILE__.sub(/\/asm\/text\.rb$/, '') unless defined? ROOT
$LOAD_PATH << ROOT unless $LOAD_PATH.include?(ROOT)
require 'asm/asm'
module Assembler
# Define a method named `emit` and include this module. Calling
# the assembler methods will output nasm-friendly x86 asm code,
# line by line. This is dead easy and we can trust nasm to
# compile correct machine code, which is tricky.
module Text
# Assembler methods output nasm-friendly x86 asm code, line by
# line. This is dead easy and we can trust nasm to compile
# correct machine code, which isn't trivial.
class Text < AssemblerBase
def self.included(other)
im = other.instance_methods
unless im.include?(:emit)
raise "#{self.name} requires the including class define the emit method"
def initialize(platform='linux')
super
@data = ''
@bss = ''
@code = ''
@templatefile = "#{ROOT}/template.#{platform}.asm"
raise "unsupported platform: #{platform}" unless File.readable?(@templatefile)
end
# Define a constant in the .data section by appending an `equ` line to
# the accumulated data text. Each definition is newline-terminated so
# that consecutive constants do not run together on one line.
def const(name, value)
  @data << "#{name}\tequ #{value}\n"
end
# Reserve a variable with the given name and size (in dwords) by
# appending a `resd` line to the bss text. The name maps to itself in
# the symbol table. Redefining an existing name only prints a warning
# to STDERR.
def defvar(name, dwords=1)
  if var?(name)
    STDERR.puts "[warning] attempted to redefine #{name}"
  else
    @bss << "#{name}: resd #{dwords}\n"
    @vars[name] = name
  end
end
def x86_mov(dest, src)
emit("mov #{dest}, #{src.is_a?(Numeric) ? "0x#{src.to_s(16)}" : src}")
# Append one line to the code text. The line is prefixed with a tab
# unless the :tab option supplies a different prefix (nil for none),
# and is always terminated with a newline.
def emit(code, options={})
  prefix = options.fetch(:tab, "\t")
  @code << "#{prefix}#{code}\n"
end
def x86_movzx(dest, src)
def label(suffix=nil)
name = super
@labels[name] = name
return name
end
# Render the final assembly source: read the platform template and
# substitute the first {data}, {bss} and {code} placeholders with the
# accumulated section text.
#
# The block form of String#sub is used so the generated text is
# inserted verbatim. With a string replacement, backslash sequences
# such as "\0" or "\\" inside the generated assembly would be
# interpreted as back-references or escapes and corrupt the output.
def output
  File.read(@templatefile).
    sub("{data}") { @data }.
    sub("{bss}") { @bss }.
    sub("{code}") { @code }
end
def emit_label(name=label)
emit("#{name}:", :tab => nil)
end
# Emit a `mov dest, src` line. When the source is numeric, a trailing
# comment with its base-16 representation is added.
def mov(dest, src)
  hex_note = src.is_a?(Numeric) ? " ; 0x#{src.to_s(16)}" : ''
  emit("mov #{dest}, #{src}#{hex_note}")
end
def movzx(dest, src)
emit("movzx #{dest}, #{src}")
end
def x86_add(dest, src)
def add(dest, src)
emit("add #{dest}, #{src}")
end
def x86_sub(dest, src)
def sub(dest, src)
emit("sub #{dest}, #{src}")
end
def x86_imul(op)
def imul(op)
emit("imul #{op}")
end
def x86_idiv(op)
def idiv(op)
emit("idiv #{op}")
end
def x86_inc(op)
def inc(op)
emit("inc #{op}")
end
def x86_dec(op)
def dec(op)
emit("dec #{op}")
end
def x86_push(reg)
def push(reg)
emit("push #{reg}")
end
def x86_pop(reg)
def pop(reg)
emit("pop #{reg}")
end
def x86_call(label)
def call(label)
emit("call #{label}")
end
def x86_neg(reg)
def neg(reg)
emit("neg #{reg}")
end
def x86_not(rm32)
def not(rm32)
emit("not #{rm32}")
end
def x86_xchg(op1, op2)
def xchg(op1, op2)
emit("xchg #{op1}, #{op2}")
end
def x86_and(op1, op2)
def and_(op1, op2)
emit("and #{op1}, #{op2}")
end
def x86_or(op1, op2)
def or(op1, op2)
emit("or #{op1}, #{op2}")
end
def x86_xor(op1, op2)
def xor(op1, op2)
emit("xor #{op1}, #{op2}")
end
def x86_jz(label)
def jz(label)
emit("jz #{label}")
end
def x86_jnz(label)
def jnz(label)
emit("jnz #{label}")
end
def x86_jmp(label)
def jmp(label)
emit("jmp #{label}")
end
def x86_jl(label)
def jl(label)
emit("jl #{label}")
end
def x86_cmp(a, b)
def cmp(a, b)
emit("cmp #{a}, #{b}")
end
def x86_lea(a, b)
def lea(a, b)
emit("lea #{a}, #{b}")
end
def x86_shr(a, b)
def shr(a, b)
emit("shr #{a}, #{b}")
end
def x86_loop(label)
def loop_(label)
emit("loop #{label}")
end
def x86_int(num)
def int(num)
emit("int 0x#{num.to_s(16)}")
end

View file

@ -1,14 +1,8 @@
#!/usr/bin/env ruby
ROOT = __FILE__.sub(/\/build\.rb$/, '') unless defined? ROOT
require 'compiler'
X86_exit = [0x89, 0xc3, # mov ebx, eax (exit code)
0xb8, 1, 0, 0, 0, # mov eax, 1
0xcd, 0x80 # int 0x80
].pack('c*')
require 'asm/text'
require 'asm/binary'
def main
filename = ARGV[0].to_s
@ -23,39 +17,21 @@ def base(filename)
filename.sub(/\.[^.]*$/, '')
end
def interpolate(templatefile, data)
template = File.read(templatefile)
data.inject(template) do |template, mapping|
token, replacement = *mapping
template.sub("{#{token}}", replacement)
end
end
# filename: input filename
# format: output format, nasm or binary
# asm: assembler to use
# returns: output filename
def compile(filename, format='asm')
def compile(filename, asm, binformat='elf')
# compile to asm or binary
output = nil
File.open(filename, 'r') do |input|
compiler = Compiler.new(input, format)
output = compiler.compile
end
if format == 'asm'
mode = 'w'
data, bss, code = *output
output = interpolate("#{ROOT}/template.asm",
:data => data, :bss => bss, :code => code)
else
mode = 'wb'
output += X86_exit
end
outfile = "#{base(filename)}.#{format}"
File.open(outfile, mode) do |out|
if format == 'asm'
out.puts(output)
compiler = Compiler.new(input, asm, binformat)
compiler.compile
end
ext = asm.class.name.split('::').last[0,3].downcase == 'bin' ? 'bin' : 'asm'
outfile = "#{base(filename)}.#{ext}"
File.open(outfile, 'wb') do |out|
out.puts(asm.output)
end
return outfile
@ -68,33 +44,47 @@ rescue ParseError => e
end
# assemble using nasm, return resulting filename.
def asm(filename)
def asm(filename, binformat='elf')
f = base(filename)
outfile = "#{f}.o"
output = `nasm -f elf -g -o #{outfile} #{filename}`
output = `nasm -f #{binformat} -g -o #{outfile} #{filename}`
if $?.exitstatus != 0
raise "nasm failed: #{$?.exitstatus}", output
puts output
raise "nasm failed: #{$?.exitstatus}"
end
return outfile
end
# link with ld, return resulting filename.
def link(filename)
def link(filename, platform='linux')
f = base(filename)
output = `ld -o #{f} #{filename}`
# Pick the linker command and its extra arguments for the platform.
# `when ... then` is used because the `when value:` colon form is
# Ruby 1.8-only and is a syntax error on 1.9 and later.
cmd, args = *case platform
when 'darwin' then ['gcc', '-arch i386']
when 'linux' then ['ld', '']
else
raise "unsupported platform: #{platform}"
end
output = `#{cmd} #{args} -o #{f} #{filename}`
if $?.exitstatus != 0
raise "ld failed: #{$?.exitstatus}", output
puts output
raise "ld failed: #{$?.exitstatus}"
end
`chmod +x #{f}`
return f
end
def build(filename, format='asm')
if format == 'asm'
link( asm( compile(filename) ) )
# TODO Use a dependency injection framework for the assembler, and
# other parts as things become more modular.
def build(filename, platform='linux', format='asm', binformat='elf')
bin = if format == 'asm'
code = compile(filename, Assembler::Text.new(platform))
obj = asm( code, binformat )
link( obj, platform )
else # binary
link( compile(filename, format='bin') )
obj = compile(filename, Assembler::Binary.new(platform), binformat)
link( obj, platform )
end
return bin
end
def run(filename)

View file

@ -12,9 +12,6 @@
# require 'rubygems'
# require 'unroller'
require 'asm'
require 'opcode'
class ParseError < StandardError
attr_reader :caller, :context
def initialize(caller, context=nil)
@ -24,43 +21,27 @@ class ParseError < StandardError
end
class Compiler
# This module uses our `emit_byte` method to output x86 machine code
# directly using the assembler library.
# include Assembler::Binary
Keywords = %w[
if else end while until repeat for to do break
print
]
attr_reader :data, :bss, :code
attr_reader :asm
def initialize(input, asm=Assembler::Text.new)
def initialize(input, asm, binformat='elf')
# XXX for development only!
@indent = 0 # for pretty printing
# The only binary format our assembler knows right now is ELF.
unless binformat == 'elf'
raise "Only ELF is supported. Unsupported binary format: #{binformat}."
end
@look = '' # Next lookahead char.
@token = nil # Type of last read token.
@value = nil # Value of last read token.
@input = input # Stream to read from.
@data = '' # Data section.
@bss = '' # BSS section.
@code = '' # Code section.
@binary = [] # Byte array of machine code.
@vars = {} # Symbol table, maps names to locations in BSS.
@num_labels = 0 # Used to generate unique labels.
@num_labels_with_suffix = Hash.new(0)
@header_size = 0x100 # ELF, Linux, x86
@text_offset = 0x08048000 + @header_size # Offset of text section in memory (Linux, x86).
@text_size = 0x02be00 # Size of text section.
@data_offset = @text_offset + @text_size # Offset of data section.
@data_size = 0x4e00 # Size of data section.
@bss_offset = @data_offset + @data_size # Offset of bss section.
@bss_size = 0 # Size of bss section.
# Labels for the assembler. Maps names to locations.
@labels = Hash.new {|h, key| raise "undefined label: #{key}"}
@asm = asm
@ -68,10 +49,6 @@ class Compiler
get_char
end
def asm
@asm
end
def compile
block
expected(:'end of file') unless eof?
@ -107,10 +84,10 @@ class Compiler
match('(')
# TODO arg list
match(')')
x86_call(name)
asm.call(name)
else
# variable access
x86_mov(:eax, "dword [#{name}]")
asm.mov(:eax, "dword [#{name}]")
end
end
@ -123,7 +100,7 @@ class Compiler
elsif alpha?(@look)
identifier # or call
elsif digit?(@look)
x86_mov(:eax, get_number.to_i)
asm.mov(:eax, get_number.to_i)
else
expected(:'integer, identifier, function call, or parenthesized expression', :got => @look)
end
@ -134,7 +111,7 @@ class Compiler
sign = @look
match(sign) if op?(:unary, sign)
factor
x86_neg(:eax) if sign == '-'
asm.neg(:eax) if sign == '-'
end
# Parse and translate a single term (factor or mulop). Result is in
@ -172,7 +149,7 @@ class Compiler
def add
match('+')
term # Result is in eax.
x86_add(:eax, '[esp]') # Add a to b.
asm.add(:eax, '[esp]') # Add a to b.
end
# Parse a subtraction operator and the 2nd term (b). The result is
@ -180,8 +157,8 @@ class Compiler
def subtract
match('-')
term # Result, b, is in eax.
x86_neg(:eax) # Fake the subtraction. a - b == a + -b
x86_add(:eax, '[esp]') # Add a and -b.
asm.neg(:eax) # Fake the subtraction. a - b == a + -b
asm.add(:eax, '[esp]') # Add a and -b.
end
# Parse an addition operator and the 2nd term (b). The result is
@ -189,7 +166,7 @@ class Compiler
def multiply
match('*')
signed_factor # Result is in eax.
x86_imul('dword [esp]') # Multiply a by b.
asm.imul('dword [esp]') # Multiply a by b.
end
# Parse a division operator and the divisor (b). The result is
@ -197,14 +174,14 @@ class Compiler
def divide
match('/')
signed_factor # Result is in eax.
x86_xchg(:eax, '[esp]') # Swap the divisor and dividend into
asm.xchg(:eax, '[esp]') # Swap the divisor and dividend into
# the correct places.
# idiv uses edx:eax as the dividend so we need to ensure that edx
# is correctly sign-extended w.r.t. eax.
emit('cdq') # Sign-extend eax into edx (Convert Double to
asm.cdq # Sign-extend eax into edx (Convert Double to
# Quad).
x86_idiv('dword [esp]') # Divide a (eax) by b ([esp]).
asm.idiv('dword [esp]') # Divide a (eax) by b ([esp]).
end
@ -215,19 +192,19 @@ class Compiler
def bitor_expr
match('|')
term
x86_or(:eax, '[esp]')
asm.or(:eax, '[esp]')
end
def bitand_expr
match('&')
signed_factor
x86_and(:eax, '[esp]')
asm.and_(:eax, '[esp]')
end
def xor_expr
match('^')
term
x86_xor(:eax, '[esp]')
asm.xor(:eax, '[esp]')
end
@ -240,6 +217,7 @@ class Compiler
while @look == '|'
op '||' do
boolean_term
# !!! this method has moved, IMPLEMENT THIS!
emit("<logical or>")
end
end
@ -250,6 +228,7 @@ class Compiler
while @look == '&'
op '&&' do
not_factor
# !!! this method has moved, IMPLEMENT THIS!
emit("<logical and>")
end
end
@ -258,9 +237,9 @@ class Compiler
def boolean_factor
if boolean?(@look)
if get_boolean == 'true'
x86_mov(:eax, -1)
asm.mov(:eax, -1)
else
x86_xor(:eax, :eax)
asm.xor(:eax, :eax)
end
scan
else
@ -273,7 +252,7 @@ class Compiler
match('!')
boolean_factor
make_boolean(:eax) # ensure it is -1 or 0...
x86_not(:eax) # so that not is also boolean not
asm.not(:eax) # so that not is also boolean not
else
boolean_factor
end
@ -282,11 +261,11 @@ class Compiler
# Convert any identifier to a boolean (-1 or 0). This is
# semantically equivalent to !!reg in C or Ruby.
def make_boolean(reg=:eax)
end_label = unique_label(:endmakebool)
x86_cmp(reg, 0) # if false do nothing
x86_jz(end_label)
x86_mov(reg, -1) # truthy, make it true
emit_label(end_label)
end_label = asm.label(:endmakebool)
asm.cmp(reg, 0) # if false do nothing
asm.jz(end_label)
asm.mov(reg, -1) # truthy, make it true
asm.emit_label(end_label)
end
def relation
@ -314,14 +293,14 @@ class Compiler
# and make_boolean will leave -1 (true) for us in eax.
def neq_relation
expression
x86_sub(:eax, '[esp]')
asm.sub(:eax, '[esp]')
make_boolean
end
# Invert the != test for equal.
def eq_relation
neq_relation
x86_not(:eax)
asm.not(:eax)
end
# > and < are both implemented in terms of jl (jump if less than).
@ -337,20 +316,20 @@ class Compiler
# Invert the sense of the test?
invert = options[:invert]
true_label = unique_label(:cmp)
end_label = unique_label(:endcmp)
x86_cmp(a, b)
x86_jl(true_label)
true_label = asm.label(:cmp)
end_label = asm.label(:endcmp)
asm.cmp(a, b)
asm.jl(true_label)
x86_xor(:eax, :eax) # return false
x86_not(:eax) if invert # (or true if inverted)
x86_jmp(end_label)
asm.xor(:eax, :eax) # return false
asm.not(:eax) if invert # (or true if inverted)
asm.jmp(end_label)
emit_label(true_label)
x86_xor(:eax, :eax) # return true
x86_not(:eax) unless invert # (or false if inverted)
asm.emit_label(true_label)
asm.xor(:eax, :eax) # return true
asm.not(:eax) unless invert # (or false if inverted)
emit_label(end_label)
asm.emit_label(end_label)
end
# a: [esp]
@ -401,8 +380,8 @@ class Compiler
name = @value
match('=')
boolean_expression
defvar(name) unless var?(name)
x86_mov("dword [#{name}]", :eax)
asm.defvar(name) unless asm.var?(name)
asm.mov("dword [#{name}]", :eax)
end
# Parse a code block.
@ -439,26 +418,26 @@ class Compiler
# Parse an if-else statement.
def if_else_stmt(label)
else_label = unique_label(:end_or_else)
else_label = asm.label(:end_or_else)
end_label = else_label # only generated if else clause
# present
condition
skip_any_whitespace
x86_jz(else_label)
asm.jz(else_label)
@indent += 1
block(label)
@indent -= 1
if @token == :keyword && @value == 'else'
skip_any_whitespace
end_label = unique_label(:endif) # now we need the 2nd label
x86_jmp(end_label)
emit_label(else_label)
end_label = asm.label(:endif) # now we need the 2nd label
asm.jmp(end_label)
asm.emit_label(else_label)
@indent += 1
block(label)
@indent -= 1
end
match_word('end')
emit_label(end_label)
asm.emit_label(end_label)
end
# Used to implement the Two-Label-Loops (while, until, repeat).
@ -467,9 +446,9 @@ class Compiler
# block: Code to execute at the start of each iteration. (e.g. a
# condition)
def simple_loop(name)
start_label = unique_label(:"loop_#{name}")
end_label = unique_label(:"end_#{name}")
emit_label(start_label)
start_label = asm.label(:"loop_#{name}")
end_label = asm.label(:"end_#{name}")
asm.emit_label(start_label)
yield(end_label)
@ -477,15 +456,15 @@ class Compiler
block(end_label)
@indent -= 1
match_word('end')
x86_jmp(start_label)
emit_label(end_label)
asm.jmp(start_label)
asm.emit_label(end_label)
end
def while_stmt
simple_loop('while') do |end_label|
condition
skip_any_whitespace
x86_jz(end_label)
asm.jz(end_label)
end
end
@ -493,7 +472,7 @@ class Compiler
simple_loop('until') do |end_label|
condition
skip_any_whitespace
x86_jnz(end_label)
asm.jnz(end_label)
end
end
@ -511,24 +490,24 @@ class Compiler
counter = "[#{get_name}]"
match('=')
boolean_expression # initial value
x86_sub(:eax, 1) # pre-decrement because of the
asm.sub(:eax, 1) # pre-decrement because of the
# following pre-increment
x86_mov(counter, :eax) # stash the counter in memory
asm.mov(counter, :eax) # stash the counter in memory
match_word('to', :scan => true)
boolean_expression # final value
skip_any_whitespace
x86_push(:eax) # stash final value on stack
asm.push(:eax) # stash final value on stack
final = '[esp]'
simple_loop('for') do |end_label|
x86_mov(:ecx, counter) # get the counter
x86_add(:ecx, 1) # increment
x86_mov(counter, :ecx) # store the counter
x86_cmp(final, :ecx) # check if we're done
x86_jz(end_label) # if so jump to the end
asm.mov(:ecx, counter) # get the counter
asm.add(:ecx, 1) # increment
asm.mov(counter, :ecx) # store the counter
asm.cmp(final, :ecx) # check if we're done
asm.jz(end_label) # if so jump to the end
end
x86_add(:esp, 4) # clean up the stack
asm.add(:esp, 4) # clean up the stack
end
# do 5
@ -538,39 +517,38 @@ class Compiler
boolean_expression
skip_any_whitespace
x86_mov(:ecx, :eax)
x86_push(:ecx)
asm.mov(:ecx, :eax)
start_label = unique_label(:do)
end_label = unique_label(:enddo)
emit_label(start_label)
start_label = asm.label(:do)
end_label = asm.label(:enddo)
asm.emit_label(start_label)
x86_push(:ecx)
asm.push(:ecx)
@indent += 1
block(end_label)
@indent -= 1
x86_pop(:ecx)
asm.pop(:ecx)
match_word('end')
x86_loop(start_label)
asm.loop_(start_label)
# Phony push! break needs to clean up the stack, but since we
# don't know if there is a break at this point we fake a push and
# always clean up the stack after.
x86_sub(:esp, 4)
asm.sub(:esp, 4)
emit_label(end_label)
asm.emit_label(end_label)
# If there was a break we have to clean up the stack here. If
# there was no break we clean up the phony push above.
x86_add(:esp, 4)
asm.add(:esp, 4)
end
def break_stmt(label)
if label
x86_jmp(label)
asm.jmp(label)
else
expected(:'break to be somewhere useful',
:got => :'a break outside a loop')
@ -581,51 +559,57 @@ class Compiler
def condition
boolean_expression
skip_whitespace
x86_cmp(:eax, 0) # 0 is false, anything else is true
asm.cmp(:eax, 0) # 0 is false, anything else is true
end
# print eax in hex format
def print_stmt
asm.block do
# define a lookup table of digits
unless var?('DIGITS')
defvar('DIGITS', 4)
x86_mov('dword [DIGITS]', 0x33323130)
x86_mov('dword [DIGITS+4]', 0x37363534)
x86_mov('dword [DIGITS+8]', 0x62613938)
x86_mov('dword [DIGITS+12]', 0x66656463)
mov('dword [DIGITS]', 0x33323130)
mov('dword [DIGITS+4]', 0x37363534)
mov('dword [DIGITS+8]', 0x62613938)
mov('dword [DIGITS+12]', 0x66656463)
end
# 3 dwords == 12 chars
defvar('HEX', 3) unless var?('HEX')
# TODO check sign and prepend '-' if negative
x86_mov('word [HEX]', 0x7830) # "0x" == [48, 120]
x86_mov('word [HEX+10]', 0xa) # newline + null terminator
mov('word [HEX]', 0x7830) # "0x" == [48, 120]
mov('word [HEX+10]', 0xa) # newline + null terminator
end
boolean_expression
asm.block do
# convert eax to a hex string
x86_lea(:esi, '[DIGITS]')
x86_lea(:edi, '[HEX+9]')
lea(:esi, '[DIGITS]')
lea(:edi, '[HEX+9]')
# build the string backwards (right to left), byte by byte
x86_mov(:ecx, 4)
emit_label(loop_label=unique_label)
mov(:ecx, 4)
end
asm.emit_label(loop_label=asm.label)
asm.block do
# low nybble of nth byte
x86_movzx(:ebx, :al)
x86_and(:bl, 0x0f) # isolate low nybble
x86_movzx(:edx, 'byte [esi+ebx]')
x86_mov('byte [edi]', :dl)
x86_dec(:edi)
movzx(:ebx, :al)
and_(:bl, 0x0f) # isolate low nybble
movzx(:edx, 'byte [esi+ebx]')
mov('byte [edi]', :dl)
dec(:edi)
# high nybble of nth byte
x86_movzx(:ebx, :al)
x86_and(:bl, 0xf0) # isolate high nybble
x86_shr(:bl, 4)
x86_mov(:dl, 'byte [esi+ebx]')
x86_mov('byte [edi]', :dl)
x86_dec(:edi)
x86_shr(:eax, 8)
x86_loop(loop_label)
x86_mov(:eax, 4) # SYS_write
x86_mov(:ebx, 1) # STDOUT
x86_lea(:ecx, '[HEX]')
x86_mov(:edx, 11) # excluding term, max # of chars to print
x86_int(0x80)
movzx(:ebx, :al)
and_(:bl, 0xf0) # isolate high nybble
shr(:bl, 4)
mov(:dl, 'byte [esi+ebx]')
mov('byte [edi]', :dl)
dec(:edi)
shr(:eax, 8)
loop_(loop_label)
mov(:eax, 4) # SYS_write
mov(:ebx, 1) # STDOUT
lea(:ecx, '[HEX]')
mov(:edx, 11) # excluding term, max # of chars to print
int(0x80)
end
end
@ -803,66 +787,6 @@ class Compiler
end
# Define a constant in the .data section.
def equ(name, value)
@data << "#{name}\tequ #{value}"
end
# Define a variable with the given name and size (in dwords).
def defvar(name, dwords=1)
unless var?(name)
@bss << "#{name}: resd #{dwords}\n"
@vars[name] = @bss_size
@bss_size += dwords
else
STDERR.puts "[warning] attempted to redefine #{name}"
end
end
def var?(name)
@vars[name]
end
def var(name)
@vars[name]
end
# Emit a line of code wrapped between a tab and a newline. Required
# by Assembler::Text.
def emit(code, options={})
tab = options.has_key?(:tab) ? options[:tab] : "\t"
@code << "#{tab}#{code}\n"
end
# emit_byte and bytes_written are required by Assembler::Binary.
def emit_byte(byte)
@binary << byte
end
def bytes_written
@binary.size
end
def emit_label(name=unique_label)
emit("#{name}:", :tab => nil)
@labels[name] = @binary.length
end
def resolve_label(label)
@labels[label]
end
# Generate a unique label.
def unique_label(suffix=nil)
@num_labels += 1
if suffix
@num_labels_with_suffix[suffix] += 1
suffix = "_#{suffix}_#{@num_labels_with_suffix[suffix]}"
end
"L#{sprintf "%06d", @num_labels}#{suffix}"
end
def indent
real_indent = if @value == 'else' || @value == 'end'
@indent - 1
@ -872,16 +796,10 @@ class Compiler
' ' * (real_indent * 4)
end
# Pack the array into a byte string.
def binary
@binary.pack('c*')
end
def pushing(reg)
x86_push(reg)
asm.push(reg)
yield
x86_add(:esp, 4)
asm.add(:esp, 4)
end
def op(name)

View file

@ -1,3 +0,0 @@
mov ebx, eax
mov eax, 1
int 0x80

6
min.darwin.asm Normal file
View file

@ -0,0 +1,6 @@
BITS 32
SECTION .text
GLOBAL _main
_main:
mov eax, 0
ret

View file

@ -1,4 +0,0 @@
BITS 32
mov ebx,eax
mov eax,1
int 0x80

View file

@ -1,25 +0,0 @@
# Value object describing one encoded instruction as its component
# parts. Every part is either a single byte or a collection of bytes.
class OpCode
  Attrs = [:prefix, :op, :modrm, :sib, :extra]
  attr_accessor(*Attrs)

  # attrs: hash keyed by the names in Attrs; missing keys leave the
  # corresponding part nil.
  def initialize(attrs)
    Attrs.each do |field|
      send("#{field}=", attrs[field])
    end
  end

  # Number of bytes: enumerable parts contribute their element count,
  # anything else (including nil) counts as one byte.
  def size
    total = 0
    Attrs.each do |field|
      part = instance_variable_get("@#{field}")
      total += part.is_a?(Enumerable) ? part.size : 1
    end
    total
  end

  # All parts, in Attrs order, flattened and packed as signed bytes.
  def binary
    parts = []
    Attrs.each { |field| parts << send(field) }
    parts.flatten.pack('c*')
  end
end

View file

@ -1,4 +0,0 @@
BITS 32
GLOBAL _start
SECTION .text
_start:

11
template.darwin.asm Normal file
View file

@ -0,0 +1,11 @@
BITS 32
GLOBAL _main
SECTION .data
{data}
SECTION .bss
{bss}
SECTION .text
_main:
{code}
;; The result in eax is the exit code, just return.
ret

47
test.rb
View file

@ -1,47 +0,0 @@
require 'compiler'
require 'stringio'
X86_exit = [0x89, 0xc3, # mov ebx, eax (exit code)
0xb8, 1, 0, 0, 0, # mov eax, 1
0xcd, 0x80 # int 0x80
].pack('c*')
def error(msg) STDERR.puts(msg) end
def parse(input)
compiler = Compiler.new(input)
compiler.parse # tuple of [data, bss, code, binary]
rescue ParseError => e
error("[error] #{e.message}")
error("[context] #{e.context}")
# error("Aborting!")
error(e.caller)
exit(1)
end
# Fill a template string: for each token/replacement pair in `data`,
# replace the first occurrence of "{token}" with its replacement.
# Returns the resulting string.
def interpolate(template, data)
  data.inject(template) do |text, (token, replacement)|
    text.sub("{#{token}}", replacement)
  end
end
def main(arg)
input = if File.readable?(arg)
File.open(arg)
else
# StringIO.new("5*(3-5)*2+2-9/3-8/2-4*(5+5+5)\n")
StringIO.new("abc=999\nabc-888\n")
end
data, bss, code, binary = *parse(input)
template = File.read("template.asm")
asm = interpolate(template, :data => data, :bss => bss, :code => code)
File.open("test.asm", "w") { |f| f.puts(asm) }
File.open("test.bin", "wb") { |f|
f.write(binary)
f.write(X86_exit)
}
end
main(ARGV[0].to_s)

View file

@ -1,50 +1,63 @@
# Detect the host platform and choose the matching object format.
# Values are unquoted: make keeps quotes as part of the value, which
# made the "bin" fallback check below impossible to satisfy.
PLATFORM=$(shell uname -s)
BINFORMAT=bin
ifeq ($(PLATFORM), Darwin)
BINFORMAT=macho
endif
ifeq ($(PLATFORM), Linux)
BINFORMAT=elf
endif
# Recipe lines such as @echo are not allowed outside a rule, so the
# diagnostic is emitted with make's own warning function instead.
ifeq ($(BINFORMAT),bin)
$(warning binary format is 'bin', this is probably not what you want!)
$(warning Your platform, $(PLATFORM), is unsupported.)
endif
all: lt gt ge le eq neq if while until repeat for do break print
@echo -n
@true
lt: test.rb test_lt.code
@./test.rb lt
@./test.rb lt $(BINFORMAT)
gt: test.rb test_gt.code
@./test.rb gt
@./test.rb gt $(BINFORMAT)
ge: test.rb test_ge.code
@./test.rb ge
@./test.rb ge $(BINFORMAT)
le: test.rb test_le.code
@./test.rb le
@./test.rb le $(BINFORMAT)
eq: test.rb test_eq.code
@./test.rb eq
@./test.rb eq $(BINFORMAT)
neq: test.rb test_neq.code
@./test.rb neq
@./test.rb neq $(BINFORMAT)
if: test.rb test_if.code
@./test.rb if
@./test.rb if $(BINFORMAT)
while: test.rb test_while.code
@./test.rb while
@./test.rb while $(BINFORMAT)
until: test.rb test_until.code
@./test.rb until
@./test.rb until $(BINFORMAT)
repeat: test.rb test_repeat.code
@./test.rb repeat
@./test.rb repeat $(BINFORMAT)
for: test.rb test_for.code
@./test.rb for
@./test.rb for $(BINFORMAT)
do: test.rb test_do.code
@./test.rb do
@./test.rb do $(BINFORMAT)
break: test.rb test_break.code
@./test.rb break
@./test.rb break $(BINFORMAT)
print: test.rb test_print.code
@./test.rb print
@./test.rb print $(BINFORMAT)
big_test: test.rb big_test.code
@./test.rb big
@./test.rb big $(BINFORMAT)
clean:
@rm -f test*.asm test*.o

View file

@ -5,12 +5,21 @@ $LOAD_PATH << ROOT
require 'build'
# usage: test.rb <func> [binformat]
#
# ([format] will go before [binformat])
def main
func = ARGV[0].to_s
format = 'asm' # 'bin' only assembles one or two
# instructions right now, but support
# is in place
binformat = (ARGV[1] ? ARGV[1] : 'elf').downcase
platform = `uname -s`.chomp.downcase
print "testing #{func} ... "
success = run( build("test_#{func}.code") )
success = run( build("test_#{func}.code", platform, format, binformat) )
puts success == 0 ? "pass" : "FAIL! (#{success})"
exit(success)
exit(success.to_i)
end
main if $0 == __FILE__