mirror of
https://github.com/samsonjs/compiler.git
synced 2026-06-24 04:49:09 +00:00
WIP: re-organize into lib/ dir
This commit is contained in:
parent
5da06f938c
commit
a12bdafde4
34 changed files with 4280 additions and 0 deletions
7
bin/compile
Executable file
7
bin/compile
Executable file
|
|
@ -0,0 +1,7 @@
|
|||
#!/usr/bin/env ruby
|
||||
|
||||
$LOAD_PATH.unshift('../lib')
|
||||
|
||||
require 'compiler'
|
||||
|
||||
???
|
||||
116
lib/compiler.rb
Normal file
116
lib/compiler.rb
Normal file
|
|
@ -0,0 +1,116 @@
|
|||
this_dir = File.dirname(__FILE__)
|
||||
Dir.chdir(File.expand_path('..', this_dir))
|
||||
$LOAD_PATH.unshift(this_dir) unless $LOAD_PATH.include?(this_dir)
|
||||
|
||||
require 'compiler/parser'
|
||||
|
||||
# Entry point of the compiler. A Compiler picks the architecture, the
# assembler and (for binary output) the symbol table / object file
# classes that match the requested target, and then drives the parser.
class Compiler

  attr_reader :platform, :arch_name, :format, :binformat

  attr_reader :arch, :asm, :symbol_table_factory, :object_file_factory

  # platform  [String] "linux" or "darwin"
  # arch_name [String] "x86" or "arm"
  # format    [String] "text" or "bin"
  # binformat [String, nil] "elf" or "macho", only used when format is "bin"
  #
  # Raises RuntimeError when the binary format, architecture or output
  # format is not one of the supported values.
  def initialize(platform, arch_name, format, binformat = nil)
    @platform = platform
    @arch_name = arch_name
    @format = format
    @binformat = binformat
    wire
  end

  # Parse +input+ and compile it using the assembler selected at
  # construction time. Returns whatever Parser#compile returns.
  def compile(input)
    parser = Parser.new(input, asm)
    parser.parse
    parser.compile
  end


  #######
  private
  #######

  # Select the collaborating classes. The binary format is wired first
  # because the binary assemblers ask this object for its symbol table
  # factory while they are being constructed.
  def wire
    if format == 'bin'
      case binformat
      when 'elf'
        wire_elf
      when 'macho'
        wire_macho
      else
        raise "unsupported binary format: #{binformat}"
      end
    end

    case arch_name
    when 'x86'
      wire_x86
    when 'arm'
      wire_arm
    else
      # @arch has not been assigned yet at this point, so report the
      # requested name rather than the (nil) arch object.
      raise "unsupported arch: #{arch_name}"
    end
  end

  def wire_elf
    require 'compiler/asm/elf/object_file'
    require 'compiler/asm/elf/symbol_table'

    @symbol_table_factory = ASM::ELF::SymbolTable
    @object_file_factory = ASM::ELF::ObjectFile
  end

  def wire_macho
    require 'compiler/asm/macho/object_file'
    require 'compiler/asm/macho/symbol_table'

    @symbol_table_factory = ASM::MachO::SymbolTable
    @object_file_factory = ASM::MachO::ObjectFile
  end

  def wire_arm
    require 'compiler/asm/arm/binary_assembler'
    require 'compiler/asm/arm/text_assembler'

    @arch = ASM::ARM::Arch.instance
    @asm = build_assembler(ASM::ARM::TextAssembler, ASM::ARM::BinaryAssembler)
  end

  def wire_x86
    require 'compiler/asm/x86/binary_assembler'
    require 'compiler/asm/x86/text_assembler'

    @arch = ASM::X86::Arch.instance
    @asm = build_assembler(ASM::X86::TextAssembler, ASM::X86::BinaryAssembler)
  end

  # Instantiate the assembler class matching the output format, passing
  # this compiler as the assembler's delegate.
  def build_assembler(text_class, binary_class)
    case format
    when 'text'
      text_class.new(self)
    when 'bin'
      binary_class.new(self)
    else
      raise "unsupported output format: #{format}"
    end
  end

end
|
||||
67
lib/compiler/asm/arch.rb
Normal file
67
lib/compiler/asm/arch.rb
Normal file
|
|
@ -0,0 +1,67 @@
|
|||
class Compiler
  module ASM

    # Describes a target architecture: register and word widths, byte
    # order, and the binary preamble/postamble used by the assemblers.
    class Arch

      attr_reader :bits, :word_bits
      attr_reader :preamble, :postamble
      attr_reader :endianness

      # config is a Hash with String keys:
      #  - bits: native register / pointer size
      #  - word_bits: number of bits in a word
      #  - endianness: "big" or "little"
      #  - preamble: binary preamble
      #  - postamble: binary postamble
      def initialize(config)
        @bits = config['bits']
        @word_bits = config['word_bits']
        @endianness = config['endianness']
        @preamble = config['preamble']
        @postamble = config['postamble']
      end

      # Native register size in bytes.
      def bytes
        bits / 8
      end

      # Word size in bytes.
      def word_bytes
        word_bits / 8
      end

      def big_endian?
        endianness == 'big'
      end

      def little_endian?
        endianness == 'little'
      end

      # Pointer size in bytes (same as the native register size).
      def pointer_bytes
        bytes
      end

      # Same value as #pointer_bytes. BinaryAssembler asks for the
      # pointer width under this name, so both spellings are answered.
      def pointer_size
        pointer_bytes
      end

      # Smallest value representable in a signed native integer.
      def min_signed
        -(2**(bits - 1))
      end

      # Largest value representable in a signed native integer.
      def max_signed
        2**(bits - 1) - 1
      end

      def min_unsigned
        0
      end

      # Largest value representable in an unsigned native integer.
      def max_unsigned
        2**bits - 1
      end

      # Range of signed native integers (memoized).
      def signed_int
        @signed_int ||= min_signed..max_signed
      end

    end

  end
end
|
||||
82
lib/compiler/asm/assembler.rb
Normal file
82
lib/compiler/asm/assembler.rb
Normal file
|
|
@ -0,0 +1,82 @@
|
|||
# sjs
|
||||
# may 2009
|
||||
|
||||
class Compiler
  module ASM

    # Abstract class for common functionality between different code
    # generators. Also defines somewhat of an interface that must be
    # implemented to be useful: every instruction method below is a no-op
    # returning nil until a subclass overrides it.
    class Assembler

      # The object that created this assembler. #arch is forwarded to it.
      attr_reader :delegate

      def initialize(delegate)
        @delegate = delegate
      end

      # Architecture description supplied by the delegate.
      def arch
        delegate.arch
      end

      # Evaluate the given block with this assembler as self, so that
      # instruction methods can be called without a receiver. Positional
      # arguments are accepted and ignored.
      def block(*args, &block)
        instance_eval(&block)
      end

      def load(n)
      end

      def load_var(name)
      end

      def store_var(name, reg)
      end

      def neg(reg)
      end

      def stack_add(reg)
      end

      def stack_sub(reg)
      end

      def stack_mul_signed(reg)
      end

      def stack_div(reg)
      end

      def stack_or(reg)
      end

      def stack_xor(reg)
      end

      def stack_and(reg)
      end

      def not_(reg)
      end
      alias_method :not, :not_

      def compare(reg, n)
      end

      def je(label)
      end

      def jne(label)
      end

      def jmp(label)
      end

      def mov_reg_imm(reg, n)
      end

      def call(label)
      end

    end

  end
end
|
||||
322
lib/compiler/asm/binary_assembler.rb
Normal file
322
lib/compiler/asm/binary_assembler.rb
Normal file
|
|
@ -0,0 +1,322 @@
|
|||
require 'compiler/asm/assembler'
|
||||
require 'compiler/asm/constant_proxy'
|
||||
require 'compiler/asm/variable_proxy'
|
||||
|
||||
class Compiler
  module ASM

    # Assembler that collects machine code in memory and serializes it
    # through the delegate's object file factory.
    #
    # Output is produced in two passes (see #output): labels are resolved
    # to relative addresses first, then variable and constant addresses are
    # patched in once the section offsets are known.
    class BinaryAssembler < Assembler

      DEBUG_OUTPUT = false

      # Our instruction pointer, i.e. the number of bytes emitted so far.
      attr_reader :ip

      # delegate must respond to #symbol_table_factory, #object_file_factory
      # and #platform (plus #arch, which the superclass forwards to it).
      def initialize(delegate)
        super(delegate)

        @symtab = delegate.symbol_table_factory.new

        # Almost a byte array, except for addresses.
        #
        # Addresses take the form [:<type>, <name>]
        # where <type> is one of: var, const, or label
        #
        # NOTE the type is redundant because of VariableProxy#const?
        #      and labels are just strings.
        #
        #      however, we could accept strings for variable names
        #      if we keep the type tag. something to think about.
        @ir = []

        @ip = 0

        # Map locations in the byte array to var proxies so we can
        # resolve address operations on the 2nd pass.
        @proxies = {}

        emit_entry_point
        emit_preamble
      end

      # register for return values
      def return_reg
        raise 'subclasses must override #return_reg'
      end

      # Hook for subclasses; emits nothing by default.
      def emit_entry_point
      end

      def emit_preamble
        arch.preamble[@delegate.platform].each { |byte| emit_byte(byte) }
      end

      def emit_postamble
        arch.postamble[@delegate.platform].each { |byte| emit_byte(byte) }
      end

      # Finish the program and return the serialized object file.
      def output
        emit_postamble

        # 1st pass: labels become relative addresses, var/const addresses
        # become placeholder bytes that are remembered in @proxies.
        byte_array = resolve_labels

        binary = package(byte_array)

        @symtab.calculate_offsets(binary.length)
        if DEBUG_OUTPUT
          puts ">>> text offset:  0x#{@symtab.text_offset.to_s(16)}"
          puts ">>> const offset: 0x#{@symtab.const_offset.to_s(16)}"
          puts ">>> bss offset:   0x#{@symtab.bss_offset.to_s(16)}"
        end

        # 2nd pass: now that we know where everything lies, replace the
        # placeholder bytes (0xff) of every proxy with the real address.
        bss_offset = @symtab.bss_offset
        const_offset = @symtab.const_offset
        @proxies.each do |offset, proxy|
          base_addr =
            if proxy.const?
              const_offset + @symtab.const(proxy.name)
            else
              bss_offset + @symtab.var(proxy.name)
            end
          byte_array[offset, arch.pointer_bytes] = addr_to_bytes(proxy.resolve(base_addr))
        end

        binary = package(byte_array)

        objwriter = @delegate.object_file_factory.new
        objwriter.text(binary)
        objwriter.const(@symtab.const_data) if @symtab.const_size > 0
        objwriter.bss(@symtab.bss_size) if @symtab.bss_size > 0
        objwriter.reloc(@symtab.reloc_info)
        objwriter.symtab(@symtab)
        objwriter.serialize
      end

      # Walk the IR and return a flat array of bytes. Label references are
      # replaced by relative addresses; var/const references are replaced by
      # 0xff placeholder bytes and recorded in @proxies and in the symbol
      # table's relocation list.
      def resolve_labels
        bytes_read = 0
        bytes = []
        @ir.each do |x|
          if x.is_a?(Numeric)
            bytes << x
            bytes_read += 1

          elsif addr?(x)
            # remember this so we can replace the bogus addr later
            @proxies[bytes_read] = x[1]

            # add a relocation entry for this address
            @symtab.reloc(bytes_read)

            # fill in said bogus addr
            bogus_addr = [0xff] * arch.pointer_bytes
            bytes.concat(bogus_addr)
            bytes_read += bogus_addr.length

          # TODO find out if we should calculate addrs as offsets rather than
          #      absolute as they are done now. (ok for Mach-O, maybe not ELF)
          elsif label?(x)
            # the actual eip points to the next instruction already, so should we.
            real_ip = bytes_read + arch.bytes
            name = x[1]
            addr = @symtab.lookup_label(name) - real_ip # dest - src to get relative addr

            addr_bytes = addr_to_bytes(addr)
            bytes.concat(addr_bytes)
            bytes_read += addr_bytes.length

          else
            raise "unknown value in the IR at #{bytes_read} - #{x.inspect}"
          end
        end

        bytes
      end

      # Pack an array of byte values into a binary string.
      def package(bytes)
        bytes.pack('c*')
      end

      # Silly semantics, but labels don't count as an address since they
      # don't need to be deferred.
      def addr?(x)
        x.is_a?(Array) && [:var, :const].include?(x[0])
      end

      def label?(x)
        x.is_a?(Array) && x[0] == :label
      end

      # XXX this should probably evaluate the value somehow
      def define_const(name, bytes, value)
        @symtab.define_const(name, bytes, value)
        const(name)
      end

      # Define a variable with the given name and size in bytes.
      # Redefinition only prints a warning. Returns a proxy for the variable.
      def define_var(name, bytes = arch.word_bytes)
        if @symtab.var?(name)
          STDERR.puts "[warning] attempted to redefine #{name}"
        else
          @symtab.define_var(name, bytes)
        end
        var(name)
      end

      # Proxy for the named variable; an undefined name is only reported.
      def var(name)
        STDERR.puts "[error] undefined variable #{name}" unless var?(name)
        VariableProxy.new(name)
      end

      # Proxy for the named constant; an undefined name is only reported.
      def const(name)
        STDERR.puts "[error] undefined constant #{name}" unless const?(name)
        ConstantProxy.new(name)
      end

      def var?(name)
        @symtab.var?(name)
      end

      def const?(name)
        @symtab.const?(name)
      end

      # Define a variable unless it exists.
      def var!(name, bytes = arch.word_bytes)
        if var?(name)
          var(name)
        else
          define_var(name, bytes)
        end
      end

      # Count the bytes that were encoded in the given block.
      def asm
        # stash the current number of bytes written
        instruction_offset = @ip

        print "0x#{@ip.to_s(16).rjust(4, '0')}\t" if DEBUG_OUTPUT

        yield

        puts if DEBUG_OUTPUT

        # return the number of bytes written (this must be the last
        # expression, otherwise the debug line above decides the result)
        @ip - instruction_offset
      end

      # Append a single byte to the IR. Values from -128 to -1 are stored as
      # their unsigned two's complement equivalent; anything outside
      # -128..255 raises.
      def emit_byte(byte)
        # Array#pack('c*') would do the signed conversion on its own, but
        # normalizing here keeps the IR and the debugging output unsigned.
        #
        # For a negative byte, 0xff - ~byte == 256 + byte.
        byte = 0xff - ~byte if byte < 0 && byte >= -128

        # make sure it's a byte
        raise "not a byte: #{byte.inspect}" unless byte == byte & 0xff

        print(byte.to_s(16).rjust(2, '0') + ' ') if DEBUG_OUTPUT

        @ir << byte
        @ip += 1
      end

      # addresses are emitted as [type, name] pairs, where type is :var,
      # :const, or :label
      def emit_addr(type, name)
        placeholder = [type, name]
        puts placeholder.inspect if DEBUG_OUTPUT
        @ir << placeholder

        # addresses are a constant size
        @ip += arch.pointer_bytes
      end

      def emit_var(name_or_proxy)
        proxy = name_or_proxy.is_a?(VariableProxy) ? name_or_proxy : var(name_or_proxy)
        emit_addr(:var, proxy)
      end

      # Accepts a constant name or an existing proxy, like #emit_var.
      def emit_const(name)
        proxy = name.is_a?(VariableProxy) ? name : const(name)
        emit_addr(:const, proxy)
      end

      def emit_label(name)
        print "<#{name}> " if DEBUG_OUTPUT
        emit_addr(:label, name)
      end

      def make_label(suffix = nil)
        @symtab.unique_label(suffix)
      end

      # Bind the label to the current instruction pointer.
      def define_label(name)
        puts "\n#{name} (0x#{@ip.to_s(16)}):" if DEBUG_OUTPUT
        @symtab.define_label(name, @ip)
      end

      # Convert an address to pointer-sized bytes in the byte order of the
      # target architecture.
      def addr_to_bytes(num)
        if arch.big_endian?
          num_to_big_endian(num)
        elsif arch.little_endian?
          num_to_little_endian(num)
        else
          raise 'oops'
        end
      end

      # Convert a number to an array of bytes, most significant byte first,
      # discarding excess bits.
      def num_to_big_endian(num)
        num_to_little_endian(num).reverse
      end

      # Convert a number to an array of bytes, least significant byte
      # first, discarding excess bits. Negative numbers come out in two's
      # complement form because >> is an arithmetic shift.
      def num_to_little_endian(num)
        (0...arch.pointer_bytes).map { |i| (num >> (8 * i)) & 0xff }
      end

    end

  end
end
|
||||
13
lib/compiler/asm/constant_proxy.rb
Normal file
13
lib/compiler/asm/constant_proxy.rb
Normal file
|
|
@ -0,0 +1,13 @@
|
|||
# The superclass has to be loaded before this class body is evaluated.
# require is idempotent, so this is harmless when it is already loaded.
require 'compiler/asm/variable_proxy'

class Compiler
  module ASM

    # A VariableProxy that identifies itself as a constant. The binary
    # assembler checks #const? to decide whether an address is relative
    # to the constant section or to the bss section.
    class ConstantProxy < VariableProxy

      def const?
        true
      end

    end

  end
end
|
||||
342
lib/compiler/asm/cstruct.rb
Normal file
342
lib/compiler/asm/cstruct.rb
Normal file
|
|
@ -0,0 +1,342 @@
|
|||
# Struct does some trickery with custom allocators so we can't
|
||||
# subclass it without writing C. Instead we define a CStruct class
|
||||
# that does something similar enough for our purpose. It is
|
||||
# subclassed just like any other class. A nice side-effect of this
|
||||
# syntax is that it is always clear that a CStruct is just a class and
|
||||
# instances of the struct are objects.
|
||||
#
|
||||
# Some light metaprogramming is used to make the following syntax possible:
|
||||
#
|
||||
# class MachHeader < CStruct
|
||||
# uint :magic
|
||||
# int :cputype
|
||||
# int :cpusubtype
|
||||
# ...
|
||||
# int :flags
|
||||
# end
|
||||
#
|
||||
# Inheritance works as you would expect.
|
||||
#
|
||||
# class LoadCommand < CStruct
|
||||
# uint32 :cmd
|
||||
# uint32 :cmdsize
|
||||
# end
|
||||
#
|
||||
# # inherits cmd and cmdsize as the first 2 fields
|
||||
# class SegmentCommand < LoadCommand
|
||||
# string :segname, 16
|
||||
# uint32 :vmaddr
|
||||
# uint32
|
||||
# end
|
||||
#
|
||||
# Nothing tricky or confusing there. Members of a CStruct class are
|
||||
# declared in the class definition. A different definition using a
|
||||
# more static approach probably wouldn't be very hard... if
|
||||
# performance is critical ... but then why are you using Ruby? ;-)
|
||||
#
|
||||
#
|
||||
# TODO support bit fields
|
||||
#
|
||||
# Bit fields should be supported by passing the number of bits a field
|
||||
# should occupy. Perhaps we could use the size 'pack' for the rest of
|
||||
# the field.
|
||||
#
|
||||
# class RelocationInfo < CStruct
|
||||
# int32 :address
|
||||
# uint32 :symbolnum, 24
|
||||
# pack :pcrel, 1
|
||||
# pack :length, 2
|
||||
# pack :extern, 1
|
||||
# pack :type, 4
|
||||
# end
|
||||
|
||||
# Base class for C-like binary structures. Subclasses declare their
# members with the type methods (uint32, string, ...) and instances can be
# serialized to, and unserialized from, binary strings.
class CStruct


  ###################
  # Class Constants #
  ###################

  # Size in bytes.
  SIZE_MAP = {
    :int8   => 1,
    :uint8  => 1,
    :int16  => 2,
    :uint16 => 2,
    :int32  => 4,
    :uint32 => 4,
    :string => lambda { |*opts| opts.first }, # first opt is size
    # the last 3 are to make the language more C-like
    :int  => 4,
    :uint => 4,
    :char => 1
  }.freeze

  # 32-bit
  PACK_MAP = {
    :int8   => 'c',
    :uint8  => 'C',
    :int16  => 's',
    :uint16 => 'S',
    :int32  => 'i',
    :uint32 => 'I',
    # strings are padded with NULs, or truncated, to exactly len bytes
    :string => lambda do |str, *opts|
      len = opts.first
      str.ljust(len, "\0")[0, len]
    end,
    # a few C-like names
    :int  => 'i',
    :uint => 'I',
    :char => 'C'
  }.freeze

  # Only needed when unpacking is different from packing, i.e. strings w/ lambdas in PACK_MAP.
  UNPACK_MAP = {
    # Consume exactly len bytes from the front of the buffer (mutating
    # it, so the next member starts at offset 0) and strip NUL padding.
    :string => lambda do |str, *opts|
      len = opts.first
      str.slice!(0, len).sub(/\0+\z/, '')
    end
  }.freeze

  ##########################
  # Class Instance Methods #
  ##########################

  # Note: const_get and const_set are used so the constants are bound
  #       at runtime, to the real class that has subclassed CStruct.

  class <<self

    # Give every subclass its own member tables.
    #
    # These "constants" are only constant references. Structs can
    # be modified. After the struct is defined it is still open,
    # but good practice would be not to change a struct after it
    # has been defined.
    #
    # To support inheritance properly the tables of the parent struct
    # are cloned when there are any, otherwise empty defaults are used.
    def inherited(subclass)
      [[:Members, []], [:MemberIndex, {}], [:MemberSizes, {}], [:MemberOptions, {}]].each do |const_name, default|
        table =
          begin
            subclass.const_get(const_name).clone
          rescue NameError
            # no ancestor has declared members yet
            default
          end
        subclass.const_set(const_name, table)
      end
    end


    # Define a method for each size name, and when that method is called it updates
    # the struct class accordingly.
    SIZE_MAP.keys.each do |type|

      define_method(type) do |name, *args|
        name = name.to_sym
        const_get(:MemberIndex)[name] = const_get(:Members).size
        const_get(:MemberSizes)[name] = type
        const_get(:MemberOptions)[name] = args
        const_get(:Members) << name
      end

    end


    # Return the number of members.
    def size
      const_get(:Members).size
    end
    alias_method :length, :size

    # Return the number of bytes occupied in memory or on disk.
    def bytesize
      const_get(:Members).inject(0) { |size, name| size + sizeof(name) }
    end

    # Size in bytes of the named member (String or Symbol).
    def sizeof(name)
      name = name.to_sym
      value = SIZE_MAP[const_get(:MemberSizes)[name]]
      value.respond_to?(:call) ? value.call(*const_get(:MemberOptions)[name]) : value
    end

    # Build a new struct from its binary representation.
    def new_from_bin(bin)
      new_struct = new
      new_struct.unserialize(bin)
    end

  end


  ####################
  # Instance Methods #
  ####################

  attr_reader :values

  # args are the member values, in declaration order.
  def initialize(*args)
    @values = args
  end

  # Return the binary string for the current values.
  def serialize
    pending = @values.clone
    members.zip(pack_pattern).map do |name, patt|
      value = pending.shift
      if patt.is_a?(String)
        [value].pack(patt)
      else
        patt.call(value, *member_options[name])
      end
    end.join
  end

  # Replace the current values with the ones decoded from bin.
  # The argument is not modified. Returns self.
  def unserialize(bin)
    # dup rather than clone: the working copy is consumed destructively
    # and clone would carry over a frozen state.
    bin = bin.dup
    @values = []
    members.zip(unpack_pattern).each do |name, patt|
      if patt.is_a?(String)
        @values += bin.unpack(patt)
        bin.slice!(0, sizeof(name))
      else
        # lambdas consume their own bytes from the buffer
        @values << patt.call(bin, *member_options[name])
      end
    end
    self
  end

  def pack_pattern
    members.map { |name| PACK_MAP[member_sizes[name]] }
  end

  def unpack_pattern
    members.map { |name| UNPACK_MAP[member_sizes[name]] || PACK_MAP[member_sizes[name]] }
  end

  # Read a member by position or by name.
  def [](name_or_idx)
    case name_or_idx

    when Numeric
      @values[name_or_idx]

    when String, Symbol
      @values[member_index[name_or_idx.to_sym]]

    else
      raise ArgumentError, "expected name or index, got #{name_or_idx.inspect}"
    end
  end

  # Write a member by position or by name.
  def []=(name_or_idx, value)
    case name_or_idx

    when Numeric
      @values[name_or_idx] = value

    when String, Symbol
      @values[member_index[name_or_idx.to_sym]] = value

    else
      raise ArgumentError, "expected name or index, got #{name_or_idx.inspect}"
    end
  end

  # Structs are equal when other is an instance of this struct's class
  # (or of a subclass) and holds the same values. The type check comes
  # first so that arbitrary objects compare as false instead of raising.
  def ==(other)
    other.is_a?(self.class) && other.values == @values
  end

  # Some of these are just to quack like Ruby's built-in Struct. YAGNI, but can't hurt either.

  def each(&block)
    @values.each(&block)
  end

  def each_pair(&block)
    members.zip(@values).each(&block)
  end

  def size
    members.size
  end
  alias_method :length, :size

  def sizeof(name)
    self.class.sizeof(name)
  end

  def bytesize
    self.class.bytesize
  end

  alias_method :to_a, :values


  # A few convenience methods.

  def members
    self.class::Members
  end

  def member_index
    self.class::MemberIndex
  end

  def member_sizes
    self.class::MemberSizes
  end

  def member_options
    self.class::MemberOptions
  end

  # The last expression is returned, so return self instead of junk.
  self
end
|
||||
|
||||
|
||||
# a small test
|
||||
# Smoke test, executed only when this file is run directly: declares a
# small struct, prints its layout, and round-trips it through
# serialize / new_from_bin.
if __FILE__ == $0
  class MachHeader < CStruct
    uint   :magic
    int    :cputype
    int    :cpusubtype
    string :segname, 16
  end

  # layout tables
  [MachHeader::Members, MachHeader::MemberIndex, MachHeader::MemberSizes].each do |table|
    puts table.inspect
  end
  puts "# of MachHeader members: #{MachHeader.size}, size in bytes: #{MachHeader.bytesize}"

  # member access by name
  header = MachHeader.new(0xfeedface, 7, 3, "foobar")
  [:magic, :cputype, :cpusubtype, :segname].each do |member|
    puts "#{member}(#{MachHeader.sizeof(member)}): #{header[member].inspect}"
  end

  # round trip
  puts header.pack_pattern.inspect
  packed = header.serialize
  puts "values: #{header.values.inspect}"
  restored = MachHeader.new_from_bin(packed)
  puts "new values: #{restored.values.inspect}"
  repacked = restored.serialize
  puts "serialized: #{packed.inspect}"
  puts "unserialized: #{repacked.inspect}"
  puts "new == old ? #{repacked == packed}"
end
|
||||
10
lib/compiler/asm/elf.rb
Normal file
10
lib/compiler/asm/elf.rb
Normal file
|
|
@ -0,0 +1,10 @@
|
|||
require 'compiler/asm/elf/structs'
|
||||
|
||||
class Compiler
  module ASM
    # Namespace for the ELF object format support classes.
    module ELF; end
  end
end
|
||||
7
lib/compiler/asm/elf/elfsymtab.rb
Normal file
7
lib/compiler/asm/elf/elfsymtab.rb
Normal file
|
|
@ -0,0 +1,7 @@
|
|||
module ASM
  # Symbol table for ELF output. Adds nothing to SymbolTable yet.
  class ELFSymbolTable < SymbolTable; end
end
|
||||
9
lib/compiler/asm/elf/elfwriter.rb
Normal file
9
lib/compiler/asm/elf/elfwriter.rb
Normal file
|
|
@ -0,0 +1,9 @@
|
|||
module ASM
  # Object file writer for ELF output. Adds nothing to ObjWriter yet.
  class ELFWriter < ObjWriter; end
end
|
||||
10
lib/compiler/asm/macho.rb
Normal file
10
lib/compiler/asm/macho.rb
Normal file
|
|
@ -0,0 +1,10 @@
|
|||
require 'compiler/asm/macho/structs'
|
||||
|
||||
class Compiler
  module ASM
    # Namespace for the Mach-O object format support classes.
    module MachO; end
  end
end
|
||||
61
lib/compiler/asm/macho/load_commands.rb
Normal file
61
lib/compiler/asm/macho/load_commands.rb
Normal file
|
|
@ -0,0 +1,61 @@
|
|||
require 'compiler/cstruct'
|
||||
|
||||
# The MachO module contains constants and structures related to the
|
||||
# Mach Object format (Mach-O). They are relevant to Darwin on OS X.
|
||||
#
|
||||
# Constants and structures as defined in /usr/include/mach-o/loader.h
|
||||
# on Mac OS X Leopard (10.5.7). Also see <mach-o/stab.h>,
|
||||
# <mach-o/nlist.h>, and <mach-o/reloc.h>.
|
||||
|
||||
class Compiler
  module MachO

    # Values for the cmd member of LoadCommand CStructs (incomplete!).
    LC_SEGMENT    = 0x1
    LC_SYMTAB     = 0x2
    LC_SYMSEG     = 0x3
    LC_THREAD     = 0x4
    LC_UNIXTHREAD = 0x5

    # Values for protection fields, maxprot and initprot.
    VM_PROT_NONE      = 0x00
    VM_PROT_READ      = 0x01
    VM_PROT_WRITE     = 0x02
    VM_PROT_EXECUTE   = 0x04
    VM_PROT_NO_CHANGE = 0x08
    VM_PROT_COPY      = 0x10

    # Fields shared by every load command; the structs below inherit
    # cmd and cmdsize as their first two members.
    class LoadCommand < CStruct
      [:cmd, :cmdsize].each { |field| uint32 field }
    end

    # Load command describing a segment.
    class SegmentCommand < LoadCommand
      string :segname, 16
      [:vmaddr, :vmsize, :fileoff, :filesize].each { |field| uint32 field }
      [:maxprot, :initprot].each { |field| int32 field }
      [:nsects, :flags].each { |field| uint32 field }
    end

    # Load command locating the symbol table and the string table.
    class SymbolTableCommand < LoadCommand
      uint32 :symoff    # Points to an array of Nlist structs.
      uint32 :nsyms     # Number of entries in said array.
      uint32 :stroff    # Offset of the string table.
      uint32 :strsize   # Size of the string table in bytes.
    end

    # Struct class to use for each supported load command type.
    LOAD_COMMAND_STRUCT_MAP = {
      LC_SEGMENT => SegmentCommand,
      LC_SYMTAB  => SymbolTableCommand
    }

  end
end
|
||||
46
lib/compiler/asm/macho/mach_header.rb
Normal file
46
lib/compiler/asm/macho/mach_header.rb
Normal file
|
|
@ -0,0 +1,46 @@
|
|||
require 'compiler/cstruct'
|
||||
|
||||
# The MachO module contains constants and structures related to the
|
||||
# Mach Object format (Mach-O). They are relevant to Darwin on OS X.
|
||||
#
|
||||
# Constants and structures as defined in /usr/include/mach-o/loader.h
|
||||
# on Mac OS X Leopard (10.5.7). Also see <mach-o/stab.h>,
|
||||
# <mach-o/nlist.h>, and <mach-o/reloc.h>.
|
||||
|
||||
class Compiler
  module MachO

    # Values for the magic field. MH_CIGAM is MH_MAGIC with its bytes
    # in reverse order.
    MH_MAGIC = 0xfeedface
    MH_CIGAM = 0xcefaedfe

    # Values for the filetype field.
    MH_OBJECT     = 0x1
    MH_EXECUTE    = 0x2
    MH_FVMLIB     = 0x3
    MH_CORE       = 0x4
    MH_PRELOAD    = 0x5
    MH_DYLIB      = 0x6
    MH_DYLINKER   = 0x7
    MH_BUNDLE     = 0x8
    MH_DYLIB_STUB = 0x9
    MH_DSYM       = 0xa

    # CPU types and subtypes (only Intel for now).
    CPU_TYPE_X86        = 7
    CPU_TYPE_I386       = CPU_TYPE_X86
    CPU_SUBTYPE_X86_ALL = 3

    # Appears at the beginning of every Mach object file.
    class MachHeader < CStruct
      uint32 :magic
      [:cputype, :cpusubtype].each { |field| int32 field }
      [:filetype, :ncmds, :sizeofcmds, :flags].each { |field| uint32 field }
    end

  end
end
|
||||
50
lib/compiler/asm/macho/nlist.rb
Normal file
50
lib/compiler/asm/macho/nlist.rb
Normal file
|
|
@ -0,0 +1,50 @@
|
|||
require 'compiler/cstruct'
|
||||
|
||||
# The MachO module contains constants and structures related to the
|
||||
# Mach Object format (Mach-O). They are relevant to Darwin on OS X.
|
||||
#
|
||||
# Constants and structures as defined in /usr/include/mach-o/loader.h
|
||||
# on Mac OS X Leopard (10.5.7). Also see <mach-o/stab.h>,
|
||||
# <mach-o/nlist.h>, and <mach-o/reloc.h>.
|
||||
|
||||
class Compiler
  module MachO

    ########################
    # Symbol table support #
    ########################

    # Masks for the n_type field of Nlist (see <mach-o/nlist.h> for more
    # details). The field consists of four bitfields:
    #
    #     uchar N_STAB : 3
    #     uchar N_PEXT : 1
    #     uchar N_TYPE : 3
    #     uchar N_EXT  : 1
    #
    N_STAB = 0xe0   # if any bits set => symbolic debugging info
    N_PEXT = 0x10   # private external symbol bit
    N_TYPE = 0x0e   # mask for the type bits
    N_EXT  = 0x01   # external symbol bit, set for external symbols (e.g. globals)

    # Values for N_TYPE. (incomplete!)
    N_UNDF = 0x0    # undefined, n_sect == NO_SECT
    N_ABS  = 0x2    # absolute, n_sect == NO_SECT
    N_SECT = 0xe    # defined in section number n_sect

    # Bounds for the n_sect field.
    NO_SECT  = 0
    MAX_SECT = 255

    # Nlist is used to describe symbols.
    class Nlist < CStruct
      uint32 :n_strx    # Index into string table. Index of zero is the empty string.
      [:n_type,         # Type flag (see the masks above).
       :n_sect].each { |field| uint8 field } # Section number (from 1) or NO_SECT.
      uint16 :n_desc    # TODO See <mach-o/stab.h>.
      uint32 :n_value   # The symbol's value (or stab offset).
    end

  end
end
|
||||
373
lib/compiler/asm/macho/object_file.rb
Normal file
373
lib/compiler/asm/macho/object_file.rb
Normal file
|
|
@ -0,0 +1,373 @@
|
|||
require 'asm/macho'
|
||||
|
||||
class Compiler
|
||||
module MachO
|
||||
|
||||
class ObjectFile
|
||||
|
||||
attr_accessor :header, :load_commands, :sections, :data
|
||||
attr_accessor :current_segment
|
||||
|
||||
def initialize(filetype = MH_OBJECT)
|
||||
@header = MachHeader.new(MH_MAGIC, CPU_TYPE_X86, CPU_SUBTYPE_X86_ALL, filetype, 0, 0, 0)
|
||||
@load_commands = [] # All defined segments.
|
||||
@sections = {} # Map of segment names to lists of sections.
|
||||
@section_disk_size = Hash.new(0) # Sections store their VM size so we need their sizes on disk.
|
||||
@section_offset = 0 # Offset of the next section's data, in bytes.
|
||||
@data = [] # Blobs of data that appear at the end of the file.
|
||||
# (text, data, relocation info, symtab, ...)
|
||||
@current_segment = nil # An alias for the last defined segment.
|
||||
@text_segname = nil # Name of __TEXT segement
|
||||
@text_sect_index = nil # Index of __text section
|
||||
@text_data_index = nil # Index into @data of __text section data
|
||||
@reloc_info = nil # Copy of relocation info array
|
||||
end
|
||||
|
||||
|
||||
# Define a LoadCommand in this file. The header's ncmds and sizeofcmds
|
||||
# fields are updated automatically to keep things in sync. If a block is
|
||||
# given it is passed the new LoadCommand struct after all other
|
||||
# initialization has been done.
|
||||
#
|
||||
# Other methods that create any type of load command should use this
|
||||
# method to do so. Right now the only types supported are LC_SEGMENT
|
||||
# and LC_SYMTAB. Modify asm/macho.rb to add structs for other types, and
|
||||
# add them to LOAD_COMMAND_STRUCT_MAP.
|
||||
|
||||
def load_command(cmdtype)
|
||||
struct = LOAD_COMMAND_STRUCT_MAP[cmdtype]
|
||||
unless struct
|
||||
raise "unsupported load command type: #{cmdtype.inspect}," +
|
||||
" supported types: #{LOAD_COMMAND_STRUCT_MAP.keys.sort.inspect}"
|
||||
end
|
||||
|
||||
# Fill in all the unknown fields with 0, this is nonsense for
|
||||
# string fields but that doesn't really matter.
|
||||
dummy_vals = [0] * (struct::Members.size - 2)
|
||||
|
||||
# cmd cmdsize ...
|
||||
command = struct.new(cmdtype, struct.bytesize, *dummy_vals)
|
||||
|
||||
@load_commands << command
|
||||
|
||||
@header[:ncmds] += 1
|
||||
@header[:sizeofcmds] += command.bytesize
|
||||
|
||||
yield(command) if block_given?
|
||||
|
||||
return command
|
||||
end
|
||||
|
||||
|
||||
# Define a segment in this file. If a block is given it is passed
|
||||
# the new segment. You can chain calls to segment, it returns self.
|
||||
#
|
||||
# Mach object files should only contain one anonymous segment. This
|
||||
# is not checked but should be kept in mind when crafting files.
|
||||
def segment(name, &block)
|
||||
@current_segment = load_command(LC_SEGMENT) do |seg|
|
||||
seg[:segname] = name
|
||||
block.call(seg) if block
|
||||
end
|
||||
return self
|
||||
end
|
||||
|
||||
|
||||
# Define a section under the given segment. nsects and cmdsize are
|
||||
# updated automatically. segname can't be derived from the segment
|
||||
# that this section is defined under, as they can differ.
|
||||
#
|
||||
# Mach object files have the __text, __data, and other common
|
||||
# sections all defined under one anonymous segment, but their segment
|
||||
# names reflect their final positions after linking. The linker plonks
|
||||
# them in the segment that they name.
|
||||
def section(name, segname, data = '', vmsize=data.size,
|
||||
segment = @current_segment, type = S_REGULAR)
|
||||
|
||||
# Create the new section.
|
||||
section = Section.new(name, segname, @section_offset, vmsize, 0, 0, 0, 0, 0, 0, type)
|
||||
|
||||
# Add this section to the map of segment names to sections.
|
||||
(@sections[segment[:segname]] ||= []) << section
|
||||
@section_disk_size[name] = data.size
|
||||
@section_offset += data.size
|
||||
@data << data if data.size > 0
|
||||
|
||||
# Update the header.
|
||||
@header[:sizeofcmds] += section.bytesize
|
||||
|
||||
# Update the segment.
|
||||
segment[:nsects] += 1
|
||||
segment[:cmdsize] += section.bytesize
|
||||
|
||||
yield(section) if block_given?
|
||||
|
||||
return section
|
||||
end
|
||||
|
||||
|
||||
|
||||
# Define a standard text section under the current segment (if present).
|
||||
#
|
||||
# If there is no current segment then we act according to the file's type
|
||||
# (specified in the header). Segments are created if they do not exist.
|
||||
#
|
||||
# When it is MH_OBJECT the text section is defined under a single,
|
||||
# nameless segment, but the section's segment name is set to the name
|
||||
# given here.
|
||||
#
|
||||
# For MH_EXECUTE files the text section goes under the segment with the
|
||||
# name given (__TEXT).
|
||||
|
||||
# Define the text (code) section and remember where it lives.
#
# data     - blob of machine code for the section.
# sectname - section name, '__text' by default.
# segname  - segment name recorded in the section header, '__TEXT' by default.
#
# If no segment has been defined yet, one is created first; its name is
# chosen by segname_based_on_filetype. Returns self so calls can be chained.
def text(data, sectname = '__text', segname='__TEXT')
  unless @current_segment
    segment(segname_based_on_filetype(segname)) do |seg|
      seg[:maxprot] = VM_PROT_READ | VM_PROT_WRITE | VM_PROT_EXECUTE
      seg[:initprot] = VM_PROT_READ | VM_PROT_EXECUTE
    end
  end

  section(sectname, segname, data) do |sect|
    # reloff and nreloc are calculated later (in calculate_offsets)
    sect[:flags] = 0x400 # S_ATTR_SOME_INSTRUCTIONS
  end

  # Remember where section and data are so we can update them later.
  #
  # #section files the new section under the *current segment's* name, which
  # is not necessarily `segname` (e.g. when a segment was already defined
  # before this call), so that is the key we must remember.
  @text_segname = @current_segment[:segname]
  @text_sect_index = @sections[@text_segname].length - 1
  # NOTE(review): #section appends no blob when data is empty, in which case
  # this index does not refer to text data -- confirm callers never pass ''.
  @text_data_index = @data.length - 1

  return self
end
|
||||
|
||||
def update_text(data)
|
||||
raise 'no __text segment defined yet' unless @text_data_index
|
||||
@data[@text_data_index] = data
|
||||
end
|
||||
|
||||
# Basis for #data, #const, and #bss methods.
|
||||
# Make sure a segment exists, then run the given block (if any).
#
# segname - segment name, passed through segname_based_on_filetype to pick
#           the actual name of a newly created segment.
# options - :writable => true adds VM_PROT_WRITE to the new segment's
#           protections; otherwise the segment is read-only.
#
# An already defined current segment is reused untouched. Returns self so
# calls can be chained.
def segment_based_on_filetype(segname, options = {})
  unless @current_segment
    permissions = VM_PROT_READ
    permissions |= VM_PROT_WRITE if options[:writable]
    segment(segname_based_on_filetype(segname)) do |seg|
      seg[:initprot] = seg[:maxprot] = permissions
    end
  end
  yield if block_given?
  return self
end
|
||||
|
||||
# Define a standard data section under the current segment (if present).
|
||||
# This behaves similarly to the text method.
|
||||
#
|
||||
def data(data, sectname = '__data', segname='__DATA')
|
||||
segment_based_on_filetype(segname, :writable => true) do
|
||||
section(sectname, segname, data)
|
||||
end
|
||||
end
|
||||
|
||||
# Define a standard const section under the current segment (if present).
|
||||
# This behaves similarly to the data method.
|
||||
#
|
||||
def const(data, sectname = '__const', segname='__DATA')
|
||||
segment_based_on_filetype(segname) do
|
||||
section(sectname, segname, data)
|
||||
end
|
||||
end
|
||||
|
||||
# Define a standard BSS section under the current segment (if present).
|
||||
# This behaves similarly to the data method but accepts a VM size instead
|
||||
# of a blob, and no data is written to file since this section is for
|
||||
# uninitialized data.
|
||||
#
|
||||
def bss(vmsize, sectname = '__bss', segname='__DATA')
|
||||
segment_based_on_filetype(segname, :writable => true) do
|
||||
section(sectname, segname, '', vmsize)
|
||||
end
|
||||
end
|
||||
|
||||
# Define a relocation table. Usually between segments and the
|
||||
# symbol table.
|
||||
#
|
||||
# Accepts an array of relocation info structs.
|
||||
def reloc(reloc_info)
|
||||
@data << if reloc_info.respond_to?(:join)
|
||||
reloc_info.map {|r| r.serialize}.join
|
||||
else
|
||||
reloc_info
|
||||
end
|
||||
@reloc_info = reloc_info.map {|x| x.clone}
|
||||
return self
|
||||
end
|
||||
|
||||
# Define a symbol table. This should usually be placed at the end of the
|
||||
# file.
|
||||
#
|
||||
# This function is overloaded to accept either an array of Nlist structs
|
||||
# packed into a byte string (i.e. a C array) and a string table, or a
|
||||
# single parameter: any type of SymbolTable.
|
||||
|
||||
def symtab(nlist_ary_or_symtab, stab = nil)
|
||||
if stab.nil?
|
||||
symtab = nlist_ary_or_symtab
|
||||
stab = symtab.stab
|
||||
nlist_ary = symtab.nlist_ary
|
||||
else
|
||||
nlist_ary = nlist_ary_or_symtab
|
||||
end
|
||||
|
||||
load_command(LC_SYMTAB) do |st|
|
||||
st[:nsyms] = nlist_ary.size
|
||||
st[:strsize] = stab.size
|
||||
# symoff and stroff are filled in when offsets are recalculated.
|
||||
end
|
||||
|
||||
# puts ">>> Defining symbol table:"
|
||||
# puts ">>> #{nlist_ary.size} symbols"
|
||||
# puts ">>> stab = #{stab.inspect}"
|
||||
# puts ">>> nlist_ary = #{nlist_ary.inspect}"
|
||||
# puts ">>> (serialized) = #{nlist_ary.map{|n|n.serialize}.join.inspect}"
|
||||
|
||||
@data << nlist_ary.map {|n| n.serialize}.join
|
||||
@data << stab
|
||||
return self
|
||||
end
|
||||
|
||||
|
||||
# Serialize the entire MachO file into a byte string. This is simple
|
||||
# thanks to CStruct#serialize.
|
||||
|
||||
# Serialize the whole file into one byte string.
#
# Offsets are recalculated first (calculate_offsets), then the output is the
# header, followed by each load command together with the section headers
# filed under it, followed by all data blobs in the order they were added.
def serialize
  # TODO sanity checks, e.g. assert(@header[:ncmds] == @load_command.size)
  # ... perhaps an option to recalculate such data as well.

  # Now that we have all the pieces of the file defined we can calculate
  # the file offsets of segments and sections.
  calculate_offsets

  # Part 2: load commands. Only segments own section headers; a segment
  # without any sections simply contributes its own bytes.
  commands = @load_commands.map do |cmd|
    sects = cmd[:cmd] == LC_SEGMENT ? (@sections[cmd[:segname]] || []) : []
    cmd.serialize + sects.map { |sect| sect.serialize }.join
  end

  # Part 1 (Mach header) + Part 2 (load commands) + Part 3 (binary data).
  @header.serialize + commands.join + @data.join
end
|
||||
|
||||
|
||||
# Update the file offsets in segments and sections.
|
||||
|
||||
# Recompute sizes and file offsets of all load commands and sections.
#
# Works in two passes over @load_commands: the first fills in segment sizes
# and measures the headers, the second assigns file offsets to the data that
# follows the headers. Raises on load command types other than LC_SEGMENT
# and LC_SYMTAB.
def calculate_offsets

  # Maintain the offset into the file on disk. This is used
  # to update the various structures.
  offset = @header.bytesize

  # First pass over load commands. Most sizes are filled in here.
  @load_commands.each do |cmd|
    case cmd[:cmd]

    when LC_SEGMENT
      seg = cmd
      # A segment may have been defined without any sections.
      sections = @sections[seg[:segname]] || []
      section_size = sections.size * Section.bytesize
      section_vm_size = sections.inject(0) { |total, sect| total + sect[:size] }
      section_disk_size = sections.inject(0) do |total, sect|
        total + @section_disk_size[sect[:sectname]]
      end

      ### TODO this should be redundant. try commenting it out one day.
      seg[:nsects] = sections.size
      seg[:cmdsize] = seg.bytesize + section_size
      ###

      seg[:vmsize] = section_vm_size
      seg[:filesize] = section_disk_size

    when LC_SYMTAB
      # nop

    else
      raise "unsupported load command: #{cmd.inspect}"
    end

    offset += cmd[:cmdsize]
  end

  # offset now points to the end of the Mach-O headers, or the beginning
  # of the binary blobs of section data at the end.

  # Second pass over load commands. Fill in file offsets.
  @load_commands.each do |cmd|
    case cmd[:cmd]

    when LC_SEGMENT
      seg = cmd
      seg[:fileoff] = offset
      (@sections[seg[:segname]] || []).each do |sect|
        sect[:offset] = offset
        offset += @section_disk_size[sect[:sectname]]
      end

    when LC_SYMTAB
      # An empty relocation list occupies no bytes and has no first entry
      # to measure, so it is treated the same as no relocation info.
      if @reloc_info && !@reloc_info.empty?
        # update text section with relocation info
        text_sect = @sections[@text_segname][@text_sect_index]
        text_sect[:reloff] = offset
        text_sect[:nreloc] = @reloc_info.length
        offset += @reloc_info.first.bytesize * @reloc_info.length
      end
      st = cmd
      st[:symoff] = offset
      offset += st[:nsyms] * Nlist.bytesize
      st[:stroff] = offset
      offset += st[:strsize]

    # No else clause is necessary, the first iteration should have caught them.

    end
  end # @load_commands.each

end # def calculate_offsets
|
||||
|
||||
|
||||
#######
|
||||
private
|
||||
#######
|
||||
|
||||
def segname_based_on_filetype(segname)
|
||||
case @header[:filetype]
|
||||
when MH_OBJECT
|
||||
''
|
||||
when MH_EXECUTE
|
||||
segname
|
||||
else
|
||||
raise "unsupported MachO file type: #{@header.inspect}"
|
||||
end
|
||||
end
|
||||
|
||||
|
||||
end
|
||||
|
||||
end
|
||||
end
|
||||
35
lib/compiler/asm/macho/relocation_info.rb
Normal file
35
lib/compiler/asm/macho/relocation_info.rb
Normal file
|
|
@ -0,0 +1,35 @@
|
|||
require 'compiler/cstruct'
|
||||
|
||||
# The MachO module contains constants and structures related to the
|
||||
# Mach Object format (Mach-O). They are relevant to Darwin on OS X.
|
||||
#
|
||||
# Constants and structures as defined in /usr/include/mach-o/loader.h
|
||||
# on Mac OS X Leopard (10.5.7). Also see <mach-o/stab.h>,
|
||||
# <mach-o/nlist.h>, and <mach-o/reloc.h>.
|
||||
|
||||
class Compiler
|
||||
module MachO
|
||||
|
||||
class RelocationInfo < CStruct
|
||||
int32 :r_address # offset in the section to what is being relocated
|
||||
uint32 :r_info
|
||||
end
|
||||
|
||||
# NOTE: r_info is a packed bit field with the following members:
|
||||
#
|
||||
# (CStruct should eventually support bitfields, but doesn't right now.)
|
||||
#
|
||||
# r_symbolnum : 24 -- symbol index if r_extern == 1 or section ordinal if r_extern == 0
|
||||
# r_pcrel : 1 -- was relocated pc relative already
|
||||
# r_length : 2 -- 0=byte, 1=word, 2=long, 3=quad
|
||||
# r_extern : 1 -- 1 for exported symbols, 0 otherwise
|
||||
# r_type : 4 -- if not 0, machine specific relocation type (always 0)
|
||||
|
||||
R_ABS = 0 # Absolute relocation type
|
||||
# (r_symbolnum == R_ABS for absolute symbols that don't need reloc)
|
||||
|
||||
# Relocation types (r_type)
|
||||
GENERIC_RELOC_VANILLA = 0
|
||||
|
||||
end
|
||||
end
|
||||
34
lib/compiler/asm/macho/section.rb
Normal file
34
lib/compiler/asm/macho/section.rb
Normal file
|
|
@ -0,0 +1,34 @@
|
|||
require 'compiler/cstruct'
|
||||
|
||||
# The MachO module contains constants and structures related to the
|
||||
# Mach Object format (Mach-O). They are relevant to Darwin on OS X.
|
||||
#
|
||||
# Constants and structures as defined in /usr/include/mach-o/loader.h
|
||||
# on Mac OS X Leopard (10.5.7). Also see <mach-o/stab.h>,
|
||||
# <mach-o/nlist.h>, and <mach-o/reloc.h>.
|
||||
|
||||
class Compiler
|
||||
module MachO
|
||||
|
||||
class Section < CStruct
|
||||
string :sectname, 16
|
||||
string :segname, 16
|
||||
uint32 :addr
|
||||
uint32 :size
|
||||
uint32 :offset
|
||||
uint32 :align
|
||||
uint32 :reloff
|
||||
uint32 :nreloc
|
||||
uint32 :flags
|
||||
uint32 :reserved1
|
||||
uint32 :reserved2
|
||||
end
|
||||
|
||||
# Values for the type bitfield (mask 0x000000ff) of the flags field.
|
||||
# (incomplete!)
|
||||
S_REGULAR = 0x0
|
||||
S_ZEROFILL = 0x1
|
||||
S_CSTRING_LITERALS = 0x2
|
||||
|
||||
end
|
||||
end
|
||||
53
lib/compiler/asm/macho/structs.rb
Normal file
53
lib/compiler/asm/macho/structs.rb
Normal file
|
|
@ -0,0 +1,53 @@
|
|||
require 'compiler/macho/mach_header'
|
||||
require 'compiler/macho/load_commands'
|
||||
require 'compiler/macho/section'
|
||||
require 'compiler/macho/relocation_info'
|
||||
|
||||
# The MachO module contains constants and structures related to the
|
||||
# Mach Object format (Mach-O). They are relevant to Darwin on OS X.
|
||||
#
|
||||
# Constants and structures as defined in /usr/include/mach-o/loader.h
|
||||
# on Mac OS X Leopard (10.5.7). Also see <mach-o/stab.h>,
|
||||
# <mach-o/nlist.h>, and <mach-o/reloc.h>.
|
||||
|
||||
class Compiler
|
||||
module MachO
|
||||
|
||||
########################
|
||||
# Symbol table support #
|
||||
########################
|
||||
|
||||
# Nlist is used to describe symbols.
|
||||
class Nlist < CStruct
|
||||
uint32 :n_strx # Index into string table. Index of zero is the empty string.
|
||||
uint8 :n_type # Type flag (see below).
|
||||
uint8 :n_sect # Section number (from 1) or NO_SECT.
|
||||
uint16 :n_desc # TODO See <mach-o/stab.h>.
|
||||
uint32 :n_value # The symbol's value (or stab offset).
|
||||
end
|
||||
|
||||
# Type flag (see <mach-o/nlist.h> for more details)
|
||||
# ---------
|
||||
#
|
||||
# This field consists of four bitfields:
|
||||
#
|
||||
# uchar N_STAB : 3
|
||||
# uchar N_PEXT : 1
|
||||
# uchar N_TYPE : 3
|
||||
# uchar N_EXT : 1
|
||||
#
|
||||
N_STAB = 0xe0 # if any bits set => symbolic debugging info
|
||||
N_PEXT = 0x10 # private external symbol bit
|
||||
N_TYPE = 0x0e # mask for the type bits
|
||||
N_EXT = 0x01 # external symbol bit, set for external symbols (e.g. globals)
|
||||
|
||||
# Values for N_TYPE. (incomplete!)
|
||||
N_UNDF = 0x0 # undefined, n_sect == NO_SECT
|
||||
N_ABS = 0x2 # absolute, n_sect == NO_SECT
|
||||
N_SECT = 0xe # defined in section number n_sect
|
||||
|
||||
NO_SECT = 0
|
||||
MAX_SECT = 255
|
||||
|
||||
end
|
||||
end
|
||||
31
lib/compiler/asm/macho/symbol.rb
Normal file
31
lib/compiler/asm/macho/symbol.rb
Normal file
|
|
@ -0,0 +1,31 @@
|
|||
require 'compiler/macho'
|
||||
|
||||
class Compiler
|
||||
module MachO
|
||||
|
||||
class Symbol
|
||||
|
||||
attr_accessor :name, :type, :segnum, :desc, :value
|
||||
|
||||
def initialize(name, type, segnum, desc, value)
|
||||
@name = name
|
||||
@type = type
|
||||
@segnum = segnum
|
||||
@desc = desc
|
||||
@value = value
|
||||
end
|
||||
|
||||
|
||||
def to_nlist(strx)
|
||||
Nlist.new(strx, @type, @segnum, @desc, @value)
|
||||
end
|
||||
|
||||
|
||||
def to_s
|
||||
@name
|
||||
end
|
||||
|
||||
end
|
||||
|
||||
end
|
||||
end
|
||||
88
lib/compiler/asm/macho/symbol_table.rb
Normal file
88
lib/compiler/asm/macho/symbol_table.rb
Normal file
|
|
@ -0,0 +1,88 @@
|
|||
require 'compiler/macho/structs'
|
||||
require 'compiler/macho/symbol'
|
||||
require 'compiler/asm/symbol_table'
|
||||
|
||||
class Compiler
|
||||
module MachO
|
||||
|
||||
class SymbolTable < Assembler::SymbolTable
|
||||
|
||||
def make_symbols(vars, base_addr, type, segnum)
|
||||
# Note: Sorting a Ruby hash gives an alist, e.g. [[<key>, <value>], ...]
|
||||
# We can use map on it as if it were a hash so it works nicely.
|
||||
vars.sort { |a,b| a[1] <=> b[1] }.
|
||||
map do |name, offset|
|
||||
Symbol.new(name, type, segnum, 0, base_addr + offset)
|
||||
end
|
||||
end
|
||||
|
||||
def all_symbols
|
||||
# TODO FIXME:
|
||||
# - the last var exported ends up after main somewhere... WTF?!
|
||||
# - All labels are exported. This should be changed and only functions exported!
|
||||
|
||||
section = 1
|
||||
|
||||
# Functions (section #1, __text)
|
||||
symbols = make_symbols(@labels, text_offset, N_SECT | N_EXT, section)
|
||||
section += 1
|
||||
|
||||
# Constants (section #2, __const)
|
||||
if @consts.size > 0
|
||||
symbols += make_symbols(@consts, const_offset, N_SECT, section)
|
||||
section += 1
|
||||
end
|
||||
|
||||
# Variables (section #3, __bss)
|
||||
if @vars.size > 0
|
||||
symbols += make_symbols(@vars, bss_offset, N_SECT, section)
|
||||
end
|
||||
|
||||
return symbols
|
||||
end
|
||||
|
||||
# this is fairly stupid but works
|
||||
def bss_section
|
||||
@consts.size > 0 ? 3 : 2
|
||||
end
|
||||
|
||||
def nlist_ary
|
||||
symbols = {}
|
||||
strx = 1
|
||||
ary = []
|
||||
all_symbols.each do |sym|
|
||||
key = sym.name.to_sym
|
||||
unless symbols.has_key?(key)
|
||||
symbols[key] = strx
|
||||
strx += sym.name.length + 1 # +1 for the null byte
|
||||
end
|
||||
ary << sym.to_nlist(symbols[key])
|
||||
end
|
||||
return ary
|
||||
end
|
||||
|
||||
def stab
|
||||
# The empty strings result in a string that begins and ends with a null byte
|
||||
['', all_symbols, ''].flatten.map { |sym| sym.to_s }.join("\0")
|
||||
end
|
||||
|
||||
def reloc(r_address, r_symbolnum = 0, r_length = 2, r_extern = 0, r_pcrel = 0, r_type = 0)
|
||||
r_info = (r_type << 28) | (r_extern << 27) | (r_length << 25) |
|
||||
(r_pcrel << 24) | r_symbolnum
|
||||
@reloc_info << RelocationInfo.new(r_address, r_info)
|
||||
end
|
||||
|
||||
def reloc_info
|
||||
n = bss_section
|
||||
@reloc_info.each {|r| r[:r_info] |= n}
|
||||
end
|
||||
|
||||
def calculate_offsets(text_size)
|
||||
@const_offset = @text_offset + text_size
|
||||
@bss_offset = @const_offset + @const_size
|
||||
end
|
||||
|
||||
end
|
||||
|
||||
end
|
||||
end
|
||||
28
lib/compiler/asm/object_file.rb
Normal file
28
lib/compiler/asm/object_file.rb
Normal file
|
|
@ -0,0 +1,28 @@
|
|||
class Compiler
|
||||
module ASM
|
||||
|
||||
class UnimplementedMethodError < RuntimeError; end
|
||||
|
||||
|
||||
# Abstract base class.
|
||||
# Abstract base class for object file writers.
#
# Subclasses are expected to override the methods listed at the bottom;
# until they do, calling one raises UnimplementedMethodError.
class ObjWriter

  # Serialize this object file and write the bytes to +filename+.
  #
  # Serialization happens before the file is opened, so a failure to
  # serialize does not leave an empty or truncated file behind.
  def write!(filename)
    bytes = serialize
    File.open(filename, 'wb') do |file|
      file.print(bytes)
    end
  end

  # Raise UnimplementedMethodError carrying the name of the missing method.
  def fail(name)
    raise UnimplementedMethodError, name
  end

  # These methods must be defined for most uses of the library.
  # The stubs accept any arguments and block so that calling one with the
  # signature a concrete writer would use still reports the missing method.
  %w[header segment section text data bss symtab serialize].each do |name|
    define_method(name) { |*_args, &_block| fail(name) }
  end

end
|
||||
|
||||
end
|
||||
end
|
||||
70
lib/compiler/asm/register_proxy.rb
Normal file
70
lib/compiler/asm/register_proxy.rb
Normal file
|
|
@ -0,0 +1,70 @@
|
|||
class Compiler
|
||||
module ASM
|
||||
|
||||
# Acts like a register and can be used as the base or index in an
|
||||
# effective address.
|
||||
#
|
||||
# e.g. [EAX] or [ESI+EBX] or [EAX + 0xff] or [EAX + EDX * 2]
|
||||
# A register that can also describe an effective address: a base register
# plus an optional index and an optional scale applied after the index.
class RegisterProxy

  attr_reader :name, :size, :regnum
  attr_reader :base, :index, :scale

  # name   - register name, used by #to_s.
  # size   - register size.
  # regnum - register number.
  def initialize(name, size, regnum)
    @name = name # attrs are read-only so sharing is ok
    @size = size
    @regnum = regnum
    @base = self
  end

  # Return a copy of this register with +index+ attached.
  # Raises if an index has already been attached.
  def +(index)
    raise "index already specified" if @index
    new_reg = self.clone
    new_reg.instance_variable_set('@index', index)
    new_reg
  end

  # Attach a scale of 1, 2, 4 or 8 to this register and return self.
  # Raises unless an index was attached first, if a scale is already set,
  # or if the scale is not one of the supported values.
  def *(scale)
    raise "index must come first" unless @index
    # Check the stored scale, not the argument (which is always truthy).
    raise "scale already specified" if @scale
    raise "unsupported scale: #{scale}" unless scale.to_s.match(/\A[1248]\z/)
    @scale = scale
    self
  end

  # The scale, or nil when none has been set.
  def scale?
    @scale
  end

  # The index, or nil when none has been set.
  def index?
    @index
  end

  # True when this is a plain register with neither index nor scale.
  def register?
    @scale.nil? && @index.nil?
  end

  # e.g. "eax", "eax+8" or "eax+8*4"
  def to_s
    [ @name.to_s,
      @index && "+#{@index}",
      @scale && "*#{@scale}"
    ].compact.join
  end

  def inspect
    to_s
  end

end
|
||||
|
||||
end
|
||||
end
|
||||
99
lib/compiler/asm/symbol_table.rb
Normal file
99
lib/compiler/asm/symbol_table.rb
Normal file
|
|
@ -0,0 +1,99 @@
|
|||
class Compiler
|
||||
module ASM
|
||||
|
||||
|
||||
# Abstract symbol table.
|
||||
#
|
||||
# Basically a big map of variable, constant, and label names to
|
||||
# offsets within their respective sections. Final addresses are
|
||||
# calculated from these offsets on the 2nd pass when we know where
|
||||
# things will actually live in memory.
|
||||
|
||||
class SymbolTable
|
||||
|
||||
attr_accessor :text_offset, :bss_offset, :const_offset
|
||||
attr_reader :const_data, :const_size, :bss_size, :reloc_info
|
||||
|
||||
def initialize
|
||||
@vars = {} # Map of variable names to offsets. (bss vars)
|
||||
@consts = {} # Map of constant names to offsets.
|
||||
@funcs = {} # map of function names to offsets.
|
||||
|
||||
# Initial data to load into memory (data for __DATA segment).
|
||||
@const_data = ''
|
||||
|
||||
@const_size = 0 # Size of const section.
|
||||
@bss_size = 0 # Size of bss section.
|
||||
|
||||
# Map names to locations.
|
||||
@labels = Hash.new {|h, key| raise "undefined label: #{key}"}
|
||||
@num_labels = 0 # Used to generate unique labels.
|
||||
@num_labels_with_suffix = Hash.new(0)
|
||||
|
||||
# Relocation info. Subclasses should define a reloc method.
|
||||
@reloc_info = []
|
||||
|
||||
@text_offset = 0
|
||||
@bss_offset = 0
|
||||
@const_offset = 0
|
||||
end
|
||||
|
||||
# Generate a unique label.
|
||||
def unique_label(suffix = nil)
|
||||
@num_labels += 1
|
||||
if suffix
|
||||
@num_labels_with_suffix[suffix] += 1
|
||||
suffix = "_#{suffix}_#{@num_labels_with_suffix[suffix]}"
|
||||
end
|
||||
name = "L#{sprintf "%06d", @num_labels}#{suffix}"
|
||||
return name
|
||||
end
|
||||
|
||||
def define_label(name, offset)
|
||||
@labels[name] = offset
|
||||
return name
|
||||
end
|
||||
|
||||
|
||||
def lookup_label(name)
|
||||
@labels[name]
|
||||
end
|
||||
|
||||
|
||||
def define_var(name, bytes)
|
||||
@vars[name] = @bss_size
|
||||
@bss_size += bytes
|
||||
end
|
||||
|
||||
|
||||
def define_const(name, value, bytes)
|
||||
@consts[name] = @const_size
|
||||
@const_size += bytes
|
||||
@const_data << [value].pack('i')
|
||||
end
|
||||
|
||||
def define_func(name, offset)
|
||||
@funcs[name] = offset
|
||||
end
|
||||
|
||||
|
||||
def var(name)
|
||||
@vars[name]
|
||||
end
|
||||
|
||||
def var?(name)
|
||||
@vars.has_key?(name)
|
||||
end
|
||||
|
||||
def const(name)
|
||||
@consts[name]
|
||||
end
|
||||
|
||||
def const?(name)
|
||||
@consts.has_key?(name)
|
||||
end
|
||||
|
||||
end
|
||||
|
||||
end
|
||||
end
|
||||
73
lib/compiler/asm/text_assembler.rb
Normal file
73
lib/compiler/asm/text_assembler.rb
Normal file
|
|
@ -0,0 +1,73 @@
|
|||
# sjs
|
||||
# may 2009
|
||||
|
||||
require 'compiler/asm/assembler'
|
||||
|
||||
class Compiler
|
||||
module ASM
|
||||
|
||||
class TextAssembler < Assembler
|
||||
|
||||
def initialize(delegate)
|
||||
super(delegate)
|
||||
|
||||
@vars = {} # Symbol table, maps names to locations in BSS.
|
||||
@data = ''
|
||||
@bss = ''
|
||||
@code = ''
|
||||
|
||||
unless File.readable?(template_filename)
|
||||
raise "unsupported platform/arch: #{delegate.platform}/#{arch.name}"
|
||||
end
|
||||
end
|
||||
|
||||
def template_filename
|
||||
@template_filename ||= File.join(File.dirname(__FILE__), arch.name, "template.#{delegate.platform}.asm")
|
||||
end
|
||||
|
||||
# Define a constant
|
||||
def const(name, value)
|
||||
end
|
||||
|
||||
# Define a variable with the given name and size in bytes.
|
||||
def define_var(name, bytes = arch.bytes)
|
||||
unless var?(name)
|
||||
define_var_impl(name, bytes)
|
||||
else
|
||||
STDERR.puts "[warning] attempted to redefine #{name}"
|
||||
end
|
||||
end
|
||||
|
||||
def define_var_impl(name, bytes = arch.bytes)
|
||||
end
|
||||
|
||||
def var(name)
|
||||
@vars[name]
|
||||
end
|
||||
alias_method :var?, :var
|
||||
|
||||
|
||||
# Emit a line of code wrapped between a tab and a newline.
|
||||
def emit(code, options = {})
|
||||
tab = options.has_key?(:tab) ? options[:tab] : "\t"
|
||||
@code << "#{tab}#{code}\n"
|
||||
end
|
||||
|
||||
def label(name = nil)
|
||||
# FIXME
|
||||
name = super
|
||||
@labels[name] = name
|
||||
return name
|
||||
end
|
||||
|
||||
def output
|
||||
end
|
||||
|
||||
def emit_label(name = label)
|
||||
emit("#{name}:", tab: nil)
|
||||
end
|
||||
|
||||
end
|
||||
|
||||
end
|
||||
end
|
||||
43
lib/compiler/asm/variable_proxy.rb
Normal file
43
lib/compiler/asm/variable_proxy.rb
Normal file
|
|
@ -0,0 +1,43 @@
|
|||
class Compiler
|
||||
module ASM
|
||||
|
||||
# Wrap a variable's address so that we can perform arithmetic on it
|
||||
# before resolving it when we know where things will go in memory.
|
||||
# All we do is catch arithmetic ops and then provide a means to
|
||||
# resolve a final address by replaying them later.
|
||||
#
|
||||
# e.g. [symtab.var('i')] or [symtab.var('i') * 2]
|
||||
# Records arithmetic applied to a named variable so the operations can be
# replayed against a concrete address once it is known.
class VariableProxy

  attr_reader :name
  attr_accessor :ops

  def initialize(name)
    @name = name
    @ops = [] # recorded operations, each one [operator, *args]
  end

  # Each operator returns a new proxy holding every operation recorded so
  # far plus the new one; the receiver is left unchanged. Carrying the
  # history forward is what makes chained expressions such as
  # (var + 4) * 2 resolve correctly.
  %w[+ * / - % & |].each do |op|
    define_method(op) do |*args|
      new_proxy = self.class.new(@name)
      new_proxy.ops = @ops + [[op, *args]]
      new_proxy
    end
  end

  # XXX should this perhaps use the offset instead?
  #
  # Replay the recorded operations, in order, starting from +base_addr+,
  # and return the result.
  def resolve(base_addr)
    @ops.inject(base_addr) do |addr, op|
      addr.send(*op)
    end
  end

  # Overridden by ConstantProxy
  def const?
    false
  end

end
|
||||
|
||||
end
|
||||
end
|
||||
42
lib/compiler/asm/x86/arch.rb
Normal file
42
lib/compiler/asm/x86/arch.rb
Normal file
|
|
@ -0,0 +1,42 @@
|
|||
require 'compiler/asm/arch'
|
||||
|
||||
class Compiler
|
||||
module ASM
|
||||
module X86
|
||||
|
||||
module Arch
|
||||
|
||||
BINARY_PREAMBLE = {
|
||||
'linux' => [],
|
||||
|
||||
'darwin' => [ 0x55, # push ebp
|
||||
0x89, 0xe5, # mov ebp, esp
|
||||
0x81, 0xec, 8, 0, 0, 0 # sub esp, 8
|
||||
]
|
||||
}
|
||||
|
||||
BINARY_POSTAMBLE = {
|
||||
'linux' => [ 0x89, 0xc3, # mov ebx, eax (exit code)
|
||||
0xb8, 1, 0, 0, 0, # mov eax, 1
|
||||
0xcd, 0x80 # int 0x80
|
||||
],
|
||||
|
||||
'darwin' => [ 0xc9, # leave
|
||||
0xc3 # ret
|
||||
]
|
||||
}
|
||||
|
||||
def self.instance
|
||||
@instance ||= ASM::Arch.new({
|
||||
'bits' => 32,
|
||||
'word_bits' => 16,
|
||||
'preamble' => BINARY_PREAMBLE,
|
||||
'postamble' => BINARY_POSTAMBLE
|
||||
})
|
||||
end
|
||||
|
||||
end
|
||||
|
||||
end
|
||||
end
|
||||
end
|
||||
866
lib/compiler/asm/x86/binary_assembler.rb
Normal file
866
lib/compiler/asm/x86/binary_assembler.rb
Normal file
|
|
@ -0,0 +1,866 @@
|
|||
# A very basic x86 assembler library for Ruby. Generally the
|
||||
# instructions implemented are the minimum needed by the compiler this
|
||||
# is written for. x86 is just too big.
|
||||
#
|
||||
# sjs
|
||||
# may 2009
|
||||
#
|
||||
# Refer to the Intel[1] or AMD documentation on x86 for explanations
|
||||
# of Mod-R/M encoding, the Scale-Index-Base (SIB) byte, opcode groups.
|
||||
#
|
||||
# The start and exit shell codes were obtained by disassembling
|
||||
# minimal binaries on the respective platforms.
|
||||
|
||||
require 'json'
|
||||
require 'compiler/asm/binary_assembler'
|
||||
require 'compiler/asm/x86/arch'
|
||||
require 'compiler/asm/x86/registers'
|
||||
|
||||
class Compiler
|
||||
module ASM
|
||||
module X86
|
||||
|
||||
class BinaryAssembler < ASM::BinaryAssembler
|
||||
|
||||
include Registers
|
||||
|
||||
DEBUG_OUTPUT = false
|
||||
|
||||
SIGNED_BYTE = -128..127
|
||||
|
||||
# This is used for encoding instructions. Just as the equivalent
|
||||
# assembly would contain "BITS 32", binary is generated for 32-bit
|
||||
# protected mode.
|
||||
DEFAULT_OPERAND_SIZE = :dword
|
||||
|
||||
SIZE_MAP = {
|
||||
byte: 8,
|
||||
word: 16,
|
||||
dword: 32
|
||||
}
|
||||
|
||||
def emit_entry_point
|
||||
# Always include the _main entry point in our symbol table. It begins at the
|
||||
# beginning of the __TEXT segment, 0x0.
|
||||
define_label('_main')
|
||||
end
|
||||
|
||||
# register for return values
|
||||
def return_reg
|
||||
EAX
|
||||
end
|
||||
|
||||
|
||||
### Virtual ISA used by parser.
|
||||
|
||||
def load(n)
|
||||
mov(return_reg, n)
|
||||
end
|
||||
|
||||
def load_var(name)
|
||||
mov(return_reg, [var(name)])
|
||||
end
|
||||
|
||||
def store_var(name, reg)
|
||||
mov([name], reg)
|
||||
end
|
||||
|
||||
|
||||
# stack_* methods expect op1 on the stack
|
||||
|
||||
def stack_add(reg)
|
||||
pop(EBX)
|
||||
add(reg, EBX)
|
||||
end
|
||||
|
||||
def stack_sub(reg)
|
||||
pop(EBX)
|
||||
sub(reg, EBX)
|
||||
end
|
||||
|
||||
def stack_mul_signed(reg)
|
||||
pop(EBX)
|
||||
imul(EBX)
|
||||
end
|
||||
|
||||
def stack_div(reg)
|
||||
pop(EBX) # Get op1
|
||||
xchg(reg, EBX) # Swap the divisor and dividend into
|
||||
# the correct places.
|
||||
|
||||
# idiv uses edx:eax as the dividend so we need to ensure that edx
|
||||
# is correctly sign-extended w.r.t. eax.
|
||||
cdq # Sign-extend eax into edx (Convert Double to Quad).
|
||||
|
||||
idiv(EBX) # Divide a (eax) by b (ebx).
|
||||
end
|
||||
|
||||
# Bitwise OR. Pops op1 from the stack into EBX and ORs it into +reg+.
#
# reg - register holding op2 on entry; receives the result
def stack_or(reg)
  pop(EBX)
  # `or` is a Ruby keyword so it needs an explicit receiver. The
  # instruction takes (dest, src); the popped operand is the source.
  self.or(reg, EBX)
end
|
||||
|
||||
# Bitwise XOR. Pops op1 from the stack into EBX and XORs it into +reg+.
#
# reg - register holding op2 on entry; receives the result
def stack_xor(reg)
  pop(EBX)
  # xor takes (dest, src); the popped operand is the source.
  xor(reg, EBX)
end
|
||||
|
||||
# Bitwise AND. Pops op1 from the stack into EBX and ANDs it into +reg+.
#
# reg - register holding op2 on entry; receives the result
def stack_and(reg)
  pop(EBX)
  # `and` is a Ruby keyword so it needs an explicit receiver. The
  # instruction takes (dest, src); the popped operand is the source.
  self.and(reg, EBX)
end
|
||||
|
||||
def compare(reg, n)
|
||||
cmp(reg, n)
|
||||
end
|
||||
|
||||
def mov_reg_imm(reg, imm)
|
||||
mov(reg, imm)
|
||||
end
|
||||
|
||||
|
||||
############################
|
||||
### Instruction Encoding ###
|
||||
############################
|
||||
|
||||
def emit_dword(num)
|
||||
num_to_quad(num).each { |byte| emit_byte(byte) }
|
||||
end
|
||||
|
||||
# Emit the Mod-R/M byte for +addr+, followed by whichever of the SIB byte,
# 8-bit displacement, 32-bit displacement or variable reference the
# addressing form requires.
#
# addr - a register proxy (register-direct, mod == 11), or an array holding
#        an effective address, optionally preceded by a size prefix:
#        [ea] or [size, ea]
# reg  - value for the 3-bit reg/opcode-extension field (default 0)
#
# Raises RuntimeError for addressing forms that are not supported.
def emit_modrm(addr, reg = 0)
  mod = rm = 0
  sib = disp8 = disp32 = var = nil

  if addr.is_a?(Array)
    # Works with or without the size prefix.
    target = addr[1] || addr[0]
    raise "invalid effective address: #{addr.inspect}" unless target

    case target

    when RegisterProxy
      if target.register?
        # Plain register addressing, e.g. [ESI] (mod == 00).
        if target == ESP
          # rm == 100 selects SIB, so [ESP] is emitted as SIB with
          # scale=1, no index, base=ESP.
          rm = 4
          sib = make_sib(1, 0, target)
        elsif target == EBP
          # mod == 00 with rm == 101 means disp32, so [EBP] is emitted
          # as [EBP]+disp8 with a zero displacement.
          mod, rm, disp8 = 1, target.regnum, 0
        else
          rm = target.regnum
        end

      elsif target.index? && target.index.is_a?(Numeric)
        # Numeric displacement attached to the proxy.
        # NOTE(review): rm is left at 0 in this branch, so the base
        # register number is never encoded -- confirm this is intended.
        displacement = target.index
        if SIGNED_BYTE === displacement
          mod, disp8 = 1, displacement      # disp8, mod == 01
        elsif SignedRange === displacement
          mod, disp32 = 2, displacement     # disp32, mod == 10
        else
          raise "address must fit in 32 bits, this doesn't: #{displacement}"
        end

      elsif target.index?
        # scale-index-base, mod == 00 and rm == 100
        rm = 4
        sib = make_sib(target.scale || 1, target.index, target.base)

      else
        raise "unsupported effective address: #{addr.inspect}"
      end

    when Numeric
      # Absolute address: disp32 with mod == 00, rm == 101.
      rm, disp32 = 5, target

    when VariableProxy
      # Same encoding as an absolute address; the variable reference is
      # emitted in place of the displacement.
      rm, var = 5, target

    else
      raise "unsupported effective address: #{addr.inspect}"
    end

  elsif addr.register?
    # Register content, mod == 11.
    mod, rm = 3, addr.regnum

  # XXX TODO elsif addr.respond_to?(:name)
  #   (VariableProxy) => [:(var|const), addr.name]
  #
  #   i.e. a pointer to that var

  else
    raise "unsupported effective address: #{addr.inspect}"
  end

  emit_byte((mod << 6) | (reg << 3) | rm)
  emit_byte(sib) if sib
  emit_byte(disp8) if disp8
  emit_dword(disp32) if disp32
  emit_var(var) if var
end
|
||||
|
||||
|
||||
# Build a Scale-Index-Base byte.
#
# scale - 1, 2, 4 or 8
# index - index register (anything responding to #regnum), a raw register
#         number, or 0 for "no index" (encoded as 100)
# base  - base register (anything responding to #regnum) or a raw number
#
# Returns the SIB byte as an Integer.
# Raises RuntimeError if scale is not 1, 2, 4 or 8.
def make_sib(scale, index, base)
  # The scale field is log2(scale); the position in this list is exactly
  # that, without going through floating point.
  scale_bits = [1, 2, 4, 8].index(scale)
  raise "unsupported SIB scale: #{scale}, should be 1, 2, 4, or 8" unless scale_bits

  if index == 0
    index = 4
  elsif index.respond_to?(:regnum)
    index = index.regnum
  end
  base = base.regnum if base.respond_to?(:regnum)

  (scale_bits << 6) | (index << 3) | base
end
|
||||
|
||||
|
||||
# Return true if +op+ is a register of the given size.
#
# op   - candidate operand
# size - :byte, :word, :dword, or a width in bits
#
# Only register-like objects qualify. Checking for #size alone is not
# enough: Integer, String and Array all respond to it, so e.g. an Integer
# (8 bytes on 64-bit builds) used to pass as a byte register.
def register?(op, size = DEFAULT_OPERAND_SIZE)
  return false unless op.is_a?(RegisterProxy) || op.respond_to?(:regnum)

  op.size == size || op.size == SIZE_MAP[size]
end
|
||||
|
||||
# Return true if +op+ is a number that fits in +size+.
#
# size - :byte, :word, :dword, or a width in bits
#
# Both the signed and the unsigned interpretation are accepted, i.e. the
# range is -(2^(bits-1)) .. 2^bits - 1.
def immediate?(op, size = DEFAULT_OPERAND_SIZE)
  return false unless op.is_a?(Numeric)

  bits = SIZE_MAP[size] || size
  lowest = -(2 ** bits / 2)
  highest = 2 ** bits - 1
  op >= lowest && op <= highest
end
|
||||
|
||||
# Return true if op is a valid operand of the specified size.
|
||||
# (:byte, :word, :dword)
|
||||
#
|
||||
# Valid operands are:
|
||||
#
|
||||
# * registers
|
||||
#
|
||||
# * effective addresses (wrapped in an array to look like nasm code)
|
||||
#
|
||||
# XXX This method is pretty ugly.
|
||||
# Return true if +op+ is a valid r/m operand of the given size.
#
# op   - a register, or an effective address wrapped in an array:
#        [memory] or [<size>, memory]
# size - :byte, :word, :dword, or a width in bits
#
# Always returns true or false.
def rm?(op, size = DEFAULT_OPERAND_SIZE)
  return true if register?(op, size)
  return false unless op.is_a?(Array)

  # is_a? rather than comparing classes: 5.class is Integer, never Numeric,
  # so a class comparison rejects every numeric address.
  memory = lambda do |x|
    [Numeric, RegisterProxy, VariableProxy].any? { |kind| x.is_a?(kind) }
  end
  # Callers pass sizes both as symbols and as bit counts; compare them in
  # bits so [:byte, mem] matches a size of 8 too.
  bits = ->(s) { SIZE_MAP[s] || s }

  case op.size
  when 1 then memory.call(op[0])                                   # [register/memory]
  when 2 then bits.call(op[0]) == bits.call(size) && memory.call(op[1]) # [<size>, memory]
  else false
  end
end
|
||||
|
||||
def offset?(addr, size = DEFAULT_OPERAND_SIZE)
|
||||
addr.is_a?(Array) && (addr[0].is_a?(Numeric) || addr[0].is_a?(VariableProxy))
|
||||
end
|
||||
|
||||
def constant?(op)
|
||||
immediate?(op) || offset?(op)
|
||||
end
|
||||
|
||||
# Base-2 logarithm by repeated halving and squaring.
#
# x   - positive number
# tol - stop refining the fractional part once the next binary digit is
#       worth less than this
#
# Returns a Float.
# Raises ArgumentError unless x is positive (the scaling loop below would
# never terminate for zero or negative input).
def log2(x, tol = 1e-13)
  raise ArgumentError, "log2 is only defined for positive numbers, got #{x}" unless x > 0

  # Work in floating point: integer division would drop the fraction that
  # the second phase needs.
  x = x.to_f
  result = 0.0

  # Integer part: scale x into [1, 2).
  while x < 1
    result -= 1
    x *= 2
  end
  while x >= 2
    result += 1
    x /= 2
  end

  # Fractional part, one binary digit per iteration.
  fp = 1.0
  while fp >= tol
    fp /= 2
    x *= x
    if x >= 2
      x /= 2
      result += fp
    end
  end
  result
end
|
||||
|
||||
|
||||
# 9 versions of the mov instruction are supported:
|
||||
# 1. mov reg32, immediate32
|
||||
# 2a. mov reg32, r/m32
|
||||
# 2b. mov eax, memoffset32
|
||||
# 3a. mov r/m32, reg32
|
||||
# 3b. mov memoffset32, eax
|
||||
# 4. mov r/m32, immediate32
|
||||
# 5. mov r/m8, imm8
|
||||
# 6. mov reg8, r/m8
|
||||
# 7. mov r/m8, reg8
|
||||
# Encode a MOV. The supported operand combinations are listed above; any
# other combination raises RuntimeError.
def mov(dest, src)
  # At most one of imm32/moffs is emitted as the trailing 4 bytes; imm32
  # wins if both are set. A VariableProxy in that slot is emitted as a
  # const or var reference instead of a literal dword.
  imm32 = nil
  moffs = nil
  # Trailing single byte, only used by the r/m8, imm8 form.
  imm8 = nil
  # Arguments for emit_modrm, when the form has a Mod-R/M byte.
  modrm_args = nil

  if register?(dest) && immediate?(src)
    # 1: mov r32, imm32 -- destination is encoded in the opcode
    opcode = 0xb8 + dest.regnum
    imm32 = src

  elsif register?(dest) && rm?(src)
    if dest == EAX && offset?(src)
      # 2b: mov eax, moffs32
      opcode, moffs = 0xa1, src[0]
    else
      # 2a: mov r32, r/m32
      opcode, modrm_args = 0x8b, [src, dest.regnum]
    end

  elsif rm?(dest) && register?(src)
    if offset?(dest) && src == EAX
      # 3b: mov moffs32, eax
      opcode, moffs = 0xa3, dest[0]
    else
      # 3a: mov r/m32, r32
      opcode, modrm_args = 0x89, [dest, src.regnum]
    end

  elsif rm?(dest) && immediate?(src)
    # 4: mov r/m32, imm32
    opcode, modrm_args = 0xc7, [dest, 0]
    imm32 = src

  elsif rm?(dest, :byte) && immediate?(src, :byte)
    # 5: mov r/m8, imm8
    #
    # This must be checked before form 7 because src integers can pass
    # the register? check there.
    opcode, modrm_args = 0xc6, [dest, 0]
    imm8 = src

  elsif register?(dest, :byte) && rm?(src, :byte)
    # 6: mov r8, r/m8
    opcode, modrm_args = 0x8a, [src, dest.regnum]

  elsif rm?(dest, :byte) && register?(src, :byte)
    # 7: mov r/m8, r8
    opcode, modrm_args = 0x88, [dest, src.regnum]

  else
    raise "unsupported MOV instruction, #{dest.inspect}, #{src.inspect}"
  end

  trailing = imm32 || moffs

  asm do
    emit_byte(opcode)
    emit_modrm(*modrm_args) if modrm_args

    if trailing.is_a?(VariableProxy)
      trailing.const? ? emit_const(trailing) : emit_var(trailing)
    elsif trailing
      emit_dword(trailing)
    elsif imm8
      emit_byte(imm8)
    end
  end
end
|
||||
|
||||
|
||||
# Move with zero-extension into a register.
#
# dest - destination register of the default operand size
# src  - byte- or word-sized r/m operand
#
# Raises RuntimeError if dest is not such a register or src has another size.
def movzx(dest, src)
  unless register?(dest)
    raise "unimplemented MOVZX instruction, << dest=#{dest.inspect} >> src=#{src.inspect}"
  end

  # Second opcode byte depends on the source width.
  opcode =
    if rm?(src, :byte)
      0xb6 # movzx Gv, Eb
    elsif rm?(src, :word)
      0xb7 # movzx Gv, Ew
    else
      raise "unsupported MOVZX instruction, dest=#{dest.inspect} << src=#{src.inspect} >>"
    end

  asm do
    emit_byte(0x0f)
    emit_byte(opcode)
    emit_modrm(src, dest.regnum)
  end
end
|
||||
|
||||
|
||||
# Exchange two operands.
#
# When one side is EAX and the other a register, the one-byte form
# (0x90 + regnum) is used regardless of argument order. Otherwise one side
# must be a register and the other an r/m operand (0x87 /r).
#
# Raises RuntimeError for any other combination.
def xchg(dest, src)
  partner =
    if dest == EAX && register?(src)
      src
    elsif src == EAX && register?(dest)
      dest
    end
  return asm { emit_byte(0x90 + partner.regnum) } if partner

  rm_operand, reg_operand =
    if rm?(dest) && register?(src)
      [dest, src]
    elsif register?(dest) && rm?(src)
      [src, dest]
    else
      raise "unsupported XCHG instruction, dest=#{dest.inspect} src=#{src.inspect}"
    end

  asm do
    emit_byte(0x87)
    emit_modrm(rm_operand, reg_operand.regnum)
  end
end
|
||||
|
||||
# convert double to quad (sign-extend EAX into EDX)
|
||||
def cdq
|
||||
asm { emit_byte(0x99) }
|
||||
end
|
||||
|
||||
|
||||
# Encode an ADD.
#
# Supported forms:
#   add r/m32, imm8     (0x83 /0 ib)
#   add eax, imm32      (0x05 id)
#   add r/m32, imm32    (0x81 /0 id)
#   add reg32, r/m32    (0x03 /r)
#   add r/m32, reg32    (0x01 /r)
#
# Raises RuntimeError for any other operand combination.
def add(dest, src)
  # The imm8 form is sign-extended by the CPU, so only -128..127 may use
  # it; 128..255 would silently become negative.
  if rm?(dest) && SIGNED_BYTE === src
    asm do
      emit_byte(0x83)
      emit_modrm(dest, 0)
      emit_byte(src)
    end

  # Short accumulator form. EAX also satisfies rm?, so this has to be
  # tested before the general r/m32, imm32 form to be reachable.
  elsif dest == EAX && immediate?(src)
    asm do
      emit_byte(0x05)
      emit_dword(src)
    end

  elsif rm?(dest) && immediate?(src)
    asm do
      emit_byte(0x81)
      emit_modrm(dest, 0)
      emit_dword(src)
    end

  elsif register?(dest) && rm?(src)
    asm do
      emit_byte(0x03)
      emit_modrm(src, dest.regnum)
    end

  # Only reached for a memory destination; register-to-register is
  # handled by the branch above.
  elsif rm?(dest) && register?(src)
    asm do
      emit_byte(0x01)
      emit_modrm(dest, src.regnum)
    end

  else
    raise "unsupported ADD instruction, dest=#{dest.inspect} src=#{src.inspect}"
  end
end
|
||||
|
||||
|
||||
# Encode a SUB.
#
# Supported forms:
#   sub r/m32, imm8     (0x83 /5 ib)
#   sub r/m32, imm32    (0x81 /5 id)
#   sub r/m32, reg32    (0x29 /r)
#   sub reg32, r/m32    (0x2b /r)
#
# Raises RuntimeError for any other operand combination.
def sub(dest, src)
  # The imm8 form is sign-extended by the CPU, so only -128..127 may use
  # it; 128..255 would silently become negative.
  if rm?(dest) && SIGNED_BYTE === src
    asm do
      emit_byte(0x83)
      emit_modrm(dest, 5)
      emit_byte(src)
    end

  elsif rm?(dest) && immediate?(src)
    asm do
      emit_byte(0x81)
      emit_modrm(dest, 5)
      emit_dword(src)
    end

  elsif rm?(dest) && register?(src)
    asm do
      emit_byte(0x29)
      emit_modrm(dest, src.regnum)
    end

  elsif register?(dest) && rm?(src)
    asm do
      emit_byte(0x2b)
      emit_modrm(src, dest.regnum)
    end

  else
    raise "unsupported SUB instruction, dest=#{dest.inspect} src=#{src.inspect}"
  end
end
|
||||
|
||||
|
||||
# Signed multiply.
|
||||
def imul(*ops)
|
||||
case ops.size
|
||||
|
||||
when 1
|
||||
group3(ops[0], 5, 'IMUL')
|
||||
|
||||
when 2
|
||||
dest, src = ops
|
||||
raise "unsupported IMUL instruction, dest=#{dest.inspect} src=#{src.inspect}"
|
||||
|
||||
else
|
||||
raise ArgumentError, "IMUL accepts exactly 1 or 2 operands (got #{ops.inspect})"
|
||||
end
|
||||
end
|
||||
|
||||
# Unsigned multiply.
|
||||
def mul(op)
|
||||
group3(op, 4, 'MUL')
|
||||
end
|
||||
|
||||
|
||||
# Signed divide.
|
||||
def idiv(op)
|
||||
group3(op, 7, 'IDIV')
|
||||
end
|
||||
|
||||
# Unsigned divide.
|
||||
def div(op)
|
||||
group3(op, 6, 'DIV')
|
||||
end
|
||||
|
||||
|
||||
# Encode an INC.
#
# Supported forms:
#   inc reg32    (0x40 + regnum)
#   inc r/m32    (0xff /0)
#
# Raises RuntimeError for any other operand.
def inc(op)
  if register?(op)
    # The register number comes from the operand itself, as in dec.
    asm { emit_byte(0x40 + op.regnum) }
  elsif rm?(op)
    asm do
      emit_byte(0xff)
      emit_modrm(op, 0)
    end
  else
    raise "unsupported op #{op}, wanted r32 or r/m32"
  end
end
|
||||
|
||||
|
||||
def dec(op)
|
||||
if register?(op)
|
||||
# dec reg32
|
||||
asm { emit_byte(0x48 + op.regnum) }
|
||||
else
|
||||
raise "unsupported DEC instruction, op=#{op.inspect}"
|
||||
end
|
||||
end
|
||||
|
||||
|
||||
def shr(op, n)
|
||||
|
||||
# shr r/m??, imm8
|
||||
if SIGNED_BYTE === n
|
||||
|
||||
opcode = register?(op, :byte) ? 0xc0 : 0xc1
|
||||
|
||||
asm do
|
||||
emit_byte(opcode)
|
||||
emit_modrm(op, 5)
|
||||
emit_byte(n)
|
||||
end
|
||||
|
||||
else
|
||||
raise "unsupported SHR instruction, op=#{op.inspect}, n=#{n.inspect}"
|
||||
end
|
||||
|
||||
end
|
||||
|
||||
|
||||
def and_(dest, src)
|
||||
if rm?(dest) && register?(src)
|
||||
asm do
|
||||
emit_byte(0x21)
|
||||
emit_modrm(dest, src.regnum)
|
||||
end
|
||||
elsif rm?(dest, 8) && immediate?(src, 8)
|
||||
asm do
|
||||
emit_byte(0x80)
|
||||
emit_modrm(dest, 4)
|
||||
emit_byte(src)
|
||||
end
|
||||
else
|
||||
raise "unsupported AND instruction: dest=#{dest.inspect}, src=#{src.inspect}"
|
||||
end
|
||||
end
|
||||
alias_method :and, :and_
|
||||
|
||||
def or_(dest, src)
|
||||
if rm?(dest) && register?(src)
|
||||
asm do
|
||||
emit_byte(0x9)
|
||||
emit_modrm(dest, src.regnum)
|
||||
end
|
||||
elsif rm?(dest, 8) && immediate?(src, 8)
|
||||
asm do
|
||||
emit_byte(0x80)
|
||||
emit_modrm(dest, 1)
|
||||
emit_byte(src)
|
||||
end
|
||||
else
|
||||
raise "unsupported OR instruction: dest=#{dest.inspect}, src=#{src.inspect}"
|
||||
end
|
||||
end
|
||||
alias_method :or, :or_
|
||||
|
||||
def xor(dest, src)
|
||||
# xor r/m32, reg32
|
||||
if rm?(dest) && register?(src)
|
||||
asm do
|
||||
emit_byte(0x31)
|
||||
emit_modrm(dest, src.regnum)
|
||||
end
|
||||
|
||||
else
|
||||
raise "unsupported XOR instruction, dest=#{dest.inspect} src=#{src.inspect}"
|
||||
end
|
||||
end
|
||||
|
||||
|
||||
def not_(op)
|
||||
group3(op, 2, 'NOT')
|
||||
end
|
||||
alias_method :not, :not_
|
||||
|
||||
|
||||
def neg(op)
|
||||
group3(op, 3, 'NEG')
|
||||
end
|
||||
|
||||
|
||||
# Encode a PUSH.
#
# Supported forms:
#   push reg32    (0x50 + regnum)
#   push imm8     (0x6a ib)
#   push imm32    (0x68 id)
#
# Raises RuntimeError for any other operand.
def push(op)
  if register?(op)
    asm { emit_byte(0x50 + op.regnum) }

  # The imm8 form is sign-extended by the CPU, so only -128..127 may use
  # it; 128..255 would be pushed as a negative number.
  elsif SIGNED_BYTE === op
    asm do
      emit_byte(0x6a)
      emit_byte(op)
    end

  elsif immediate?(op)
    asm do
      emit_byte(0x68)
      emit_dword(op)
    end

  else
    raise "unsupported PUSH instruction: op=#{op.inspect}"
  end
end
|
||||
|
||||
|
||||
def pop(op)
|
||||
# pop reg32
|
||||
if register?(op)
|
||||
asm { emit_byte(0x58 + op.regnum) }
|
||||
|
||||
else
|
||||
raise "unsupported POP instruction: op=#{op.inspect}"
|
||||
end
|
||||
end
|
||||
|
||||
|
||||
def cmp(op1, op2)
|
||||
# cmp r/m32, reg32
|
||||
if rm?(op1) && register?(op2)
|
||||
asm do
|
||||
emit_byte(0x39)
|
||||
emit_modrm(op1, op2.regnum)
|
||||
end
|
||||
|
||||
# cmp eax, imm32
|
||||
elsif op1 == EAX && immediate?(op2)
|
||||
asm do
|
||||
emit_byte(0x3d)
|
||||
emit_dword(op2)
|
||||
end
|
||||
|
||||
else
|
||||
raise "unsupported CMP instruction: op1=#{op1.inspect} op2=#{op2.inspect}"
|
||||
end
|
||||
end
|
||||
|
||||
|
||||
# Only jmp rel32 is supported.
|
||||
def jmp(label)
|
||||
asm do
|
||||
emit_byte(0xe9)
|
||||
emit_label(label)
|
||||
end
|
||||
end
|
||||
|
||||
# These all jump near (rel32).
|
||||
# Maps each supported conditional-jump mnemonic to its opcode byte.
# Looking up any other mnemonic raises RuntimeError naming that mnemonic.
#
# A Hash default block is called with (hash, key); taking a single
# parameter would bind the hash, not the key, and put the whole table into
# the error message.
JccOpcodeMap = Hash.new { |_hash, key| raise "unsupported Jcc instruction: #{key}" }.
  update({
    :jc   => 0x82, # carry (CF=1)
    :je   => 0x84, # equal (ZF=1) --- same as jz
    :jg   => 0x8f, # greater (ZF=0 and SF=OF)
    :jl   => 0x8c, # less than (SF!=OF)
    :jne  => 0x85, # not equal (ZF=0) --- same as jnz
    :jng  => 0x8e, # not greater than (ZF=1 or SF!=OF)
    :jnl  => 0x8d, # not less than (SF=OF)
    :jnz  => 0x85, # not zero (ZF=0)
    :jo   => 0x80, # overflow (OF=1)
    :js   => 0x88, # sign (SF=1)
    :jz   => 0x84  # zero (ZF=1)
  }).freeze
|
||||
|
||||
# Only Jcc rel32 is supported.
|
||||
def jcc(instruction, label)
|
||||
opcode = JccOpcodeMap[instruction]
|
||||
asm do
|
||||
emit_byte(0x0f)
|
||||
emit_byte(opcode)
|
||||
emit_label(label)
|
||||
end
|
||||
end
|
||||
|
||||
JccOpcodeMap.keys.each do |name|
|
||||
define_method(name) do |label|
|
||||
jcc(name, label)
|
||||
end
|
||||
end
|
||||
|
||||
|
||||
def lea(r32, mem)
|
||||
asm do
|
||||
emit_byte(0x8d)
|
||||
emit_modrm(mem, r32.regnum)
|
||||
end
|
||||
end
|
||||
|
||||
|
||||
def int(n)
|
||||
asm do
|
||||
emit_byte(0xcd)
|
||||
emit_byte(n)
|
||||
end
|
||||
end
|
||||
|
||||
|
||||
def ret
|
||||
asm { emit_byte(0xc3) }
|
||||
end
|
||||
|
||||
|
||||
def leave
|
||||
asm { emit_byte(0xc9) }
|
||||
end
|
||||
|
||||
|
||||
# NOTE: LOOP only accepts a 1-byte signed offset. Don't use it.
|
||||
def loop_(label)
|
||||
real_ip = ip + 2 # loop instruction is 2 bytes
|
||||
delta = @symtab.lookup_label(label) - real_ip
|
||||
unless SIGNED_BYTE === delta
|
||||
raise "LOOP can only jump -128 to 127 bytes, #{label} is #{delta} bytes away"
|
||||
end
|
||||
|
||||
asm do
|
||||
emit_byte(0xe2)
|
||||
emit_byte(delta)
|
||||
end
|
||||
end
|
||||
alias_method :loop, :loop_
|
||||
|
||||
|
||||
# Opcode group #3. 1-byte opcode, 1 operand (r/m8 or r/m32).
|
||||
#
|
||||
# Members of this group are: DIV, IDIV, MUL, IMUL, NEG, NOT, and TEST.
|
||||
# Shared encoder for the one-operand instructions of opcode group 3.
#
# op          - r/m8 or r/m32 operand
# reg         - opcode extension that selects the instruction; goes into
#               the reg field of the Mod-R/M byte
# instruction - mnemonic, used only in the error message
#
# Raises RuntimeError if op is neither an 8-bit nor a 32-bit r/m operand.
def group3(op, reg, instruction)
  opcode =
    case
    when rm?(op, 8) then 0xf6
    when rm?(op)    then 0xf7
    else raise "unsupported #{instruction} instruction: op=#{op.inspect}"
    end

  asm do
    emit_byte(opcode)
    emit_modrm(op, reg)
  end
end
|
||||
|
||||
|
||||
end
|
||||
|
||||
end
|
||||
end
|
||||
end
|
||||
32
lib/compiler/asm/x86/registers.rb
Normal file
32
lib/compiler/asm/x86/registers.rb
Normal file
|
|
@ -0,0 +1,32 @@
|
|||
require 'asm/regproxy'
|
||||
|
||||
# NOTE(review): the assemblers in this tree live under Compiler::ASM::X86 and
# do `include Registers` there, while this module is top-level ASM::Registers
# and the require above uses 'asm/regproxy' rather than a 'compiler/...'
# path -- confirm the intended namespace and load path.
module ASM

  module Registers

    # This structure allows for x86 registers of all sizes. The number of
    # the register is the index of the row in which it is found. The size
    # of a register in bits is 8 * 2 ** index-into-row (8, 16, 32).
    Registers = [ [:al, :ax, :eax],  # 0
                  [:cl, :cx, :ecx],  # 1
                  [:dl, :dx, :edx],  # 2
                  [:bl, :bx, :ebx],  # 3
                  [:ah, :sp, :esp],  # 4
                  [:ch, :bp, :ebp],  # 5
                  [:dh, :si, :esi],  # 6
                  [:bh, :di, :edi]   # 7
                ].each(&:freeze).freeze

    # Set up register proxies, which are used both in effective address
    # calculations and as plain symbols representing registers. Each one
    # becomes a constant named after the register, e.g. EAX.
    Registers.each_with_index do |group, regnum|
      group.each_with_index do |reg, i|
        name = reg.to_s.upcase
        const_set(name, RegisterProxy.new(reg, 8 * (2 ** i), regnum))
      end
    end

  end

end
|
||||
11
lib/compiler/asm/x86/template.darwin.asm
Normal file
11
lib/compiler/asm/x86/template.darwin.asm
Normal file
|
|
@ -0,0 +1,11 @@
|
|||
BITS 32
|
||||
GLOBAL _main
|
||||
SECTION .data
|
||||
{data}
|
||||
SECTION .bss
|
||||
{bss}
|
||||
SECTION .text
|
||||
_main:
|
||||
{code}
|
||||
;; The result in eax is the exit code, just return.
|
||||
ret
|
||||
13
lib/compiler/asm/x86/template.linux.asm
Normal file
13
lib/compiler/asm/x86/template.linux.asm
Normal file
|
|
@ -0,0 +1,13 @@
|
|||
BITS 32
|
||||
GLOBAL _start
|
||||
SECTION .data
|
||||
{data}
|
||||
SECTION .bss
|
||||
{bss}
|
||||
SECTION .text
|
||||
_start:
|
||||
{code}
|
||||
;; The result in eax is the exit code, move it to ebx.
|
||||
mov ebx, eax
|
||||
mov eax, 1 ; _exit syscall
|
||||
int 0x80 ; call Linux
|
||||
159
lib/compiler/asm/x86/text_assembler.rb
Normal file
159
lib/compiler/asm/x86/text_assembler.rb
Normal file
|
|
@ -0,0 +1,159 @@
|
|||
# A subset of x86 assembly.
|
||||
#
|
||||
# sjs
|
||||
# may 2009
|
||||
|
||||
require 'compiler/asm/text_assembler'
|
||||
|
||||
class Compiler
  module ASM
    module X86

      # ASM methods output nasm-friendly x86 asm code, line by
      # line.  This is dead easy and we can trust nasm to compile
      # correct machine code, which isn't trivial.
      class TextAssembler < ASM::TextAssembler

        # Nothing to emit for the text output.
        # NOTE(review): presumably because the platform templates already
        # contain the entry label -- confirm.
        def emit_entry_point
        end

        # Define a constant in the .data section.
        def const(name, value)
          @data << "#{name}\tequ #{value}"
        end

        # Define a variable with the given name and size in bytes.
        #
        # NOTE(review): bytes defaults to nil but is used arithmetically
        # below -- confirm callers always pass a size.
        def define_var_impl(name, bytes = nil)
          super(name, bytes)
          # resd reserves whole dwords: round up so that sizes that are
          # not a multiple of 4 (e.g. a single byte) still get storage.
          dwords = (bytes + 3) / 4
          @bss << "#{name}: resd #{dwords}\n"
        end

        # Fill the platform template with the accumulated sections and
        # return the complete assembly source.
        def output
          # Block form: a string replacement would have sequences such as
          # "\\" and "\1" in the generated code interpreted by String#sub.
          File.read(template_filename).
            sub("{data}") { @data }.
            sub("{bss}") { @bss }.
            sub("{code}") { @code }
        end

        # Emit a label definition; without an argument the name comes
        # from +label+.
        def emit_label(name = label)
          emit("#{name}:", tab: nil)
        end

        # Numeric sources are annotated with their hex value in a comment.
        def mov(dest, src)
          emit("mov #{dest}, #{src}#{src.is_a?(Numeric) ? " ; 0x#{src.to_s(16)}" : ''}")
        end

        def movzx(dest, src)
          emit("movzx #{dest}, #{src}")
        end

        def add(dest, src)
          emit("add #{dest}, #{src}")
        end

        def sub(dest, src)
          emit("sub #{dest}, #{src}")
        end

        def imul(op)
          emit("imul #{op}")
        end

        def idiv(op)
          emit("idiv #{op}")
        end

        def inc(op)
          emit("inc #{op}")
        end

        def dec(op)
          emit("dec #{op}")
        end

        def push(reg)
          emit("push #{reg}")
        end

        def pop(reg)
          emit("pop #{reg}")
        end

        def call(label)
          emit("call #{label}")
        end

        def leave
          emit("leave")
        end

        def neg(reg)
          emit("neg #{reg}")
        end

        # The binary assembler answers to both the keyword name and the
        # underscore-suffixed name for and/or/not; provide both here as
        # well so the two back ends can be driven the same way.
        def not(rm32)
          emit("not #{rm32}")
        end
        alias_method :not_, :not

        def xchg(op1, op2)
          emit("xchg #{op1}, #{op2}")
        end

        def and_(op1, op2)
          emit("and #{op1}, #{op2}")
        end
        alias_method :and, :and_

        def or(op1, op2)
          emit("or #{op1}, #{op2}")
        end
        alias_method :or_, :or

        def xor(op1, op2)
          emit("xor #{op1}, #{op2}")
        end

        def jz(label)
          emit("jz #{label}")
        end

        def jnz(label)
          emit("jnz #{label}")
        end

        def jmp(label)
          emit("jmp #{label}")
        end

        def jl(label)
          emit("jl #{label}")
        end

        def cmp(a, b)
          emit("cmp #{a}, #{b}")
        end

        def lea(a, b)
          emit("lea #{a}, #{b}")
        end

        def shr(a, b)
          emit("shr #{a}, #{b}")
        end

        def loop_(label)
          emit("loop #{label}")
        end

        def int(num)
          emit("int 0x#{num.to_s(16)}")
        end

        def cdq
          emit("cdq")
        end

      end

    end
  end
end
|
||||
108
lib/compiler/build.rb
Executable file
108
lib/compiler/build.rb
Executable file
|
|
@ -0,0 +1,108 @@
|
|||
#!/usr/bin/env ruby
|
||||
|
||||
require 'compiler'
|
||||
|
||||
# usage: build.rb <filename> [output filename] [elf | macho] [asm | bin]
|
||||
|
||||
BIN_FORMATS = Hash.new('bin')
|
||||
BIN_FORMATS['darwin'] = 'macho'
|
||||
BIN_FORMATS['linux'] = 'elf'
|
||||
|
||||
# Command-line entry point; see the usage line above for the arguments.
#
# Raises RuntimeError if the input file cannot be read.
def main
  filename = ARGV.shift.to_s
  raise "can't read #{filename}" unless File.readable?(filename)
  outdir = ARGV.shift || '.'
  platform = `uname -s`.chomp.downcase
  # The two shifts above consumed <filename> and [output filename], so the
  # binary format is now the first remaining argument.
  # NOTE(review): the trailing [asm | bin] argument is still ignored and
  # build_asm is never called -- confirm whether that is intended.
  binformat = ARGV[0] ? ARGV[0].downcase : BIN_FORMATS[platform]
  puts "Building #{filename} for #{platform}, binformat is #{binformat} ..."
  outfile = build(filename, outdir, platform, binformat)
  puts outfile
  exit
end
|
||||
|
||||
|
||||
def error(msg) STDERR.puts(msg) end
|
||||
|
||||
# name part (filename minus extension)
|
||||
# Strip the final extension from +filename+.
#
# Only the last path component is considered: a dot in a directory name
# ("./foo", "build.d/prog") is not an extension and is left alone.
def base(filename)
  filename.sub(/\.[^.\/]*\z/, '')
end
|
||||
|
||||
|
||||
# infile: input filename
|
||||
# outfile: output filename
|
||||
# asm: assembler to use
|
||||
# Compile +infile+ and write the result to +outfile+.
#
# infile:  input filename
# outfile: output filename
# asm:     assembler to use
#
# On a parse error the message, context and location are printed to
# stderr and the process exits with status 1.
def compile(infile, outfile, asm)

  File.open(infile, 'r') do |input|
    File.open(outfile, 'wb') do |out|
      # NOTE(review): the Compiler class at the top of lib/compiler.rb
      # exposes an instance method compile(input); confirm that this
      # class-level call with an explicit assembler still exists.
      out.print(Compiler.compile(input, asm))
    end
  end

# The error class is namespaced under Compiler; a bare ParseError does not
# resolve at top level and would turn every parse error into a NameError.
rescue Compiler::ParseError => e
  error("[error] #{e.message}")
  error("[context] #{e.context}")
  error(e.caller)
  exit(1)
end
|
||||
|
||||
def run_and_warn_on_failure(command)
|
||||
output = `#{command}`
|
||||
if $?.exitstatus != 0
|
||||
puts
|
||||
print output
|
||||
name = command.split.first
|
||||
raise "#{name} failed: #{$?.exitstatus}"
|
||||
end
|
||||
end
|
||||
|
||||
# link with ld, return resulting filename.
|
||||
# Link the object file +filename+ into an executable named after it
# (extension removed), mark it user-executable and return its name.
#
# platform - 'darwin' links via gcc -arch i386, 'linux' via ld
#
# Raises RuntimeError for any other platform, or if the linker fails.
def link(filename, outdir, platform = 'linux')
  target = base(filename)
  linker =
    case platform
    when 'darwin' then ['gcc', '-arch i386']
    when 'linux'  then ['ld', '']
    else raise "unsupported platform: #{platform}"
    end
  run_and_warn_on_failure("#{linker[0]} #{linker[1]} -o #{target} #{filename} 2>&1")
  `chmod u+x #{target}`
  target
end
|
||||
|
||||
def build(filename, outdir, platform = 'linux', binformat = 'macho')
|
||||
objfile = File.join(outdir, base(filename) + '.o')
|
||||
symtab, objwriter_class =
|
||||
case binformat
|
||||
when 'elf'
|
||||
[Compiler::ELF::SymbolTable.new, Compiler::ELF::ObjectFile]
|
||||
when 'macho'
|
||||
[Compiler::MachO::SymbolTable.new, Compiler::MachO::ObjectFile]
|
||||
else
|
||||
raise "unsupported binary format: #{binformat}"
|
||||
end
|
||||
compile(filename, objfile, Compiler::ASM::Binary.new(platform, symtab, objwriter_class))
|
||||
exefile = link(objfile, outdir, platform)
|
||||
return exefile
|
||||
end
|
||||
|
||||
# Build via the text back end: compile to nasm source, assemble it, then
# link. Returns the executable's filename.
def build_asm(filename, outdir, platform = 'linux', binformat = 'macho')
  asmfile = File.join(outdir, base(filename) + '.asm')
  compile(filename, asmfile, Compiler::ASM::Text.new(platform))
  objfile = assemble(asmfile, binformat)
  # link takes (filename, outdir, platform). Leaving outdir out shifted
  # the platform into its place, so linking always ran as 'linux'.
  link(objfile, outdir, platform)
end
|
||||
|
||||
# assemble using nasm, return resulting filename.
|
||||
def assemble(filename, binformat = 'macho')
|
||||
f = base(filename)
|
||||
outfile = "#{f}.o"
|
||||
run_and_warn_on_failure("nasm -f #{binformat} -g -o #{outfile} #{filename} 2>&1")
|
||||
return outfile
|
||||
end
|
||||
|
||||
main if $0 == __FILE__
|
||||
14
lib/compiler/parse_error.rb
Normal file
14
lib/compiler/parse_error.rb
Normal file
|
|
@ -0,0 +1,14 @@
|
|||
class Compiler

  # Raised for errors in the source being parsed. Carries the location the
  # error was raised from and, optionally, some context.
  class ParseError < RuntimeError

    def initialize(caller, context = nil)
      @caller, @context = caller, context
    end

    attr_reader :caller
    attr_reader :context

  end

end
|
||||
966
lib/compiler/parser.rb
Normal file
966
lib/compiler/parser.rb
Normal file
|
|
@ -0,0 +1,966 @@
|
|||
# A compiler as described by Jack Crenshaw in his famous book "Let's
|
||||
# Build a Compiler". At least in the beginning, this code will
|
||||
# closely reflect the Pascal code written by Jack. Over time it may
|
||||
# become more idiomatic, however this is an academic exercise.
|
||||
#
|
||||
# sjs
|
||||
# may 2009
|
||||
|
||||
require 'compiler/parse_error'
|
||||
|
||||
class Compiler
|
||||
|
||||
class Parser
|
||||
|
||||
KEYWORDS = {
|
||||
'if' => :if_else_stmt,
|
||||
'while' => :while_stmt,
|
||||
'until' => :until_stmt,
|
||||
'repeat' => :repeat_stmt,
|
||||
'for' => :for_stmt,
|
||||
'do' => :do_stmt,
|
||||
'break' => :break_stmt,
|
||||
'print' => :print_stmt,
|
||||
'else' => nil,
|
||||
'end' => nil
|
||||
}
|
||||
|
||||
# Grouped by precedence.
|
||||
OPS = {
|
||||
:add => %w[+ -],
|
||||
:mul => %w[* /],
|
||||
:rel => %w[== != < > <= >=],
|
||||
:or => %w[||],
|
||||
:and => %w[&&],
|
||||
:bit => %w[| ^ &],
|
||||
:unary => %w[- +]
|
||||
}
|
||||
# Op chars are chars that can begin an op, so OP_CHARS needs to be a
|
||||
# map of kinds of operators to a list of valid prefix chars.
|
||||
OP_CHARS = OPS.inject({}) { |hash, kv|
|
||||
key, val = *kv
|
||||
hash[key] = val.map {|op| op[0, 1]} # slice off first char for each op
|
||||
hash
|
||||
# Include :all for a very general test.
|
||||
}.merge(:all => OPS.values.flatten.map{|op| op[0, 1]}.sort.uniq)
|
||||
|
||||
FALSE = 0
|
||||
TRUE = -1
|
||||
|
||||
attr_reader :asm
|
||||
|
||||
def initialize(input, asm)
|
||||
@indent = 0 # for pretty printing
|
||||
@look = '' # Next lookahead char.
|
||||
@token = nil # Type of last read token.
|
||||
@value = nil # Value of last read token.
|
||||
@input = input # Stream to read from.
|
||||
@asm = asm # assembler
|
||||
@keywords = KEYWORDS.clone
|
||||
@keyword_names = @keywords.keys
|
||||
@label_stack = []
|
||||
|
||||
# seed the lexer
|
||||
get_char
|
||||
end
|
||||
|
||||
def parse
|
||||
block # parse a block of code
|
||||
expected(:'end of file') unless eof?
|
||||
end
|
||||
|
||||
def compile
|
||||
asm.output
|
||||
end
|
||||
|
||||
# Scan the input stream for the next token.
|
||||
# Scan the input stream for the next token, dispatching on the lookahead
# character. Newlines and comments are skipped and scanning restarts.
# Does nothing at end of input.
def scan
  return if @look.nil? # eof

  return get_name if alpha?(@look)
  return get_number if digit?(@look)
  return get_op if op_char?(@look)

  if newline?(@look)
    skip_any_whitespace
    return scan
  end

  if comment_char?(@look)
    skip_comment
    return scan
  end

  # XXX default to single char op... should probably raise.
  @token = :op
  @value = @look
  get_char
end
|
||||
|
||||
# put back the most recently parsed value
|
||||
# Put back the most recently parsed value, so that it is read again
# followed by the current lookahead char.
def backtrack
  # At end of input there is no lookahead char to return.
  @input.ungetc(@look) if @look
  # Push the token back as one string; byte-at-a-time ungets of the
  # reversed string scramble multi-byte characters.
  @input.ungetc(@value)
  get_char
end
|
||||
|
||||
# Parse and translate an identifier or function call.
|
||||
def identifier
|
||||
name = get_name
|
||||
|
||||
if @look == '('
|
||||
# function call
|
||||
match('(')
|
||||
# TODO arg list
|
||||
match(')')
|
||||
asm.call(name)
|
||||
else
|
||||
# variable access
|
||||
asm.load_var(name)
|
||||
end
|
||||
end
|
||||
|
||||
# Parse and translate a single factor. Result is in eax.
|
||||
def factor
|
||||
if @look == '('
|
||||
match('(')
|
||||
boolean_expression
|
||||
match(')')
|
||||
elsif alpha?(@look)
|
||||
identifier # or call
|
||||
elsif digit?(@look)
|
||||
asm.load(get_number.to_i)
|
||||
else
|
||||
expected(:'integer, identifier, function call, or parenthesized expression', :got => @look)
|
||||
end
|
||||
end
|
||||
|
||||
# Parse a signed factor.
|
||||
def signed_factor
|
||||
sign = @look
|
||||
match(sign) if op?(:unary, sign)
|
||||
factor
|
||||
asm.neg(return_reg) if sign == '-'
|
||||
end
|
||||
|
||||
# Parse and translate a single term (factor or mulop). Result is in
|
||||
# eax.
|
||||
def term
  signed_factor # first operand ends up in return_reg

  while op?(:mul, @look)
    # Stash the left operand; multiply/divide expect it on the stack.
    asm.push(return_reg)
    if @look == '*'
      multiply
    elsif @look == '/'
      divide
    end
  end
end
|
||||
|
||||
# Parse and translate a general expression of terms. Result is
|
||||
# in eax.
|
||||
def arithmetic_expression
  term # first operand ends up in return_reg

  while op_char?(@look, :add)
    # Stash the left operand; add/subtract expect it on the stack.
    asm.push(return_reg)
    if @look == '+'
      add
    elsif @look == '-'
      subtract
    end
  end
end
|
||||
|
||||
# Parse an addition operator and the 2nd term (b). The result is
|
||||
# left in eax. The 1st term (a) is expected on the stack.
|
||||
def add
  match('+')
  term # second operand (b)
  # Combine with the first operand (a), which the caller pushed.
  asm.stack_add(return_reg)
end
|
||||
|
||||
# Parse a subtraction operator and the 2nd term (b). The result is
|
||||
# left in eax. The 1st term (a) is expected on the stack.
|
||||
def subtract
  match('-')
  term # second operand (b)
  # Subtraction is faked as an addition: a - b == a + -b
  asm.neg(return_reg)
  asm.stack_add(return_reg)
end
|
||||
|
||||
# Parse a multiplication operator and the 2nd factor (b). The result is
|
||||
# left in eax. The 1st term (a) is expected on the stack.
|
||||
def multiply
  match('*')
  signed_factor # second operand (b)
  # Combine with the first operand (a), which the caller pushed.
  asm.stack_mul_signed(return_reg)
end
|
||||
|
||||
# Parse a division operator and the divisor (b). The result is
|
||||
# left in eax. The dividend (a) is expected on the stack.
|
||||
def divide
  match('/')
  signed_factor # divisor (b)
  # Combine with the dividend (a), which the caller pushed.
  asm.stack_div(return_reg)
end
|
||||
|
||||
|
||||
###################
|
||||
# bit expressions #
|
||||
###################
|
||||
|
||||
# Parse an arithmetic expression followed by any number of bitwise
# operations (|, ^, &). If the operator token that was scanned turns out
# not to be a bitwise operator it is pushed back onto the input and
# parsing of the bit expression stops.
def bit_expression
  handlers = {
    '|' => :bitor_expression,
    '^' => :bitxor_expression,
    '&' => :bitand_expression
  }

  arithmetic_expression
  while op?(:bit, @look)
    scan
    handler = handlers[@value]
    unless handler
      # e.g. a multi-character operator that merely starts with a bit
      # operator character: not ours, undo the scan.
      backtrack
      return
    end
    send(handler)
  end
end
|
||||
|
||||
# Shared implementation of the bitwise operators.
#
# op:    Operation name; the assembler method "stack_<op>" is invoked.
# token: The operator's source text (accepted but not used here).
def bit_op(op, token)
  asm.push(return_reg)       # left operand goes on the stack
  arithmetic_expression      # right operand
  emitter = "stack_#{op}"
  asm.send(emitter, return_reg)
end
|
||||
|
||||
# Bitwise OR: delegates to bit_op with the :or operation.
def bitor_expression
  bit_op(:or, '|')
end
|
||||
|
||||
# Bitwise XOR: delegates to bit_op with the :xor operation.
def bitxor_expression
  bit_op(:xor, '^')
end
|
||||
|
||||
# Bitwise AND: delegates to bit_op with the :and operation.
def bitand_expression
  bit_op(:and, '&')
end
|
||||
|
||||
|
||||
#######################
|
||||
# boolean expressions #
|
||||
#######################
|
||||
|
||||
# Parse boolean terms joined by '||' and emit short-circuiting code:
# the right-hand term is only evaluated when the left-hand one compared
# equal to FALSE. The result left in return_reg is TRUE or FALSE.
def boolean_expression
  boolean_term

  while @look == '|'
    scan
    expected('||') unless match_word('||')

    on_false  = asm.make_label(:false)
    on_truthy = asm.make_label(:truthy)
    finished  = asm.make_label(:done)

    # Left operand truthy: skip the right operand entirely.
    asm.compare(return_reg, FALSE)
    asm.jne(on_truthy)

    # Otherwise the right operand decides.
    boolean_term
    asm.compare(return_reg, FALSE)
    asm.je(on_false)

    asm.define_label(on_truthy)
    asm.mov_reg_imm(return_reg, TRUE)
    asm.jmp(finished)

    asm.define_label(on_false)
    asm.mov_reg_imm(return_reg, FALSE)

    asm.define_label(finished)
  end
end
|
||||
|
||||
# Parse not-factors joined by '&&' and emit short-circuiting code: as
# soon as one operand compares equal to FALSE the remaining operand is
# skipped. The result left in return_reg is TRUE when both operands are
# truthy and FALSE otherwise.
def boolean_term
  not_factor

  while @look == '&'
    scan
    expected('&&') unless match_word('&&')

    false_label = asm.make_label(:false)
    done_label = asm.make_label(:done)

    # Left operand false: short-circuit to the false branch.
    asm.compare(return_reg, FALSE)
    asm.je(false_label)

    # Right operand false: same destination.
    not_factor
    asm.compare(return_reg, FALSE)
    asm.je(false_label)

    # Both operands were truthy.
    asm.mov_reg_imm(return_reg, TRUE)
    asm.jmp(done_label)

    # At least one operand was false, so the result must be FALSE
    # (this branch used to load TRUE, making '&&' always true).
    asm.define_label(false_label)
    asm.mov_reg_imm(return_reg, FALSE)

    asm.define_label(done_label)
  end
end
|
||||
|
||||
# Parse either a boolean literal or, failing that, a relation.
def boolean_factor
  return relation unless boolean?(@look)

  if get_boolean == 'true'
    asm.mov_reg_imm(return_reg, TRUE)
  else
    # 'false': clear the register by xor-ing it with itself.
    asm.xor(return_reg, return_reg)
  end
  scan
end
|
||||
|
||||
# Parse a boolean factor with an optional leading '!'.
def not_factor
  return boolean_factor unless @look == '!'

  match('!')
  boolean_factor
  make_boolean(return_reg) # ensure it is -1 or 0...
  asm.not_(return_reg)     # so that 1's complement NOT is also boolean not
end
|
||||
|
||||
# Convert any identifier to a boolean (-1 or 0). This is
|
||||
# semantically equivalent to !!reg in C or Ruby.
|
||||
def make_boolean(reg)
  skip_label = asm.make_label(:endmakebool)
  # A value equal to FALSE is left untouched ...
  asm.compare(reg, FALSE)
  asm.jz(skip_label)
  # ... anything else is truthy and gets replaced by TRUE.
  asm.mov_reg_imm(reg, TRUE)
  asm.define_label(skip_label)
end
|
||||
|
||||
# Parse a bit expression optionally followed by one relational operator
# (==, !=, >, >=, <, <=) and its right-hand operand.
#
# The left operand is pushed for the *_relation helper to consume. An
# operator token that is not one of the six relational operators is
# reported through #expected *before* anything is pushed; previously
# such a token was silently dropped and left a push that was never
# balanced.
def relation
  bit_expression
  return unless op_char?(@look, :rel)

  scan
  handler = {
    '==' => :eq_relation,
    '!=' => :neq_relation,
    '>'  => :gt_relation,
    '>=' => :ge_relation,
    '<'  => :lt_relation,
    '<=' => :le_relation
  }[@value]
  expected(:'relational operator') unless handler

  asm.push(return_reg)
  send(handler)
end
|
||||
|
||||
# a: <on the stack>
|
||||
# b: eax
|
||||
#
|
||||
# If b - a is zero then a = b, and make_boolean will leave the zero
|
||||
# to effectively return false. If b - a is non-zero then a != b,
|
||||
# and make_boolean will leave -1 (true) for us in eax.
|
||||
def neq_relation
  bit_expression
  asm.stack_sub(return_reg)
  # make_boolean takes the register to normalise as a required
  # argument; calling it bare raised ArgumentError.
  make_boolean(return_reg)
end
|
||||
|
||||
# Invert the != test for equal.
|
||||
def eq_relation
  neq_relation
  # Use the assembler's not_ (trailing underscore), matching every
  # other call site in this parser (not_factor, cmp_relation).
  asm.not_(return_reg)
end
|
||||
|
||||
# > and < are both implemented in terms of jl (jump if less than).
|
||||
# We exploit the fact that cmp is the subtraction of src from dest
|
||||
# and order the terms appropriately for each function. As for >=
|
||||
# and <=, they in turn are implemented in terms of > and <. a is
|
||||
# greater than or equal to b if and only if a is *not* less than b.
|
||||
#
|
||||
# Note: This was done to minimize the number of instructions that
|
||||
# the assembler needed to implement, but since the Jcc
|
||||
# instructions are very cheap to implement this is no longer
|
||||
# a concern.
|
||||
|
||||
|
||||
# The next 4 relations all compare 2 values a and b, then return
|
||||
# true (-1) if the difference was below zero and false (0)
|
||||
# otherwise (using JL, jump if less than).
|
||||
# a, b:    Operands handed to asm.compare, in that order.
# options: :invert => true swaps the two possible results.
def cmp_relation(a, b, options = {})
  bit_expression
  asm.pop(EBX)

  # Invert the sense of the test?
  flip = options[:invert]

  less_label = asm.make_label(:cmp)
  done_label = asm.make_label(:endcmp)
  asm.compare(a, b)
  asm.jl(less_label)

  # Not less: false (or true if inverted). Both paths start from FALSE
  # and apply not_ to obtain the opposite value.
  asm.mov(EAX, FALSE)
  asm.not_(EAX) if flip
  asm.jmp(done_label)

  # Less: true (or false if inverted).
  asm.define_label(less_label)
  asm.mov(EAX, FALSE)
  asm.not_(EAX) unless flip

  asm.define_label(done_label)
end
|
||||
|
||||
# a: <on the stack>
|
||||
# b: eax
|
||||
#
|
||||
# if a > b then b - a < 0
|
||||
def gt_relation
  # TODO: fix this
  # (The note above used to be a bare line without '#', which is not
  # valid Ruby and prevented the file from loading.)
  cmp_relation(EAX, EBX) # b - a
end
|
||||
|
||||
# a: <on the stack>
|
||||
# b: eax
|
||||
#
|
||||
# if a < b then a - b < 0
|
||||
def lt_relation
  # a - b is below zero exactly when a < b.
  cmp_relation(EBX, EAX)
end
|
||||
|
||||
# a: <on the stack>
|
||||
# b: eax
|
||||
#
|
||||
# if a >= b then !(a < b)
|
||||
def ge_relation
  # Same comparison as "less than", with the outcome inverted.
  cmp_relation(EBX, EAX, :invert => true)
end
|
||||
|
||||
# a: <on the stack>
|
||||
# b: eax
|
||||
#
|
||||
# if a <= b then !(a > b)
|
||||
def le_relation
  # Same comparison as "greater than", with the outcome inverted.
  cmp_relation(EAX, EBX, :invert => true)
end
|
||||
|
||||
|
||||
######################################
|
||||
# statements and control structures  #
|
||||
######################################
|
||||
|
||||
# Dispatch the keyword held in @value to the parser method registered
# for it in @keywords. Raises if no handler is registered.
def keyword
  action = @keywords[@value]
  raise "unsupported keyword: #{@value}" unless action

  send(action)
end
|
||||
|
||||
# Parse an assignment statement. Value is in eax.
|
||||
def assignment
  # The identifier being assigned to is the token scanned last.
  target = @value
  match('=')
  boolean_expression
  # Resolve the variable through asm.var! only after the right-hand
  # side has been parsed, then store the result into it.
  asm.store_var(asm.var!(target), return_reg)
end
|
||||
|
||||
# Parse a code block.
|
||||
def block
  @indent += 1

  # scan a token, type ends up in @token and value in @value
  scan

  # Statements run until an 'else' or 'end' token or the end of input.
  loop do
    break if @value == 'else' || @value == 'end' || eof?

    @token == :keyword ? keyword : assignment
    scan
  end

  @indent -= 1
end
|
||||
|
||||
# Parse an if-else statement.
|
||||
def if_else_stmt
  # Until an else clause shows up a single label serves both as the
  # "else" target and as the end of the statement.
  else_label = asm.make_label(:end_or_else)
  end_label = else_label

  condition
  skip_any_whitespace
  asm.jz(else_label)
  block

  has_else = @token == :keyword && @value == 'else'
  if has_else
    skip_any_whitespace
    end_label = asm.make_label(:endif) # now we need the 2nd label
    asm.jmp(end_label)
    asm.define_label(else_label)
    block
  end

  match_word('end')
  asm.define_label(end_label)
end
|
||||
|
||||
# Used to implement the Two-Label-Loops (while, until, repeat).
|
||||
#
|
||||
# name: Name of the loop for readable labels.
|
||||
# block: Code to execute at the start of each iteration. (e.g. a
|
||||
# condition)
|
||||
def simple_loop(name)
  top = asm.make_label(:"#{name}_loop")
  bottom = asm.make_label(:"end_#{name}")

  asm.define_label(top)
  # The caller's block emits the per-iteration code and is given the
  # end label so that it can jump out of the loop.
  yield(bottom)
  # While the body is parsed the end label is on the label stack.
  pushing_label(bottom) { block }
  match_word('end')
  asm.jmp(top)
  asm.define_label(bottom)
end
|
||||
|
||||
# A simple_loop that evaluates a condition at the top of each iteration.
#
# name:             Name of the loop for readable labels.
# jump_instruction: Assembler method invoked with the end label right
#                   after the condition has been emitted.
def condition_loop(name, jump_instruction)
  simple_loop(name) do |exit_label|
    condition
    skip_any_whitespace
    asm.send(jump_instruction, exit_label)
  end
end
|
||||
|
||||
# while loop: leaves the loop via jz after the condition.
def while_stmt
  condition_loop('while', :jz) # done when == 0 (falsish)
end
|
||||
|
||||
# until loop: leaves the loop via jnz after the condition.
def until_stmt
  condition_loop('until', :jnz) # done when != 0 (truthy)
end
|
||||
|
||||
# Unconditional loop: nothing is emitted at the top of an iteration,
# only whitespace (including newlines) is skipped in the source.
def repeat_stmt
  simple_loop('repeat') { |_end_label| skip_any_whitespace }
end
|
||||
|
||||
# s = 0
|
||||
# f x = 1 to 5
|
||||
# s = s + x
|
||||
# e
|
||||
def for_stmt
  counter = asm.define_var(get_name)
  match('=')

  boolean_expression        # initial value

  # pre-decrement because of the pre-increment at the top of the loop
  asm.sub(return_reg, 1)
  asm.mov([counter], EAX)   # stash the counter in memory

  match_word('to', :scan => true)
  boolean_expression        # final value
  skip_any_whitespace

  asm.push(EAX)             # stash final value on stack
  limit = [ESP]

  simple_loop('for') do |exit_label|
    asm.mov(ECX, [counter]) # get the counter
    asm.add(ECX, 1)         # increment
    asm.mov([counter], ECX) # store the counter
    asm.cmp(limit, ECX)     # check if we're done
    asm.jz(exit_label)      # if so jump to the end
  end

  asm.add(ESP, 4)           # clean up the stack
end
|
||||
|
||||
# do 5
|
||||
# ...
|
||||
# end
|
||||
def do_stmt
  # The iteration count is the value of the expression, kept in ECX.
  boolean_expression
  skip_any_whitespace
  asm.mov(ECX, EAX)

  top = asm.make_label(:do)
  bottom = asm.make_label(:enddo)
  asm.define_label(top)

  # ECX is saved on the stack around the loop body.
  asm.push(ECX)
  pushing_label(bottom) { block }
  asm.pop(ECX)

  match_word('end')
  asm.dec(ECX)
  asm.jnz(top)

  # Phony push! break needs to clean up the stack, but since we
  # don't know if there is a break at this point we fake a push and
  # always clean up the stack after.
  asm.sub(ESP, 4)

  asm.define_label(bottom)

  # If there was a break we have to clean up the stack here. If
  # there was no break we clean up the phony push above.
  asm.add(ESP, 4)
end
|
||||
|
||||
# Jump to the label on top of the label stack; a break with no label
# on the stack is reported as a parse error.
def break_stmt
  target = top_label
  return asm.jmp(target) if target

  expected(:'break to be somewhere useful',
           :got => :'a break outside a loop')
end
|
||||
|
||||
# Evaluate a boolean expression and compare the result against zero.
|
||||
def condition
  boolean_expression
  skip_whitespace
  # The comparison result is consumed by the caller's conditional jump.
  asm.cmp(EAX, 0) # 0 is false, anything else is true
end
|
||||
|
||||
# print eax in hex format
|
||||
def print_stmt
  # names of the two helper variables
  digits_name = '__DIGITS'
  hex_name = '__HEX'

  # The digit lookup table is defined and filled in only once; later
  # print statements reuse the existing variable.
  if asm.var?(digits_name)
    digits = asm.var(digits_name)
  else
    d_var = asm.define_var(digits_name, 16)
    asm.block do
      # define a lookup table of digits ("0123456789abcdef")
      mov([d_var], 0x33323130)
      mov([d_var+4], 0x37363534)
      mov([d_var+8], 0x62613938)
      mov([d_var+12], 0x66656463)
    end
    digits = d_var
  end

  # 12 bytes: 2 for "0x", 8 hex digits, 2 for newline + null terminator
  hex = asm.var!(hex_name, 12)

  asm.block do
    # TODO check sign and prepend '-' if negative
    mov([hex], 0x7830)          # "0x" ==> 0x30 (48), 0x78 (120)
    mov([hex+4], 0)             # zero the rest
    mov([hex+8], 0)
    mov([:byte, hex+10], 0xa)   # newline
    mov([:byte, hex+11], 0)     # null terminator
  end

  boolean_expression            # result in EAX

  asm.block do
    # convert eax to a hex string
    lea(ESI, [digits])
    lea(EDI, [hex+9])
    # build the string backwards (right to left), byte by byte
    mov(ECX, 4)
  end

  asm.block do
    loop_label = make_label
    define_label(loop_label)

    # low nybble of nth byte
    movzx(EBX, AL)
    and_(BL, 0x0f)              # isolate low nybble
    movzx(EDX, [:byte, ESI+EBX])
    mov([EDI], DL)
    dec(EDI)

    # high nybble of nth byte
    movzx(EBX, AL)
    and_(BL, 0xf0)              # isolate high nybble
    shr(BL, 4)
    mov(DL, [ESI+EBX])
    mov([EDI], DL)
    dec(EDI)

    shr(EAX, 8)
    loop_(loop_label)

    # write(int fd, char *s, int n)
    mov(EAX, 4)                 # SYS_write
    lea(ECX, [hex])             # ecx = &s
    args = [
      1,   # fd = 1 (STDOUT)
      ECX, # s  = &s
      11   # n  = 11 (excluding term, max # of chars to print)
    ]

    case platform
    when 'darwin' # on the stack, right to left (right @ highest addr)
      ####
      # setup bogus stack frame
      push(EBP)
      mov(EBP, ESP)
      sub(ESP, 36)
      ####
      args.reverse_each { |arg| push(arg) }
      push(EAX)
      int(0x80)
      ####
      # teardown bogus stack frame
      xor(EAX, EAX)
      add(ESP, 36)
      pop(EBX)
      leave
      ####
    when 'linux'
      # arguments are passed in EBX, ECX and EDX
      mov(EBX, args[0])
      mov(ECX, args[1])
      mov(EDX, args[2])
      int(0x80)
    end
  end
end
|
||||
|
||||
|
||||
############
|
||||
# internal #
|
||||
############
|
||||
|
||||
|
||||
# True once the input stream is exhausted *and* no lookahead character
# is pending.
def eof?
  return false unless @input.eof?

  @look.nil?
end
|
||||
|
||||
# Is char one of the operator characters listed in OP_CHARS for the
# given kind (:all unless specified)?
def op_char?(char, kind = :all)
  candidates = OP_CHARS[kind]
  candidates.include?(char)
end
|
||||
|
||||
# Is token one of the operators listed in OPS for the given kind?
def op?(kind, token)
  operators = OPS[kind]
  operators.include?(token)
end
|
||||
|
||||
# Read the next character from the input stream.
|
||||
def get_char
  # At end of input the lookahead must become nil: scan and eof? both
  # test @look.nil? to detect the end. The previous
  # "@look = ... unless @input.eof?" skipped the assignment altogether
  # and left the last character in @look forever.
  @look = @input.eof? ? nil : @input.readbyte.chr
end
|
||||
|
||||
# Report what was expected
|
||||
# what:    What the parser expected at this point.
# options: :got overrides what was actually found (defaults to @value).
#
# Symbols are shown bare, anything else is wrapped in single quotes.
# Always raises ParseError; unless the input is exhausted the rest of
# the current line is read and passed along as context.
def expected(what, options = {})
  quote = lambda { |item| item.is_a?(Symbol) ? item : "'#{item}'" }
  got = quote.call(options.has_key?(:got) ? options[:got] : @value)
  what = quote.call(what)

  if eof?
    raise ParseError.new(caller), "Premature end of file, expected: #{what}."
  end

  rest_of_line = begin
    @input.readline
  rescue StandardError
    '(EOF)'
  end
  context = rest_of_line.gsub("\n", "\\n")
  raise ParseError.new(caller, context), "Expected #{what} but got #{got}."
end
|
||||
|
||||
|
||||
|
||||
# Recognize an alphabetical character.
|
||||
def alpha?(char)
  # The lookahead is nil at end of input; nil is simply not a letter
  # (calling upcase on it would raise NoMethodError).
  return false if char.nil?

  ('A'..'Z') === char.upcase
end
|
||||
|
||||
# Recognize a decimal digit.
|
||||
def digit?(char)
  decimal_digits = ('0'..'9')
  decimal_digits === char
end
|
||||
|
||||
# Recognize an alphanumeric character.
|
||||
def alnum?(char)
  # Underscores count too.
  return true if alpha?(char)
  return true if digit?(char)

  char == '_'
end
|
||||
|
||||
# XXX disabled! ... should treat true/false as constants
|
||||
# once again we need a token of lookahead
|
||||
def boolean?(char)
  # Detection is switched off for now; the intended check was:
  #char == 't' || char == 'f'
  false
end
|
||||
|
||||
# Recognize a space or a tab (newlines are handled separately).
def whitespace?(char)
  [' ', "\t"].include?(char)
end
|
||||
|
||||
# Recognize a line feed or a carriage return.
def newline?(char)
  ["\n", "\r"].include?(char)
end
|
||||
|
||||
# Recognize the character that starts a comment.
def comment_char?(char)
  '#' == char
end
|
||||
|
||||
# Recognize whitespace of any kind, newlines included.
def any_whitespace?(char)
  return true if whitespace?(char)

  newline?(char)
end
|
||||
|
||||
# Parse one or more newlines.
|
||||
def get_newline
  expected(:newline, :got => @look) if !newline?(@look)
  # Consume the whole run of newline characters ...
  many(:newline?)
  # ... and report it as one newline token.
  @token = :newline
  @value = "\n"
end
|
||||
|
||||
# Match literal input.
|
||||
def match(char)
  # The lookahead has to be exactly the expected character.
  expected(char, :got => @look) if @look != char
  # puts "[ch] #{indent}#{char}"
  get_char
  skip_whitespace
end
|
||||
|
||||
# Match literal input.
|
||||
# word:    The text the current token value (@value) has to equal.
# options: :scan => true scans a fresh token first.
#
# Returns true on a match; a mismatch is reported through #expected.
def match_word(word, options = {})
  scan if options[:scan]
  return true if @value == word

  expected(word)
  false
end
|
||||
|
||||
# Parse zero or more consecutive characters for which the test is
|
||||
# true.
|
||||
def many(test)
  # A Symbol names one of the parser's own predicate methods.
  predicate = test.is_a?(Symbol) ? method(test) : test

  collected = ''.dup
  until eof? || !predicate[@look]
    collected << @look
    get_char
  end
  # Trailing spaces/tabs are skipped as well.
  skip_whitespace
  collected
end
|
||||
|
||||
# Parse a "name" (keyword or identifier).
|
||||
def get_name
  expected(:identifier) if !alpha?(@look)
  @value = many(:alnum?)
  # Names listed in @keyword_names are keywords, all others identifiers.
  if @keyword_names.include?(@value)
    @token = :keyword
  else
    @token = :identifier
  end
  @value
end
|
||||
|
||||
# Parse a number.
|
||||
def get_number
  expected(:integer) if !digit?(@look)
  @token = :number
  # The value stays a String of digits; callers convert it as needed.
  @value = many(:digit?)
  # puts "[nu] #{indent}#{@value} (0x#{@value.to_i.to_s(16)})"
  @value
end
|
||||
|
||||
# Parse a boolean literal ('true' or 'false') and return its text.
def get_boolean
  get_name
  is_literal = @value == 'true' || @value == 'false'
  expected(:boolean) unless is_literal
  @token = :boolean
  # puts "[bo] #{indent}#{@value}"
  @value
end
|
||||
|
||||
# Parse an operator: a run of consecutive operator characters.
def get_op
  expected(:operator) if !op_char?(@look)
  @token = :op
  @value = many(:op_char?)
end
|
||||
|
||||
# Skip leading whitespace.
|
||||
def skip_whitespace
  while whitespace?(@look)
    get_char
  end
end
|
||||
|
||||
# Skip leading whitespace including newlines.
|
||||
def skip_any_whitespace
  while any_whitespace?(@look)
    get_char
  end
end
|
||||
|
||||
# Skip a comment: everything up to the end of the line, plus any
# whitespace and newlines that follow.
def skip_comment
  # A comment on the last line need not end with a newline, so running
  # out of input has to end the loop as well (it used to spin forever).
  get_char until eof? || newline?(@look)
  skip_any_whitespace
end
|
||||
|
||||
|
||||
# Indentation string used by the debugging output: four spaces per
# nesting level, with 'else' and 'end' shown one level further out.
def indent
  closing = @value == 'else' || @value == 'end'
  depth = closing ? @indent - 1 : @indent
  ' ' * (depth * 4)
end
|
||||
|
||||
# Emit a push of reg, run the given block, then emit an add of 4 to
# ESP in place of a pop.
def pushing(reg)
  asm.push(reg)
  yield
  asm.add(ESP, 4)
end
|
||||
|
||||
# Debugging aid: print the current token as a tag for its type, the
# current indentation and the token's value. Raises for token types it
# does not know about.
def print_token
  tags = {
    :keyword    => '[kw] ',
    :number     => '[nu] ',
    :identifier => '[id] ',
    :op         => '[op] ',
    :boolean    => '[bo] ',
    :newline    => ''
  }
  tag = tags.fetch(@token) do
    raise "print doesn't know about #{@token}: #{@value}"
  end

  print(tag)
  print indent
  puts @value
end
|
||||
|
||||
# Run the given block with label on top of the label stack, then take
# the label off again.
def pushing_label(label)
  push_label(label)
  yield
  pop_label
end
|
||||
|
||||
# Put label on top of the label stack.
def push_label(label)
  @label_stack.push(label)
end
|
||||
|
||||
# The label on top of the label stack, or nil when the stack is empty.
def top_label
  @label_stack[-1]
end
|
||||
|
||||
# Remove and return the label on top of the label stack.
def pop_label
  @label_stack.pop
end
|
||||
|
||||
|
||||
# for debugging
|
||||
# Wrap each of the named instance methods so that +callback+ (the name
# of another instance method) runs after every call.
#
# callback: Symbol, method invoked after the wrapped method returns.
# methods:  Array of Symbols naming the methods to wrap.
#
# The original implementation stays reachable as orig_<name>, and the
# wrapper returns the original's return value. Arguments and a block
# are forwarded, so methods that take parameters can be hooked too
# (the wrapper previously accepted no arguments at all).
def self.hook(callback, methods)
  methods.each do |m|
    orig = :"orig_#{m}"
    alias_method orig, m
    define_method(m) do |*args, &blk|
      val = send(orig, *args, &blk)
      send(callback)
      val
    end
  end
end
|
||||
|
||||
# hook(:print_token, [:get_name, :get_newline, :get_number, :get_op, :get_boolean])
|
||||
|
||||
end
|
||||
end
|
||||
Loading…
Reference in a new issue