mirror of
https://github.com/samsonjs/compiler.git
synced 2026-04-27 14:57:45 +00:00
[big commit] use variable proxies to defer address calculation
This commit is contained in:
parent
19d79c8836
commit
0c21d1abc6
4 changed files with 325 additions and 109 deletions
287
asm/binary.rb
287
asm/binary.rb
|
|
@ -4,8 +4,15 @@
|
||||||
#
|
#
|
||||||
# sjs
|
# sjs
|
||||||
# may 2009
|
# may 2009
|
||||||
|
#
|
||||||
|
# Refer to the Intel[1] or AMD documentation on x86 for explanations
|
||||||
|
# of Mod-R/M encoding, the Scale-Index-Base (SIB) byte, opcode groups.
|
||||||
|
#
|
||||||
|
# The start and exit shell codes were obtained by disassembling
|
||||||
|
# minimal binaries on the respective platforms.
|
||||||
|
|
||||||
require 'asm/asm'
|
require 'asm/asm'
|
||||||
|
require 'asm/varproxy'
|
||||||
|
|
||||||
module Assembler
|
module Assembler
|
||||||
|
|
||||||
|
|
@ -25,8 +32,9 @@ module Assembler
|
||||||
SignedInt = MinSigned..MaxSigned
|
SignedInt = MinSigned..MaxSigned
|
||||||
SignedByte = -128..127
|
SignedByte = -128..127
|
||||||
|
|
||||||
# This is used for encoding instructions. Just as the generated asm
|
# This is used for encoding instructions. Just as the equivalent
|
||||||
# contains "BITS 32", binary is generated for 32-bit protected mode.
|
# assembly would contain "BITS 32", binary is generated for 32-bit
|
||||||
|
# protected mode.
|
||||||
DefaultOperandSize = :dword
|
DefaultOperandSize = :dword
|
||||||
|
|
||||||
SizeMap = {:byte => 8, :word => 16, :dword => 32}
|
SizeMap = {:byte => 8, :word => 16, :dword => 32}
|
||||||
|
|
@ -50,63 +58,150 @@ module Assembler
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
|
|
||||||
attr_reader :eip
|
attr_reader :ip
|
||||||
|
|
||||||
def initialize(platform, symtab, objwriter)
|
def initialize(platform, symtab, objwriter_class)
|
||||||
super(platform)
|
super(platform)
|
||||||
@symtab = symtab
|
@symtab = symtab
|
||||||
@objwriter = objwriter
|
@objwriter_class = objwriter_class
|
||||||
@binary = [] # Byte array of machine code.
|
# @objwriter = objwriter
|
||||||
@eip = 0 # Our instruction pointer, or the number of bytes written.
|
|
||||||
|
# Almost a byte array, except for addresses.
|
||||||
|
#
|
||||||
|
# Addresses take the form [:<type>, <name>]
|
||||||
|
# where <type> is one of: var, const, or label
|
||||||
|
#
|
||||||
|
# NOTE the type is redundant because of VariableProxy#const?
|
||||||
|
# and labels are just strings.
|
||||||
|
#
|
||||||
|
# however, we could accept strings for variable names
|
||||||
|
# if we keep the type tag. something to think about.
|
||||||
|
@ir = []
|
||||||
|
|
||||||
|
# Our instruction pointer, or the number of bytes written.
|
||||||
|
@ip = 0
|
||||||
|
|
||||||
|
# Map locations in the byte array to var proxies so we can
|
||||||
|
# resolve address operations on the 2nd pass.
|
||||||
|
@proxies = {}
|
||||||
|
|
||||||
# Always include the _main entry point in our symbol table. It begins at the
|
# Always include the _main entry point in our symbol table. It begins at the
|
||||||
# beginning of the __TEXT segment, 0x0.
|
# beginning of the __TEXT segment, 0x0.
|
||||||
@symtab.deflabel('_main', @eip)
|
@symtab.deflabel('_main', @ip)
|
||||||
|
|
||||||
|
X86_start[@platform].each {|byte| emit_byte(byte)}
|
||||||
end
|
end
|
||||||
|
|
||||||
def output
|
def output
|
||||||
resolve_labels
|
X86_exit[@platform].each {|byte| emit_byte(byte)}
|
||||||
blobs = X86_start[@platform] + @binary + X86_exit[@platform]
|
|
||||||
binary = blobs.pack('c*')
|
byte_array = resolve_labels
|
||||||
@objwriter.text(binary)
|
|
||||||
@objwriter.const(@symtab.const_data)
|
#puts "1st pass: " + byte_array.inspect if DEBUG_OUTPUT
|
||||||
@objwriter.bss(@symtab.bss_size)
|
|
||||||
@objwriter.symtab(@symtab)
|
binary = package(byte_array)
|
||||||
@objwriter.serialize
|
|
||||||
|
@symtab.calculate_offsets(binary.length)
|
||||||
|
if DEBUG_OUTPUT
|
||||||
|
puts ">>> text offset: 0x#{@symtab.text_offset.to_s(16)}"
|
||||||
|
puts ">>> const offset: 0x#{@symtab.const_offset.to_s(16)}"
|
||||||
|
puts ">>> bss offset: 0x#{@symtab.bss_offset.to_s(16)}"
|
||||||
|
end
|
||||||
|
|
||||||
|
# Now that we know where everything lies do the 2nd pass
|
||||||
|
# calculating and filling in final var and const addresses.
|
||||||
|
#
|
||||||
|
# outline:
|
||||||
|
# - resolve all variable proxies in @proxies replacing
|
||||||
|
# the 4 bytes (0xff) with the real address
|
||||||
|
|
||||||
|
bss_offset = @symtab.bss_offset
|
||||||
|
const_offset = @symtab.const_offset
|
||||||
|
@proxies.each do |i, proxy|
|
||||||
|
#puts ">>> Resolving #{proxy.name}" if DEBUG_OUTPUT
|
||||||
|
var = @symtab.var(proxy.name)
|
||||||
|
base_addr = if proxy.const?
|
||||||
|
const_offset + @symtab.const(proxy.name)
|
||||||
|
else
|
||||||
|
bss_offset + @symtab.var(proxy.name)
|
||||||
|
end
|
||||||
|
#puts ">>> Replacing #{byte_array[i,4].map{|x|'0x' + x.to_s(16)}.inspect} with #{num_to_quad(proxy.resolve(base_addr)).map{|x|'0x' + x.to_s(16)}.inspect}" if DEBUG_OUTPUT
|
||||||
|
byte_array[i, 4] = num_to_quad(proxy.resolve(base_addr))
|
||||||
|
end
|
||||||
|
|
||||||
|
binary = package(byte_array)
|
||||||
|
|
||||||
|
#puts "2nd pass: " + byte_array.inspect if DEBUG_OUTPUT
|
||||||
|
|
||||||
|
objwriter = @objwriter_class.new
|
||||||
|
objwriter.text(binary)
|
||||||
|
objwriter.const(@symtab.const_data) if @symtab.const_size > 0
|
||||||
|
objwriter.bss(@symtab.bss_size) if @symtab.bss_size > 0
|
||||||
|
objwriter.reloc(@symtab.reloc_info)
|
||||||
|
objwriter.symtab(@symtab)
|
||||||
|
objwriter.serialize
|
||||||
end
|
end
|
||||||
|
|
||||||
def resolve_labels
|
def resolve_labels
|
||||||
bytes_read = 0
|
bytes_read = 0
|
||||||
@binary.each_with_index do |x, i|
|
bytes = []
|
||||||
|
@ir.each_with_index do |x, i|
|
||||||
if x.is_a?(Numeric)
|
if x.is_a?(Numeric)
|
||||||
|
bytes << x
|
||||||
bytes_read += 1
|
bytes_read += 1
|
||||||
|
|
||||||
elsif addr?(x)
|
elsif addr?(x)
|
||||||
@binary[i, 1] = x[1..-1]
|
# remember this so we can replace the bogus addr later
|
||||||
bytes_read += 1
|
@proxies[bytes_read] = x[1]
|
||||||
|
|
||||||
else # label to resolve
|
# add a relocation entry for this address
|
||||||
|
@symtab.reloc(bytes_read)
|
||||||
|
|
||||||
|
# fill in said bogus addr
|
||||||
|
bytes += [0xff, 0xff, 0xff, 0xff]
|
||||||
|
|
||||||
|
bytes_read += 4
|
||||||
|
|
||||||
|
|
||||||
|
# TODO find out if we should calculate addrs as offsets rather than
|
||||||
|
# absolute as they are done now. (ok for Mach-O, maybe not ELF)
|
||||||
|
elsif label?(x)
|
||||||
# the actual eip points to the next instruction already, so should we.
|
# the actual eip points to the next instruction already, so should we.
|
||||||
real_eip = bytes_read + 4
|
real_ip = bytes_read + 4
|
||||||
addr = @symtab.lookup_label(x) - real_eip # dest - src to get relative addr
|
name = x[1]
|
||||||
puts "resolved label: #{x} = 0x#{@symtab.lookup_label(x).to_s(16)} (rel: 0x#{addr.to_s(16)}, eip = 0x#{real_eip.to_s(16)}, bytes_read = 0x#{bytes_read.to_s(16)})" if DEBUG_OUTPUT
|
addr = @symtab.lookup_label(name) - real_ip # dest - src to get relative addr
|
||||||
@binary[i, 1] = num_to_quad(addr)
|
#puts "resolved label: #{x} = 0x#{@symtab.lookup_label(name).to_s(16)} (rel: 0x#{addr.to_s(16)}, ip = 0x#{real_ip.to_s(16)}, bytes_read = 0x#{bytes_read.to_s(16)})" if DEBUG_OUTPUT
|
||||||
# count the first byte just written, the rest are counted normally
|
|
||||||
bytes_read += 1
|
|
||||||
|
bytes += num_to_quad(addr)
|
||||||
|
bytes_read += 4
|
||||||
|
|
||||||
|
else
|
||||||
|
raise "unknown value in the IR at #{bytes_read} - #{x.inspect}"
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
return bytes
|
||||||
end
|
end
|
||||||
|
|
||||||
|
def package(bytes)
|
||||||
|
bytes.pack('c*')
|
||||||
|
end
|
||||||
|
|
||||||
|
# Silly semantics, but labels don't count as an address since they
|
||||||
|
# don't need to be deferred.
|
||||||
def addr?(x)
|
def addr?(x)
|
||||||
x.is_a?(Array) && x[0] == :addr
|
x.is_a?(Array) && [:var, :const].include?(x[0])
|
||||||
end
|
end
|
||||||
|
|
||||||
def addr_size(addr)
|
def label?(x)
|
||||||
addr.length - 1
|
x.is_a?(Array) && x[0] == :label
|
||||||
end
|
end
|
||||||
|
|
||||||
|
# XXX this should probably evaluate the value somehow
|
||||||
def defconst(name, bytes, value)
|
def defconst(name, bytes, value)
|
||||||
@symtab.defconst(name, bytes, value)
|
@symtab.defconst(name, bytes, value)
|
||||||
|
return const(name)
|
||||||
end
|
end
|
||||||
|
|
||||||
# Define a variable with the given name and size in bytes.
|
# Define a variable with the given name and size in bytes.
|
||||||
|
|
@ -116,27 +211,49 @@ module Assembler
|
||||||
else
|
else
|
||||||
STDERR.puts "[warning] attempted to redefine #{name}"
|
STDERR.puts "[warning] attempted to redefine #{name}"
|
||||||
end
|
end
|
||||||
|
return var(name)
|
||||||
end
|
end
|
||||||
|
|
||||||
# These methods are all delegated to the symbol table.
|
def var(name)
|
||||||
%w[var var? const const?].each do |method|
|
STDERR.puts "[error] undefined variable #{name}" unless var?(name)
|
||||||
define_method(method) do |name|
|
# TODO bail on undefined vars
|
||||||
@symtab.send(method, name)
|
VariableProxy.new(name)
|
||||||
|
end
|
||||||
|
|
||||||
|
def const(name)
|
||||||
|
STDERR.puts "[error] undefined variable #{name}" unless const?(name)
|
||||||
|
# TODO bail on undefined consts
|
||||||
|
VariableProxy.new(name, true)
|
||||||
|
end
|
||||||
|
|
||||||
|
def var?(name)
|
||||||
|
@symtab.var?(name)
|
||||||
|
end
|
||||||
|
|
||||||
|
def const?(name)
|
||||||
|
@symtab.const?(name)
|
||||||
|
end
|
||||||
|
|
||||||
|
# Define a variable unless it exists.
|
||||||
|
def var!(name, bytes=4)
|
||||||
|
if var?(name)
|
||||||
|
var(name)
|
||||||
|
else
|
||||||
|
defvar(name, bytes)
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|
||||||
# Count the bytes that were encoded in the given block.
|
# Count the bytes that were encoded in the given block.
|
||||||
def asm
|
def asm
|
||||||
# stash the current number of bytes written
|
# stash the current number of bytes written
|
||||||
instruction_offset = @eip
|
instruction_offset = @ip
|
||||||
|
|
||||||
print "0x#{@eip.to_s(16).rjust(4, '0')}\t" if DEBUG_OUTPUT
|
print "0x#{@ip.to_s(16).rjust(4, '0')}\t" if DEBUG_OUTPUT
|
||||||
|
|
||||||
yield
|
yield
|
||||||
|
|
||||||
# return the number of bytes written
|
# return the number of bytes written
|
||||||
@eip - instruction_offset
|
@ip - instruction_offset
|
||||||
|
|
||||||
puts if DEBUG_OUTPUT
|
puts if DEBUG_OUTPUT
|
||||||
end
|
end
|
||||||
|
|
@ -165,21 +282,33 @@ module Assembler
|
||||||
|
|
||||||
print (byte >= 0 && byte < 0x10 ? '0' : '') + byte.to_s(16) + ' ' if DEBUG_OUTPUT
|
print (byte >= 0 && byte < 0x10 ? '0' : '') + byte.to_s(16) + ' ' if DEBUG_OUTPUT
|
||||||
|
|
||||||
@binary << byte
|
@ir << byte
|
||||||
@eip += 1
|
@ip += 1
|
||||||
end
|
end
|
||||||
|
|
||||||
def emit_addr(addr)
|
# addresses are emitted as [type, name] placeholders, where type is :var, :const, or :label
|
||||||
@eip += addr.length
|
def emit_addr(type, name)
|
||||||
addr.insert(0, :addr)
|
placeholder = [type, name]
|
||||||
puts addr.inspect if DEBUG_OUTPUT
|
puts placeholder.inspect if DEBUG_OUTPUT
|
||||||
@binary << addr
|
@ir << placeholder
|
||||||
|
|
||||||
|
# all addresses are 32-bits and jumps are all 32-bit relative
|
||||||
|
@ip += 4
|
||||||
end
|
end
|
||||||
|
|
||||||
def emit_future_addr(label)
|
def emit_var(name_or_proxy)
|
||||||
print "<#{label}> " if DEBUG_OUTPUT
|
proxy = name_or_proxy.is_a?(VariableProxy) ? name_or_proxy : var(name_or_proxy)
|
||||||
@binary << label
|
emit_addr(:var, proxy)
|
||||||
@eip += 4 # all jumps are 32-bit relative for now
|
end
|
||||||
|
|
||||||
|
# Emit a deferred address placeholder for a constant.
#
# +name+ may be either the constant's name or an existing
# VariableProxy. A bare name is looked up via #const, which wraps
# it in a const-flagged proxy. The proxy is then handed to
# #emit_addr tagged as :const.
#
# The body previously referenced an undefined local
# (name_or_proxy), which raised NameError on every call; it now
# uses the declared parameter.
def emit_const(name)
  proxy = name.is_a?(VariableProxy) ? name : const(name)
  emit_addr(:const, proxy)
end
|
||||||
|
|
||||||
|
def emit_label(name)
|
||||||
|
print "<#{name}> " if DEBUG_OUTPUT
|
||||||
|
emit_addr(:label, name)
|
||||||
end
|
end
|
||||||
|
|
||||||
def emit_dword(num)
|
def emit_dword(num)
|
||||||
|
|
@ -190,9 +319,9 @@ module Assembler
|
||||||
@symtab.unique_label(suffix)
|
@symtab.unique_label(suffix)
|
||||||
end
|
end
|
||||||
|
|
||||||
def emit_label(name)
|
def deflabel(name)
|
||||||
puts "\n#{name} (0x#{@eip.to_s(16)}):" if DEBUG_OUTPUT
|
puts "\n#{name} (0x#{@ip.to_s(16)}):" if DEBUG_OUTPUT
|
||||||
@symtab.deflabel(name, @eip)
|
@symtab.deflabel(name, @ip)
|
||||||
end
|
end
|
||||||
|
|
||||||
def emit_modrm(addr, reg=0)
|
def emit_modrm(addr, reg=0)
|
||||||
|
|
@ -201,12 +330,14 @@ module Assembler
|
||||||
disp8 = nil
|
disp8 = nil
|
||||||
disp32 = nil
|
disp32 = nil
|
||||||
sib = nil
|
sib = nil
|
||||||
|
var = nil # variable proxy
|
||||||
|
|
||||||
# effective address
|
# effective address
|
||||||
if addr.is_a?(Array)
|
if addr.is_a?(Array)
|
||||||
eff_addr = addr[1] || addr[0] # works with or without size prefix
|
eff_addr = addr[1] || addr[0] # works with or without size prefix
|
||||||
raise "invalid effective address: #{addr.inspect}" unless eff_addr
|
raise "invalid effective address: #{addr.inspect}" unless eff_addr
|
||||||
case eff_addr
|
case eff_addr
|
||||||
|
|
||||||
when RegisterProxy
|
when RegisterProxy
|
||||||
|
|
||||||
# Simple register addressing, e.g. [ESI].
|
# Simple register addressing, e.g. [ESI].
|
||||||
|
|
@ -266,6 +397,11 @@ module Assembler
|
||||||
rm = 5 # 101
|
rm = 5 # 101
|
||||||
disp32 = eff_addr
|
disp32 = eff_addr
|
||||||
|
|
||||||
|
when VariableProxy
|
||||||
|
mod = 0
|
||||||
|
rm = 5
|
||||||
|
var = eff_addr
|
||||||
|
|
||||||
else
|
else
|
||||||
raise "unsupported effective address: #{addr.inspect}"
|
raise "unsupported effective address: #{addr.inspect}"
|
||||||
end
|
end
|
||||||
|
|
@ -275,14 +411,22 @@ module Assembler
|
||||||
mod = 3
|
mod = 3
|
||||||
rm = addr.regnum
|
rm = addr.regnum
|
||||||
|
|
||||||
|
# XXX TODO elsif addr.respond_to?(:name)
|
||||||
|
# (VariableProxy) => [:(var|const), addr.name]
|
||||||
|
#
|
||||||
|
# i.e. a pointer to that var
|
||||||
|
|
||||||
else
|
else
|
||||||
raise "unsupported effective address: #{addr.inspect}"
|
raise "unsupported effective address: #{addr.inspect}"
|
||||||
end
|
end
|
||||||
|
|
||||||
emit_byte((mod << 6) | (reg << 3) | rm)
|
emit_byte((mod << 6) | (reg << 3) | rm)
|
||||||
emit_byte(sib) if sib
|
emit_byte(sib) if sib
|
||||||
emit_addr([disp8]) if disp8
|
|
||||||
emit_addr(num_to_quad(disp32)) if disp32
|
emit_byte(disp8) if disp8
|
||||||
|
|
||||||
|
emit_dword(disp32) if disp32
|
||||||
|
emit_var(var) if var
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -311,12 +455,25 @@ module Assembler
|
||||||
op.is_a?(Numeric) && op >= -(2 ** bits / 2) && op <= (2 ** bits - 1)
|
op.is_a?(Numeric) && op >= -(2 ** bits / 2) && op <= (2 ** bits - 1)
|
||||||
end
|
end
|
||||||
|
|
||||||
|
# Return true if op is a valid operand of the specified size.
|
||||||
|
# (:byte, :word, :dword)
|
||||||
|
#
|
||||||
|
# Valid operands are:
|
||||||
|
#
|
||||||
|
# * registers
|
||||||
|
#
|
||||||
|
# * effective addresses (wrapped in an array to look like nasm code)
|
||||||
|
#
|
||||||
|
# XXX This method is pretty ugly.
|
||||||
def rm?(op, size=DefaultOperandSize)
|
def rm?(op, size=DefaultOperandSize)
|
||||||
register?(op, size) || op.is_a?(Array) && (op.size == 1 || op[0] == size)
|
register?(op, size) ||
|
||||||
|
(op.is_a?(Array) &&
|
||||||
|
(op.size == 1 && [Numeric, RegisterProxy, VariableProxy].any?{|c| c == op[0].class}) ||
|
||||||
|
(op.size == 2 && rm?(op[1])))
|
||||||
end
|
end
|
||||||
|
|
||||||
def offset?(addr, size=DefaultOperandSize)
|
def offset?(addr, size=DefaultOperandSize)
|
||||||
addr.is_a?(Array) && addr[0].is_a?(Numeric)
|
addr.is_a?(Array) && (addr[0].is_a?(Numeric) || addr[0].is_a?(VariableProxy))
|
||||||
end
|
end
|
||||||
|
|
||||||
def constant?(op)
|
def constant?(op)
|
||||||
|
|
@ -434,10 +591,20 @@ module Assembler
|
||||||
raise "unsupported MOV instruction, #{dest.inspect}, #{src.inspect}"
|
raise "unsupported MOV instruction, #{dest.inspect}, #{src.inspect}"
|
||||||
end
|
end
|
||||||
|
|
||||||
|
dword = immediate || offset
|
||||||
|
|
||||||
asm do
|
asm do
|
||||||
emit_byte(opcode)
|
emit_byte(opcode)
|
||||||
emit_modrm(*modrm) if modrm
|
emit_modrm(*modrm) if modrm
|
||||||
emit_dword(immediate || offset) if immediate || offset
|
if dword.is_a?(VariableProxy)
|
||||||
|
if dword.const?
|
||||||
|
emit_const(dword)
|
||||||
|
else
|
||||||
|
emit_var(dword)
|
||||||
|
end
|
||||||
|
elsif dword
|
||||||
|
emit_dword(dword)
|
||||||
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|
@ -742,7 +909,7 @@ module Assembler
|
||||||
def jmp(label)
|
def jmp(label)
|
||||||
asm do
|
asm do
|
||||||
emit_byte(0xe9)
|
emit_byte(0xe9)
|
||||||
emit_future_addr(label)
|
emit_label(label)
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|
@ -768,7 +935,7 @@ module Assembler
|
||||||
asm do
|
asm do
|
||||||
emit_byte(0x0f)
|
emit_byte(0x0f)
|
||||||
emit_byte(opcode)
|
emit_byte(opcode)
|
||||||
emit_future_addr(label)
|
emit_label(label)
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|
@ -807,8 +974,8 @@ module Assembler
|
||||||
|
|
||||||
# NOTE: LOOP only accepts a 1-byte signed offset. Don't use it.
|
# NOTE: LOOP only accepts a 1-byte signed offset. Don't use it.
|
||||||
def loop_(label)
|
def loop_(label)
|
||||||
real_eip = @eip + 2 # loop instruction is 2 bytes
|
real_ip = @ip + 2 # loop instruction is 2 bytes
|
||||||
delta = @symtab.lookup_label(label) - real_eip
|
delta = @symtab.lookup_label(label) - real_ip
|
||||||
unless SignedByte === delta
|
unless SignedByte === delta
|
||||||
raise "LOOP can only jump -128 to 127 bytes, #{label} is #{delta} bytes away"
|
raise "LOOP can only jump -128 to 127 bytes, #{label} is #{delta} bytes away"
|
||||||
end
|
end
|
||||||
|
|
|
||||||
41
asm/varproxy.rb
Normal file
41
asm/varproxy.rb
Normal file
|
|
@ -0,0 +1,41 @@
|
||||||
|
module Assembler

  # Wrap a variable's address so that we can perform arithmetic on it
  # before resolving it when we know where things will go in memory.
  # All we do is catch arithmetic ops and then provide a means to
  # resolve a final address by replaying them later.
  #
  # e.g. [symtab.var('i')] or [symtab.var('i') * 2]
  #
  # Proxies are treated as immutable values: every arithmetic operator
  # returns a NEW proxy and leaves the receiver untouched, so one proxy
  # can safely be reused as the base of several different expressions.
  class VariableProxy

    # Name of the variable or constant this proxy stands for.
    attr_reader :name

    # Recorded operations, oldest first. Each entry is
    # [operator, *operands] and is replayed by #resolve.
    attr_accessor :ops

    # name  - the symbol-table name being proxied
    # const - true when the name refers to a constant rather than a variable
    def initialize(name, const=false)
      @name = name
      @const = const
      @ops = []
    end

    # Define each supported arithmetic/bitwise operator. The result is a
    # fresh proxy carrying all of the receiver's recorded ops plus the new
    # one, so chained expressions such as (var + 4) * 2 keep every step.
    # (Previously only the most recent op survived a chain.)
    %w[+ * / - % & |].each do |op|
      define_method(op) do |*args|
        new_proxy = self.class.new(@name, @const)
        # Build a new array rather than sharing @ops with the receiver.
        new_proxy.ops = @ops + [[op, *args]]
        new_proxy
      end
    end

    # Replay the recorded operations, in order, starting from base_addr
    # and return the final value. With no recorded ops this is base_addr.
    #
    # XXX should this perhaps use the offset instead?
    def resolve(base_addr)
      @ops.inject(base_addr) do |addr, op|
        addr.send(*op)
      end
    end

    # True when this proxy was created for a constant.
    def const?
      @const
    end

  end

end
|
||||||
8
build.rb
8
build.rb
|
|
@ -87,14 +87,14 @@ end
|
||||||
|
|
||||||
def build(filename, platform='linux', binformat='elf')
|
def build(filename, platform='linux', binformat='elf')
|
||||||
objfile = base(filename) + '.o'
|
objfile = base(filename) + '.o'
|
||||||
symtab, objwriter =
|
symtab, objwriter_class =
|
||||||
case binformat
|
case binformat
|
||||||
when 'elf': [Assembler::ELFSymtab.new, Assembler::ELFFile.new]
|
when 'elf': [Assembler::ELFSymtab.new, Assembler::ELFFile]
|
||||||
when 'macho': [Assembler::MachOSymtab.new, Assembler::MachOFile.new]
|
when 'macho': [Assembler::MachOSymtab.new, Assembler::MachOFile]
|
||||||
else
|
else
|
||||||
raise "unsupported binary format: #{binformat}"
|
raise "unsupported binary format: #{binformat}"
|
||||||
end
|
end
|
||||||
compile(filename, objfile, Assembler::Binary.new(platform, symtab, objwriter))
|
compile(filename, objfile, Assembler::Binary.new(platform, symtab, objwriter_class))
|
||||||
exefile = link(objfile, platform)
|
exefile = link(objfile, platform)
|
||||||
return exefile
|
return exefile
|
||||||
end
|
end
|
||||||
|
|
|
||||||
88
compiler.rb
88
compiler.rb
|
|
@ -13,6 +13,7 @@
|
||||||
# require 'unroller'
|
# require 'unroller'
|
||||||
|
|
||||||
require 'asm/registers'
|
require 'asm/registers'
|
||||||
|
require 'asm/varproxy'
|
||||||
|
|
||||||
class ParseError < StandardError
|
class ParseError < StandardError
|
||||||
attr_reader :caller, :context
|
attr_reader :caller, :context
|
||||||
|
|
@ -34,22 +35,19 @@ class Compiler
|
||||||
attr_reader :asm
|
attr_reader :asm
|
||||||
|
|
||||||
def initialize(input, asm)
|
def initialize(input, asm)
|
||||||
# XXX for development only!
|
|
||||||
@indent = 0 # for pretty printing
|
@indent = 0 # for pretty printing
|
||||||
|
|
||||||
@look = '' # Next lookahead char.
|
@look = '' # Next lookahead char.
|
||||||
@token = nil # Type of last read token.
|
@token = nil # Type of last read token.
|
||||||
@value = nil # Value of last read token.
|
@value = nil # Value of last read token.
|
||||||
@input = input # Stream to read from.
|
@input = input # Stream to read from.
|
||||||
|
@asm = asm # assembler
|
||||||
@asm = asm
|
|
||||||
|
|
||||||
# seed the lexer
|
# seed the lexer
|
||||||
get_char
|
get_char
|
||||||
end
|
end
|
||||||
|
|
||||||
def compile
|
def compile
|
||||||
block
|
block # parse a block of code
|
||||||
expected(:'end of file') unless eof?
|
expected(:'end of file') unless eof?
|
||||||
asm.output
|
asm.output
|
||||||
end
|
end
|
||||||
|
|
@ -267,7 +265,7 @@ class Compiler
|
||||||
asm.cmp(reg, 0) # if false do nothing
|
asm.cmp(reg, 0) # if false do nothing
|
||||||
asm.jz(end_label)
|
asm.jz(end_label)
|
||||||
asm.mov(reg, -1) # truthy, make it true
|
asm.mov(reg, -1) # truthy, make it true
|
||||||
asm.emit_label(end_label)
|
asm.deflabel(end_label)
|
||||||
end
|
end
|
||||||
|
|
||||||
def relation
|
def relation
|
||||||
|
|
@ -336,11 +334,11 @@ class Compiler
|
||||||
asm.not_(EAX) if invert # (or true if inverted)
|
asm.not_(EAX) if invert # (or true if inverted)
|
||||||
asm.jmp(end_label)
|
asm.jmp(end_label)
|
||||||
|
|
||||||
asm.emit_label(true_label)
|
asm.deflabel(true_label)
|
||||||
asm.xor(EAX, EAX) # return true
|
asm.xor(EAX, EAX) # return true
|
||||||
asm.not_(EAX) unless invert # (or false if inverted)
|
asm.not_(EAX) unless invert # (or false if inverted)
|
||||||
|
|
||||||
asm.emit_label(end_label)
|
asm.deflabel(end_label)
|
||||||
end
|
end
|
||||||
|
|
||||||
# a: <on the stack>
|
# a: <on the stack>
|
||||||
|
|
@ -387,11 +385,14 @@ class Compiler
|
||||||
name = @value
|
name = @value
|
||||||
match('=')
|
match('=')
|
||||||
boolean_expression
|
boolean_expression
|
||||||
asm.defvar(name) unless asm.var?(name)
|
lval = asm.var!(name)
|
||||||
asm.mov([asm.var(name)], EAX)
|
asm.mov([lval], EAX)
|
||||||
end
|
end
|
||||||
|
|
||||||
# Parse a code block.
|
# Parse a code block.
|
||||||
|
#
|
||||||
|
# TODO replace the case..when with a lookup table
|
||||||
|
# (might be exposed in the language later)
|
||||||
def block(label=nil)
|
def block(label=nil)
|
||||||
scan
|
scan
|
||||||
until @value == 'else' || @value == 'end' || eof?
|
until @value == 'else' || @value == 'end' || eof?
|
||||||
|
|
@ -438,13 +439,13 @@ class Compiler
|
||||||
skip_any_whitespace
|
skip_any_whitespace
|
||||||
end_label = asm.mklabel(:endif) # now we need the 2nd label
|
end_label = asm.mklabel(:endif) # now we need the 2nd label
|
||||||
asm.jmp(end_label)
|
asm.jmp(end_label)
|
||||||
asm.emit_label(else_label)
|
asm.deflabel(else_label)
|
||||||
@indent += 1
|
@indent += 1
|
||||||
block(label)
|
block(label)
|
||||||
@indent -= 1
|
@indent -= 1
|
||||||
end
|
end
|
||||||
match_word('end')
|
match_word('end')
|
||||||
asm.emit_label(end_label)
|
asm.deflabel(end_label)
|
||||||
end
|
end
|
||||||
|
|
||||||
# Used to implement the Two-Label-Loops (while, until, repeat).
|
# Used to implement the Two-Label-Loops (while, until, repeat).
|
||||||
|
|
@ -455,7 +456,7 @@ class Compiler
|
||||||
def simple_loop(name)
|
def simple_loop(name)
|
||||||
start_label = asm.mklabel(:"#{name}_loop")
|
start_label = asm.mklabel(:"#{name}_loop")
|
||||||
end_label = asm.mklabel(:"end_#{name}")
|
end_label = asm.mklabel(:"end_#{name}")
|
||||||
asm.emit_label(start_label)
|
asm.deflabel(start_label)
|
||||||
|
|
||||||
yield(end_label)
|
yield(end_label)
|
||||||
|
|
||||||
|
|
@ -464,7 +465,7 @@ class Compiler
|
||||||
@indent -= 1
|
@indent -= 1
|
||||||
match_word('end')
|
match_word('end')
|
||||||
asm.jmp(start_label)
|
asm.jmp(start_label)
|
||||||
asm.emit_label(end_label)
|
asm.deflabel(end_label)
|
||||||
end
|
end
|
||||||
|
|
||||||
def condition_loop(name, jump_instruction)
|
def condition_loop(name, jump_instruction)
|
||||||
|
|
@ -494,13 +495,13 @@ class Compiler
|
||||||
# s = s + x
|
# s = s + x
|
||||||
# e
|
# e
|
||||||
def for_stmt
|
def for_stmt
|
||||||
counter = get_name
|
name = get_name
|
||||||
asm.defvar(counter)
|
counter = asm.defvar(name)
|
||||||
match('=')
|
match('=')
|
||||||
boolean_expression # initial value
|
boolean_expression # initial value
|
||||||
asm.sub(EAX, 1) # pre-decrement because of the
|
asm.sub(EAX, 1) # pre-decrement because of the
|
||||||
# following pre-increment
|
# following pre-increment
|
||||||
asm.mov([asm.var(counter)], EAX) # stash the counter in memory
|
asm.mov([counter], EAX) # stash the counter in memory
|
||||||
match_word('to', :scan => true)
|
match_word('to', :scan => true)
|
||||||
boolean_expression # final value
|
boolean_expression # final value
|
||||||
skip_any_whitespace
|
skip_any_whitespace
|
||||||
|
|
@ -508,9 +509,9 @@ class Compiler
|
||||||
final = [ESP]
|
final = [ESP]
|
||||||
|
|
||||||
simple_loop('for') do |end_label|
|
simple_loop('for') do |end_label|
|
||||||
asm.mov(ECX, [asm.var(counter)]) # get the counter
|
asm.mov(ECX, [counter]) # get the counter
|
||||||
asm.add(ECX, 1) # increment
|
asm.add(ECX, 1) # increment
|
||||||
asm.mov([asm.var(counter)], ECX) # store the counter
|
asm.mov([counter], ECX) # store the counter
|
||||||
asm.cmp(final, ECX) # check if we're done
|
asm.cmp(final, ECX) # check if we're done
|
||||||
asm.jz(end_label) # if so jump to the end
|
asm.jz(end_label) # if so jump to the end
|
||||||
end
|
end
|
||||||
|
|
@ -529,7 +530,7 @@ class Compiler
|
||||||
|
|
||||||
start_label = asm.mklabel(:do)
|
start_label = asm.mklabel(:do)
|
||||||
end_label = asm.mklabel(:enddo)
|
end_label = asm.mklabel(:enddo)
|
||||||
asm.emit_label(start_label)
|
asm.deflabel(start_label)
|
||||||
|
|
||||||
asm.push(ECX)
|
asm.push(ECX)
|
||||||
|
|
||||||
|
|
@ -548,7 +549,7 @@ class Compiler
|
||||||
# always clean up the stack after.
|
# always clean up the stack after.
|
||||||
asm.sub(ESP, 4)
|
asm.sub(ESP, 4)
|
||||||
|
|
||||||
asm.emit_label(end_label)
|
asm.deflabel(end_label)
|
||||||
|
|
||||||
# If there was a break we have to clean up the stack here. If
|
# If there was a break we have to clean up the stack here. If
|
||||||
# there was no break we clean up the phony push above.
|
# there was no break we clean up the phony push above.
|
||||||
|
|
@ -573,35 +574,42 @@ class Compiler
|
||||||
|
|
||||||
# print eax in hex format
|
# print eax in hex format
|
||||||
def print_stmt
|
def print_stmt
|
||||||
# variable names
|
# variables
|
||||||
d = 'DIGITS'
|
d = '__DIGITS'
|
||||||
h = 'HEX'
|
h = '__HEX'
|
||||||
|
|
||||||
|
digits = if asm.var?(d)
|
||||||
|
asm.var(d)
|
||||||
|
else
|
||||||
|
d_var = asm.defvar(d, 4)
|
||||||
|
asm.block do
|
||||||
|
# define a lookup table of digits
|
||||||
|
mov([d_var], 0x33323130)
|
||||||
|
mov([d_var+4], 0x37363534)
|
||||||
|
mov([d_var+8], 0x62613938)
|
||||||
|
mov([d_var+12], 0x66656463)
|
||||||
|
end
|
||||||
|
d_var
|
||||||
|
end
|
||||||
|
|
||||||
|
# 3 dwords == 12 chars
|
||||||
|
hex = asm.var!(h, 3)
|
||||||
|
|
||||||
asm.block do
|
asm.block do
|
||||||
# define a lookup table of digits
|
|
||||||
unless var?(d)
|
|
||||||
defvar(d, 4)
|
|
||||||
mov([var(d)], 0x33323130)
|
|
||||||
mov([var(d)+4], 0x37363534)
|
|
||||||
mov([var(d)+8], 0x62613938)
|
|
||||||
mov([var(d)+12], 0x66656463)
|
|
||||||
end
|
|
||||||
# 3 dwords == 12 chars
|
|
||||||
defvar(h, 3) unless var?(h)
|
|
||||||
# TODO check sign and prepend '-' if negative
|
# TODO check sign and prepend '-' if negative
|
||||||
mov([var(h)], 0x7830) # "0x" == [48, 120]
|
mov([hex], 0x7830) # "0x" == [48, 120]
|
||||||
mov([var(h)+10], 0xa) # newline + null terminator
|
mov([hex+10], 0xa) # newline + null terminator
|
||||||
end
|
end
|
||||||
boolean_expression
|
boolean_expression
|
||||||
asm.block do
|
asm.block do
|
||||||
# convert eax to a hex string
|
# convert eax to a hex string
|
||||||
lea(ESI, [var(d)])
|
lea(ESI, [digits])
|
||||||
lea(EDI, [var(h)+9])
|
lea(EDI, [hex+9])
|
||||||
# build the string backwards (right to left), byte by byte
|
# build the string backwards (right to left), byte by byte
|
||||||
mov(ECX, 4)
|
mov(ECX, 4)
|
||||||
end
|
end
|
||||||
asm.emit_label(loop_label=asm.mklabel)
|
|
||||||
asm.block do
|
asm.block do
|
||||||
|
deflabel(loop_label=mklabel)
|
||||||
# low nybble of nth byte
|
# low nybble of nth byte
|
||||||
movzx(EBX, AL)
|
movzx(EBX, AL)
|
||||||
and_(BL, 0x0f) # isolate low nybble
|
and_(BL, 0x0f) # isolate low nybble
|
||||||
|
|
@ -619,7 +627,7 @@ class Compiler
|
||||||
loop_(loop_label)
|
loop_(loop_label)
|
||||||
# write(int fd, char *s, int n)
|
# write(int fd, char *s, int n)
|
||||||
mov(EAX, 4) # SYS_write
|
mov(EAX, 4) # SYS_write
|
||||||
lea(ECX, [var(h)]) # ecx = &s
|
lea(ECX, [hex]) # ecx = &s
|
||||||
args = [1, # fd = 1 (STDOUT)
|
args = [1, # fd = 1 (STDOUT)
|
||||||
ECX, # s = &s
|
ECX, # s = &s
|
||||||
11] # n = 11 (excluding term, max # of chars to print)
|
11] # n = 11 (excluding term, max # of chars to print)
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue